diff --git a/CMakeLists.txt b/CMakeLists.txt index dc39a641..4ec0c254 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -169,7 +169,7 @@ if (WIN32 AND NRI_ENABLE_D3D12_SUPPORT) set (COMPILE_DEFINITIONS ${COMPILE_DEFINITIONS} NRI_USE_D3D12=1) set (INPUT_LIBS_D3D12 ${INPUT_LIB_D3D12} ${INPUT_LIB_DXGI}) - file (GLOB D3D12_SOURCE "Source/D3D12/*.cpp" "Source/D3D12/*.h" "Source/D3D12/*.hpp") + file (GLOB D3D12_SOURCE "Source/D3D12/*.cpp" "Source/D3D12/*.h" "Source/D3D12/*.hpp" "External/memalloc/D3D12*.*") source_group ("" FILES ${D3D12_SOURCE}) if (NRI_ENABLE_EXTERNAL_LIBRARIES) @@ -214,10 +214,10 @@ if (NRI_ENABLE_VK_SUPPORT) message ("NRI adding backend: VK") set (COMPILE_DEFINITIONS ${COMPILE_DEFINITIONS} NRI_USE_VULKAN=1) - file (GLOB VK_SOURCE "Source/VK/*.cpp" "Source/VK/*.h" "Source/VK/*.hpp") + file (GLOB VK_SOURCE "Source/VK/*.cpp" "Source/VK/*.h" "Source/VK/*.hpp" "External/memalloc/vk_mem_alloc.h") source_group ("" FILES ${VK_SOURCE}) add_library (NRI_VK STATIC ${VK_SOURCE}) - target_include_directories (NRI_VK PRIVATE "Include" "Source/Shared" "External/vulkan/include") + target_include_directories (NRI_VK PRIVATE "Include" "Source/Shared" "External" "External/vulkan/include") target_compile_definitions (NRI_VK PRIVATE ${COMPILE_DEFINITIONS}) target_compile_options (NRI_VK PRIVATE ${COMPILE_OPTIONS}) target_link_libraries (NRI_VK PRIVATE NRI_Shared) @@ -283,6 +283,7 @@ file (GLOB NRI_RESOURCES "Resources/*") source_group ("Resources" FILES ${NRI_RESOURCES}) if (NRI_STATIC_LIBRARY) + set (COMPILE_DEFINITIONS ${COMPILE_DEFINITIONS} NRI_STATIC_LIBRARY) add_library (${PROJECT_NAME} STATIC ${NRI_SOURCE} ${NRI_HEADERS} ${NRI_RESOURCES} ${NRI_EXTENSIONS}) else () add_library (${PROJECT_NAME} SHARED ${NRI_SOURCE} ${NRI_HEADERS} ${NRI_RESOURCES} ${NRI_EXTENSIONS}) diff --git a/External/memalloc/D3D12MemAlloc.cpp b/External/memalloc/D3D12MemAlloc.cpp new file mode 100644 index 00000000..f7179e65 --- /dev/null +++ b/External/memalloc/D3D12MemAlloc.cpp @@ -0,0 +1,10048 @@ +// +// Copyright (c) 2019-2024 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+//
+
+#if defined(__clang__)
+#    pragma clang diagnostic push
+#    pragma clang diagnostic ignored "-Wunused-parameter"
+#    pragma clang diagnostic ignored "-Wunused-variable"
+#elif defined(__GNUC__)
+#    pragma GCC diagnostic push
+#    pragma GCC diagnostic ignored "-Wunused-parameter"
+#    pragma GCC diagnostic ignored "-Wunused-variable"
+#else
+#    pragma warning(push)
+#    pragma warning(disable : 4100) // unreferenced formal parameter
+#    pragma warning(disable : 4189) // local variable is initialized but not referenced
+#    pragma warning(disable : 4127) // conditional expression is constant
+#endif
+
+#include "D3D12MemAlloc.h"
+
+#if (D3D12_SDK_VERSION >= 613)
+#    define D3D12MA_HAS_GPU_UPLOAD_HEAP
+#endif
+
+#include <combaseapi.h>
+#include <mutex>
+#include <algorithm>
+#include <utility>
+#include <cstdlib>
+#include <cstdint>
+#include <malloc.h> // for _aligned_malloc, _aligned_free
+#ifndef _WIN32
+    #include <shared_mutex>
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+//
+// Configuration Begin
+//
+////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+#ifndef _D3D12MA_CONFIGURATION
+
+#ifdef _WIN32
+    #if !defined(WINVER) || WINVER < 0x0600
+        #error Required at least WinAPI version supporting: client = Windows Vista, server = Windows Server 2008.
+    #endif
+#endif
+
+#ifndef D3D12MA_SORT
+    #define D3D12MA_SORT(beg, end, cmp) std::sort(beg, end, cmp)
+#endif
+
+#ifndef D3D12MA_D3D12_HEADERS_ALREADY_INCLUDED
+    #include <dxgi.h>
+    #if D3D12MA_DXGI_1_4
+        #include <dxgi1_4.h>
+    #endif
+#endif
+
+#ifndef D3D12MA_ASSERT
+    #include <cassert>
+    #define D3D12MA_ASSERT(cond) assert(cond)
+#endif
+
+// Assert that will be called very often, like inside data structures e.g. operator[].
+// Making it non-empty can make program slow.
+#ifndef D3D12MA_HEAVY_ASSERT
+    #ifdef _DEBUG
+        #define D3D12MA_HEAVY_ASSERT(expr) //D3D12MA_ASSERT(expr)
+    #else
+        #define D3D12MA_HEAVY_ASSERT(expr)
+    #endif
+#endif
+
+#ifndef D3D12MA_DEBUG_ALIGNMENT
+    /*
+    Minimum alignment of all allocations, in bytes.
+    Set to more than 1 for debugging purposes only. Must be power of two.
+    */
+    #define D3D12MA_DEBUG_ALIGNMENT (1)
+#endif
+
+#ifndef D3D12MA_DEBUG_MARGIN
+    // Minimum margin before and after every allocation, in bytes.
+    // Set nonzero for debugging purposes only.
+    #define D3D12MA_DEBUG_MARGIN (0)
+#endif
+
+#ifndef D3D12MA_DEBUG_GLOBAL_MUTEX
+    /*
+    Set this to 1 for debugging purposes only, to enable single mutex protecting all
+    entry calls to the library. Can be useful for debugging multithreading issues.
+    */
+    #define D3D12MA_DEBUG_GLOBAL_MUTEX (0)
+#endif
+
+/*
+Define this macro for debugging purposes only to force specific D3D12_RESOURCE_HEAP_TIER,
+especially to test compatibility with D3D12_RESOURCE_HEAP_TIER_1 on modern GPUs.
+*/
+//#define D3D12MA_FORCE_RESOURCE_HEAP_TIER D3D12_RESOURCE_HEAP_TIER_1
+
+#ifndef D3D12MA_DEFAULT_BLOCK_SIZE
+    /// Default size of a block allocated as single ID3D12Heap.
+    #define D3D12MA_DEFAULT_BLOCK_SIZE (64ull * 1024 * 1024)
+#endif
+
+#ifndef D3D12MA_DEBUG_LOG
+    #define D3D12MA_DEBUG_LOG(format, ...)
+    /*
+    #define D3D12MA_DEBUG_LOG(format, ...) \
+do { \
+        wprintf(format, __VA_ARGS__); \
+        wprintf(L"\n"); \
+    } while(false)
+    */
+#endif
+
+#endif // _D3D12MA_CONFIGURATION
+////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+//
+// Configuration End
+//
+////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+
+#define D3D12MA_IID_PPV_ARGS(ppType) __uuidof(**(ppType)), reinterpret_cast<void**>(ppType)
+
+#ifdef __ID3D12Device8_INTERFACE_DEFINED__
+    #define D3D12MA_CREATE_NOT_ZEROED_AVAILABLE 1
+#endif
+
+namespace D3D12MA
+{
+#ifdef D3D12MA_HAS_GPU_UPLOAD_HEAP
+static constexpr UINT HEAP_TYPE_COUNT = 5;
+static constexpr UINT STANDARD_HEAP_TYPE_COUNT = 4; // Only DEFAULT, UPLOAD, READBACK, GPU_UPLOAD.
+#else
+static constexpr UINT HEAP_TYPE_COUNT = 4;
+static constexpr UINT STANDARD_HEAP_TYPE_COUNT = 3; // Only DEFAULT, UPLOAD, READBACK
+#endif
+static constexpr UINT DEFAULT_POOL_MAX_COUNT = STANDARD_HEAP_TYPE_COUNT * 3;
+static const UINT NEW_BLOCK_SIZE_SHIFT_MAX = 3;
+// Minimum size of a free suballocation to register it in the free suballocation collection.
+static const UINT64 MIN_FREE_SUBALLOCATION_SIZE_TO_REGISTER = 16;
+
+static const WCHAR* const HeapTypeNames[] =
+{
+    L"DEFAULT",
+    L"UPLOAD",
+    L"READBACK",
+    L"CUSTOM",
+    L"GPU_UPLOAD",
+};
+static const WCHAR* const StandardHeapTypeNames[] =
+{
+    L"DEFAULT",
+    L"UPLOAD",
+    L"READBACK",
+    L"GPU_UPLOAD",
+};
+
+static const D3D12_HEAP_FLAGS RESOURCE_CLASS_HEAP_FLAGS =
+    D3D12_HEAP_FLAG_DENY_BUFFERS | D3D12_HEAP_FLAG_DENY_RT_DS_TEXTURES | D3D12_HEAP_FLAG_DENY_NON_RT_DS_TEXTURES;
+
+static const D3D12_RESIDENCY_PRIORITY D3D12_RESIDENCY_PRIORITY_NONE = D3D12_RESIDENCY_PRIORITY(0);
+
+#ifndef _D3D12MA_ENUM_DECLARATIONS
+
+// Local copy of this enum, as it is provided only by <dxgi1_4.h>, so it may not be available.
+enum DXGI_MEMORY_SEGMENT_GROUP_COPY +{ + DXGI_MEMORY_SEGMENT_GROUP_LOCAL_COPY = 0, + DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL_COPY = 1, + DXGI_MEMORY_SEGMENT_GROUP_COUNT +}; + +enum class ResourceClass +{ + Unknown, Buffer, Non_RT_DS_Texture, RT_DS_Texture +}; + +enum SuballocationType +{ + SUBALLOCATION_TYPE_FREE = 0, + SUBALLOCATION_TYPE_ALLOCATION = 1, +}; + +#endif // _D3D12MA_ENUM_DECLARATIONS + + +#ifndef _D3D12MA_FUNCTIONS + +static void* DefaultAllocate(size_t Size, size_t Alignment, void* /*pPrivateData*/) +{ +#ifdef _WIN32 + return _aligned_malloc(Size, Alignment); +#else + return aligned_alloc(Alignment, Size); +#endif +} +static void DefaultFree(void* pMemory, void* /*pPrivateData*/) +{ +#ifdef _WIN32 + return _aligned_free(pMemory); +#else + return free(pMemory); +#endif +} + +static void* Malloc(const ALLOCATION_CALLBACKS& allocs, size_t size, size_t alignment) +{ + void* const result = (*allocs.pAllocate)(size, alignment, allocs.pPrivateData); + D3D12MA_ASSERT(result); + return result; +} +static void Free(const ALLOCATION_CALLBACKS& allocs, void* memory) +{ + (*allocs.pFree)(memory, allocs.pPrivateData); +} + +template +static T* Allocate(const ALLOCATION_CALLBACKS& allocs) +{ + return (T*)Malloc(allocs, sizeof(T), __alignof(T)); +} +template +static T* AllocateArray(const ALLOCATION_CALLBACKS& allocs, size_t count) +{ + return (T*)Malloc(allocs, sizeof(T) * count, __alignof(T)); +} + +#define D3D12MA_NEW(allocs, type) new(D3D12MA::Allocate(allocs))(type) +#define D3D12MA_NEW_ARRAY(allocs, type, count) new(D3D12MA::AllocateArray((allocs), (count)))(type) + +template +void D3D12MA_DELETE(const ALLOCATION_CALLBACKS& allocs, T* memory) +{ + if (memory) + { + memory->~T(); + Free(allocs, memory); + } +} +template +void D3D12MA_DELETE_ARRAY(const ALLOCATION_CALLBACKS& allocs, T* memory, size_t count) +{ + if (memory) + { + for (size_t i = count; i--; ) + { + memory[i].~T(); + } + Free(allocs, memory); + } +} + +static void SetupAllocationCallbacks(ALLOCATION_CALLBACKS& outAllocs, const ALLOCATION_CALLBACKS* allocationCallbacks) +{ + if (allocationCallbacks) + { + outAllocs = *allocationCallbacks; + D3D12MA_ASSERT(outAllocs.pAllocate != NULL && outAllocs.pFree != NULL); + } + else + { + outAllocs.pAllocate = &DefaultAllocate; + outAllocs.pFree = &DefaultFree; + outAllocs.pPrivateData = NULL; + } +} + +#define SAFE_RELEASE(ptr) do { if(ptr) { (ptr)->Release(); (ptr) = NULL; } } while(false) + +#define D3D12MA_VALIDATE(cond) do { if(!(cond)) { \ + D3D12MA_ASSERT(0 && "Validation failed: " #cond); \ + return false; \ +} } while(false) + +template +static T D3D12MA_MIN(const T& a, const T& b) { return a <= b ? a : b; } +template +static T D3D12MA_MAX(const T& a, const T& b) { return a <= b ? b : a; } + +template +static void D3D12MA_SWAP(T& a, T& b) { T tmp = a; a = b; b = tmp; } + +// Scans integer for index of first nonzero bit from the Least Significant Bit (LSB). If mask is 0 then returns UINT8_MAX +/* +static UINT8 BitScanLSB(UINT64 mask) +{ +#if defined(_MSC_VER) && defined(_WIN64) + unsigned long pos; + if (_BitScanForward64(&pos, mask)) + return static_cast(pos); + return UINT8_MAX; +#elif defined __GNUC__ || defined __clang__ + return static_cast(__builtin_ffsll(mask)) - 1U; +#else + UINT8 pos = 0; + UINT64 bit = 1; + do + { + if (mask & bit) + return pos; + bit <<= 1; + } while (pos++ < 63); + return UINT8_MAX; +#endif +} +*/ +// Scans integer for index of first nonzero bit from the Least Significant Bit (LSB). 
If mask is 0 then returns UINT8_MAX +static UINT8 BitScanLSB(UINT32 mask) +{ +#ifdef _MSC_VER + unsigned long pos; + if (_BitScanForward(&pos, mask)) + return static_cast(pos); + return UINT8_MAX; +#elif defined __GNUC__ || defined __clang__ + return static_cast(__builtin_ffs(mask)) - 1U; +#else + UINT8 pos = 0; + UINT32 bit = 1; + do + { + if (mask & bit) + return pos; + bit <<= 1; + } while (pos++ < 31); + return UINT8_MAX; +#endif +} + +// Scans integer for index of first nonzero bit from the Most Significant Bit (MSB). If mask is 0 then returns UINT8_MAX +static UINT8 BitScanMSB(UINT64 mask) +{ +#if defined(_MSC_VER) && defined(_WIN64) + unsigned long pos; + if (_BitScanReverse64(&pos, mask)) + return static_cast(pos); +#elif defined __GNUC__ || defined __clang__ + if (mask) + return 63 - static_cast(__builtin_clzll(mask)); +#else + UINT8 pos = 63; + UINT64 bit = 1ULL << 63; + do + { + if (mask & bit) + return pos; + bit >>= 1; + } while (pos-- > 0); +#endif + return UINT8_MAX; +} +// Scans integer for index of first nonzero bit from the Most Significant Bit (MSB). If mask is 0 then returns UINT8_MAX +/* +static UINT8 BitScanMSB(UINT32 mask) +{ +#ifdef _MSC_VER + unsigned long pos; + if (_BitScanReverse(&pos, mask)) + return static_cast(pos); +#elif defined __GNUC__ || defined __clang__ + if (mask) + return 31 - static_cast(__builtin_clz(mask)); +#else + UINT8 pos = 31; + UINT32 bit = 1UL << 31; + do + { + if (mask & bit) + return pos; + bit >>= 1; + } while (pos-- > 0); +#endif + return UINT8_MAX; +} +*/ + +/* +Returns true if given number is a power of two. +T must be unsigned integer number or signed integer but always nonnegative. +For 0 returns true. +*/ +template +static bool IsPow2(T x) { return (x & (x - 1)) == 0; } + +// Aligns given value up to nearest multiply of align value. For example: AlignUp(11, 8) = 16. +// Use types like UINT, uint64_t as T. +template +static T AlignUp(T val, T alignment) +{ + D3D12MA_HEAVY_ASSERT(IsPow2(alignment)); + return (val + alignment - 1) & ~(alignment - 1); +} +// Aligns given value down to nearest multiply of align value. For example: AlignUp(11, 8) = 8. +// Use types like UINT, uint64_t as T. +template +static T AlignDown(T val, T alignment) +{ + D3D12MA_HEAVY_ASSERT(IsPow2(alignment)); + return val & ~(alignment - 1); +} + +// Division with mathematical rounding to nearest number. +template +static T RoundDiv(T x, T y) { return (x + (y / (T)2)) / y; } +template +static T DivideRoundingUp(T x, T y) { return (x + y - 1) / y; } + +static WCHAR HexDigitToChar(UINT8 digit) +{ + if(digit < 10) + return L'0' + digit; + else + return L'A' + (digit - 10); +} + +/* +Performs binary search and returns iterator to first element that is greater or +equal to `key`, according to comparison `cmp`. + +Cmp should return true if first argument is less than second argument. + +Returned value is the found element, if present in the collection or place where +new element with value (key) should be inserted. +*/ +template +static IterT BinaryFindFirstNotLess(IterT beg, IterT end, const KeyT& key, const CmpLess& cmp) +{ + size_t down = 0, up = (end - beg); + while (down < up) + { + const size_t mid = (down + up) / 2; + if (cmp(*(beg + mid), key)) + { + down = mid + 1; + } + else + { + up = mid; + } + } + return beg + down; +} + +/* +Performs binary search and returns iterator to an element that is equal to `key`, +according to comparison `cmp`. + +Cmp should return true if first argument is less than second argument. 
+ +Returned value is the found element, if present in the collection or end if not +found. +*/ +template +static IterT BinaryFindSorted(const IterT& beg, const IterT& end, const KeyT& value, const CmpLess& cmp) +{ + IterT it = BinaryFindFirstNotLess(beg, end, value, cmp); + if (it == end || + (!cmp(*it, value) && !cmp(value, *it))) + { + return it; + } + return end; +} + +static UINT StandardHeapTypeToIndex(D3D12_HEAP_TYPE type) +{ + switch (type) + { + case D3D12_HEAP_TYPE_DEFAULT: return 0; + case D3D12_HEAP_TYPE_UPLOAD: return 1; + case D3D12_HEAP_TYPE_READBACK: return 2; +#ifdef D3D12MA_HAS_GPU_UPLOAD_HEAP + case D3D12_HEAP_TYPE_GPU_UPLOAD: return 3; +#endif + default: D3D12MA_ASSERT(0); return UINT_MAX; + } +} + +static D3D12_HEAP_TYPE IndexToStandardHeapType(UINT heapTypeIndex) +{ + switch(heapTypeIndex) + { + case 0: return D3D12_HEAP_TYPE_DEFAULT; + case 1: return D3D12_HEAP_TYPE_UPLOAD; + case 2: return D3D12_HEAP_TYPE_READBACK; +#ifdef D3D12MA_HAS_GPU_UPLOAD_HEAP + case 3: return D3D12_HEAP_TYPE_GPU_UPLOAD; +#endif + default: D3D12MA_ASSERT(0); return D3D12_HEAP_TYPE_CUSTOM; + } +} + +static UINT64 HeapFlagsToAlignment(D3D12_HEAP_FLAGS flags, bool denyMsaaTextures) +{ + /* + Documentation of D3D12_HEAP_DESC structure says: + + - D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT defined as 64KB. + - D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT defined as 4MB. An + application must decide whether the heap will contain multi-sample + anti-aliasing (MSAA), in which case, the application must choose [this flag]. + + https://docs.microsoft.com/en-us/windows/desktop/api/d3d12/ns-d3d12-d3d12_heap_desc + */ + + if (denyMsaaTextures) + return D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; + + const D3D12_HEAP_FLAGS denyAllTexturesFlags = + D3D12_HEAP_FLAG_DENY_NON_RT_DS_TEXTURES | D3D12_HEAP_FLAG_DENY_RT_DS_TEXTURES; + const bool canContainAnyTextures = + (flags & denyAllTexturesFlags) != denyAllTexturesFlags; + return canContainAnyTextures ? + D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT : D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; +} + +static ResourceClass HeapFlagsToResourceClass(D3D12_HEAP_FLAGS heapFlags) +{ + const bool allowBuffers = (heapFlags & D3D12_HEAP_FLAG_DENY_BUFFERS) == 0; + const bool allowRtDsTextures = (heapFlags & D3D12_HEAP_FLAG_DENY_RT_DS_TEXTURES) == 0; + const bool allowNonRtDsTextures = (heapFlags & D3D12_HEAP_FLAG_DENY_NON_RT_DS_TEXTURES) == 0; + + const uint8_t allowedGroupCount = (allowBuffers ? 1 : 0) + (allowRtDsTextures ? 1 : 0) + (allowNonRtDsTextures ? 
1 : 0); + if (allowedGroupCount != 1) + return ResourceClass::Unknown; + + if (allowRtDsTextures) + return ResourceClass::RT_DS_Texture; + if (allowNonRtDsTextures) + return ResourceClass::Non_RT_DS_Texture; + return ResourceClass::Buffer; +} + +static bool IsHeapTypeStandard(D3D12_HEAP_TYPE type) +{ + return type == D3D12_HEAP_TYPE_DEFAULT + || type == D3D12_HEAP_TYPE_UPLOAD +#ifdef D3D12MA_HAS_GPU_UPLOAD_HEAP + || type == D3D12_HEAP_TYPE_GPU_UPLOAD +#endif + || type == D3D12_HEAP_TYPE_READBACK; +} + +static D3D12_HEAP_PROPERTIES StandardHeapTypeToHeapProperties(D3D12_HEAP_TYPE type) +{ + D3D12MA_ASSERT(IsHeapTypeStandard(type)); + D3D12_HEAP_PROPERTIES result = {}; + result.Type = type; + return result; +} + +static bool IsFormatCompressed(DXGI_FORMAT format) +{ + switch (format) + { + case DXGI_FORMAT_BC1_TYPELESS: + case DXGI_FORMAT_BC1_UNORM: + case DXGI_FORMAT_BC1_UNORM_SRGB: + case DXGI_FORMAT_BC2_TYPELESS: + case DXGI_FORMAT_BC2_UNORM: + case DXGI_FORMAT_BC2_UNORM_SRGB: + case DXGI_FORMAT_BC3_TYPELESS: + case DXGI_FORMAT_BC3_UNORM: + case DXGI_FORMAT_BC3_UNORM_SRGB: + case DXGI_FORMAT_BC4_TYPELESS: + case DXGI_FORMAT_BC4_UNORM: + case DXGI_FORMAT_BC4_SNORM: + case DXGI_FORMAT_BC5_TYPELESS: + case DXGI_FORMAT_BC5_UNORM: + case DXGI_FORMAT_BC5_SNORM: + case DXGI_FORMAT_BC6H_TYPELESS: + case DXGI_FORMAT_BC6H_UF16: + case DXGI_FORMAT_BC6H_SF16: + case DXGI_FORMAT_BC7_TYPELESS: + case DXGI_FORMAT_BC7_UNORM: + case DXGI_FORMAT_BC7_UNORM_SRGB: + return true; + default: + return false; + } +} + +// Only some formats are supported. For others it returns 0. +static UINT GetBitsPerPixel(DXGI_FORMAT format) +{ + switch (format) + { + case DXGI_FORMAT_R32G32B32A32_TYPELESS: + case DXGI_FORMAT_R32G32B32A32_FLOAT: + case DXGI_FORMAT_R32G32B32A32_UINT: + case DXGI_FORMAT_R32G32B32A32_SINT: + return 128; + case DXGI_FORMAT_R32G32B32_TYPELESS: + case DXGI_FORMAT_R32G32B32_FLOAT: + case DXGI_FORMAT_R32G32B32_UINT: + case DXGI_FORMAT_R32G32B32_SINT: + return 96; + case DXGI_FORMAT_R16G16B16A16_TYPELESS: + case DXGI_FORMAT_R16G16B16A16_FLOAT: + case DXGI_FORMAT_R16G16B16A16_UNORM: + case DXGI_FORMAT_R16G16B16A16_UINT: + case DXGI_FORMAT_R16G16B16A16_SNORM: + case DXGI_FORMAT_R16G16B16A16_SINT: + return 64; + case DXGI_FORMAT_R32G32_TYPELESS: + case DXGI_FORMAT_R32G32_FLOAT: + case DXGI_FORMAT_R32G32_UINT: + case DXGI_FORMAT_R32G32_SINT: + return 64; + case DXGI_FORMAT_R32G8X24_TYPELESS: + case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: + case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS: + case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT: + return 64; + case DXGI_FORMAT_R10G10B10A2_TYPELESS: + case DXGI_FORMAT_R10G10B10A2_UNORM: + case DXGI_FORMAT_R10G10B10A2_UINT: + case DXGI_FORMAT_R11G11B10_FLOAT: + return 32; + case DXGI_FORMAT_R8G8B8A8_TYPELESS: + case DXGI_FORMAT_R8G8B8A8_UNORM: + case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: + case DXGI_FORMAT_R8G8B8A8_UINT: + case DXGI_FORMAT_R8G8B8A8_SNORM: + case DXGI_FORMAT_R8G8B8A8_SINT: + return 32; + case DXGI_FORMAT_R16G16_TYPELESS: + case DXGI_FORMAT_R16G16_FLOAT: + case DXGI_FORMAT_R16G16_UNORM: + case DXGI_FORMAT_R16G16_UINT: + case DXGI_FORMAT_R16G16_SNORM: + case DXGI_FORMAT_R16G16_SINT: + return 32; + case DXGI_FORMAT_R32_TYPELESS: + case DXGI_FORMAT_D32_FLOAT: + case DXGI_FORMAT_R32_FLOAT: + case DXGI_FORMAT_R32_UINT: + case DXGI_FORMAT_R32_SINT: + return 32; + case DXGI_FORMAT_R24G8_TYPELESS: + case DXGI_FORMAT_D24_UNORM_S8_UINT: + case DXGI_FORMAT_R24_UNORM_X8_TYPELESS: + case DXGI_FORMAT_X24_TYPELESS_G8_UINT: + return 32; + case DXGI_FORMAT_R8G8_TYPELESS: + case 
DXGI_FORMAT_R8G8_UNORM: + case DXGI_FORMAT_R8G8_UINT: + case DXGI_FORMAT_R8G8_SNORM: + case DXGI_FORMAT_R8G8_SINT: + return 16; + case DXGI_FORMAT_R16_TYPELESS: + case DXGI_FORMAT_R16_FLOAT: + case DXGI_FORMAT_D16_UNORM: + case DXGI_FORMAT_R16_UNORM: + case DXGI_FORMAT_R16_UINT: + case DXGI_FORMAT_R16_SNORM: + case DXGI_FORMAT_R16_SINT: + return 16; + case DXGI_FORMAT_R8_TYPELESS: + case DXGI_FORMAT_R8_UNORM: + case DXGI_FORMAT_R8_UINT: + case DXGI_FORMAT_R8_SNORM: + case DXGI_FORMAT_R8_SINT: + case DXGI_FORMAT_A8_UNORM: + return 8; + case DXGI_FORMAT_BC1_TYPELESS: + case DXGI_FORMAT_BC1_UNORM: + case DXGI_FORMAT_BC1_UNORM_SRGB: + return 4; + case DXGI_FORMAT_BC2_TYPELESS: + case DXGI_FORMAT_BC2_UNORM: + case DXGI_FORMAT_BC2_UNORM_SRGB: + return 8; + case DXGI_FORMAT_BC3_TYPELESS: + case DXGI_FORMAT_BC3_UNORM: + case DXGI_FORMAT_BC3_UNORM_SRGB: + return 8; + case DXGI_FORMAT_BC4_TYPELESS: + case DXGI_FORMAT_BC4_UNORM: + case DXGI_FORMAT_BC4_SNORM: + return 4; + case DXGI_FORMAT_BC5_TYPELESS: + case DXGI_FORMAT_BC5_UNORM: + case DXGI_FORMAT_BC5_SNORM: + return 8; + case DXGI_FORMAT_BC6H_TYPELESS: + case DXGI_FORMAT_BC6H_UF16: + case DXGI_FORMAT_BC6H_SF16: + return 8; + case DXGI_FORMAT_BC7_TYPELESS: + case DXGI_FORMAT_BC7_UNORM: + case DXGI_FORMAT_BC7_UNORM_SRGB: + return 8; + default: + return 0; + } +} + +template +static ResourceClass ResourceDescToResourceClass(const D3D12_RESOURCE_DESC_T& resDesc) +{ + if (resDesc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) + return ResourceClass::Buffer; + // Else: it's surely a texture. + const bool isRenderTargetOrDepthStencil = + (resDesc.Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) != 0; + return isRenderTargetOrDepthStencil ? ResourceClass::RT_DS_Texture : ResourceClass::Non_RT_DS_Texture; +} + +// This algorithm is overly conservative. 
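+//
+// [Editorial sketch, not part of the upstream file] A worked example of the
+// tile heuristic below, assuming a hypothetical 256x256 BC1 texture:
+// GetBitsPerPixel() returns 4 and the format is block-compressed, so the
+// function rescales to 64x64 blocks at 64 bits each, picks the 32x16 tile
+// for 64 bpp, and counts DivideRoundingUp(64u, 32u) * DivideRoundingUp(64u, 16u)
+// = 2 * 4 = 8 tiles; 8 <= 16, so the texture qualifies for the 4 KB
+// D3D12_SMALL_RESOURCE_PLACEMENT_ALIGNMENT instead of the 64 KB default:
+//
+//     D3D12_RESOURCE_DESC desc = {}; // hypothetical caller-side setup
+//     desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
+//     desc.Format = DXGI_FORMAT_BC1_UNORM;
+//     desc.Width = 256; desc.Height = 256;
+//     desc.DepthOrArraySize = 1; desc.MipLevels = 1; desc.SampleDesc.Count = 1;
+//     if (CanUseSmallAlignment(desc))
+//         desc.Alignment = D3D12_SMALL_RESOURCE_PLACEMENT_ALIGNMENT; // 4096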
+template +static bool CanUseSmallAlignment(const D3D12_RESOURCE_DESC_T& resourceDesc) +{ + if (resourceDesc.Dimension != D3D12_RESOURCE_DIMENSION_TEXTURE2D) + return false; + if ((resourceDesc.Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) != 0) + return false; + if (resourceDesc.SampleDesc.Count > 1) + return false; + if (resourceDesc.DepthOrArraySize != 1) + return false; + + UINT sizeX = (UINT)resourceDesc.Width; + UINT sizeY = resourceDesc.Height; + UINT bitsPerPixel = GetBitsPerPixel(resourceDesc.Format); + if (bitsPerPixel == 0) + return false; + + if (IsFormatCompressed(resourceDesc.Format)) + { + sizeX = DivideRoundingUp(sizeX, 4u); + sizeY = DivideRoundingUp(sizeY, 4u); + bitsPerPixel *= 16; + } + + UINT tileSizeX = 0, tileSizeY = 0; + switch (bitsPerPixel) + { + case 8: tileSizeX = 64; tileSizeY = 64; break; + case 16: tileSizeX = 64; tileSizeY = 32; break; + case 32: tileSizeX = 32; tileSizeY = 32; break; + case 64: tileSizeX = 32; tileSizeY = 16; break; + case 128: tileSizeX = 16; tileSizeY = 16; break; + default: return false; + } + + const UINT tileCount = DivideRoundingUp(sizeX, tileSizeX) * DivideRoundingUp(sizeY, tileSizeY); + return tileCount <= 16; +} + +static bool ValidateAllocateMemoryParameters( + const ALLOCATION_DESC* pAllocDesc, + const D3D12_RESOURCE_ALLOCATION_INFO* pAllocInfo, + Allocation** ppAllocation) +{ + return pAllocDesc && + pAllocInfo && + ppAllocation && + (pAllocInfo->Alignment == 0 || + pAllocInfo->Alignment == D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT || + pAllocInfo->Alignment == D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT) && + pAllocInfo->SizeInBytes != 0 && + pAllocInfo->SizeInBytes % (64ull * 1024) == 0; +} + +#endif // _D3D12MA_FUNCTIONS + +#ifndef _D3D12MA_STATISTICS_FUNCTIONS + +static void ClearStatistics(Statistics& outStats) +{ + outStats.BlockCount = 0; + outStats.AllocationCount = 0; + outStats.BlockBytes = 0; + outStats.AllocationBytes = 0; +} + +static void ClearDetailedStatistics(DetailedStatistics& outStats) +{ + ClearStatistics(outStats.Stats); + outStats.UnusedRangeCount = 0; + outStats.AllocationSizeMin = UINT64_MAX; + outStats.AllocationSizeMax = 0; + outStats.UnusedRangeSizeMin = UINT64_MAX; + outStats.UnusedRangeSizeMax = 0; +} + +static void AddStatistics(Statistics& inoutStats, const Statistics& src) +{ + inoutStats.BlockCount += src.BlockCount; + inoutStats.AllocationCount += src.AllocationCount; + inoutStats.BlockBytes += src.BlockBytes; + inoutStats.AllocationBytes += src.AllocationBytes; +} + +static void AddDetailedStatistics(DetailedStatistics& inoutStats, const DetailedStatistics& src) +{ + AddStatistics(inoutStats.Stats, src.Stats); + inoutStats.UnusedRangeCount += src.UnusedRangeCount; + inoutStats.AllocationSizeMin = D3D12MA_MIN(inoutStats.AllocationSizeMin, src.AllocationSizeMin); + inoutStats.AllocationSizeMax = D3D12MA_MAX(inoutStats.AllocationSizeMax, src.AllocationSizeMax); + inoutStats.UnusedRangeSizeMin = D3D12MA_MIN(inoutStats.UnusedRangeSizeMin, src.UnusedRangeSizeMin); + inoutStats.UnusedRangeSizeMax = D3D12MA_MAX(inoutStats.UnusedRangeSizeMax, src.UnusedRangeSizeMax); +} + +static void AddDetailedStatisticsAllocation(DetailedStatistics& inoutStats, UINT64 size) +{ + inoutStats.Stats.AllocationCount++; + inoutStats.Stats.AllocationBytes += size; + inoutStats.AllocationSizeMin = D3D12MA_MIN(inoutStats.AllocationSizeMin, size); + inoutStats.AllocationSizeMax = D3D12MA_MAX(inoutStats.AllocationSizeMax, size); +} + +static void 
AddDetailedStatisticsUnusedRange(DetailedStatistics& inoutStats, UINT64 size) +{ + inoutStats.UnusedRangeCount++; + inoutStats.UnusedRangeSizeMin = D3D12MA_MIN(inoutStats.UnusedRangeSizeMin, size); + inoutStats.UnusedRangeSizeMax = D3D12MA_MAX(inoutStats.UnusedRangeSizeMax, size); +} + +#endif // _D3D12MA_STATISTICS_FUNCTIONS + + +#ifndef _D3D12MA_MUTEX + +#ifndef D3D12MA_MUTEX + class Mutex + { + public: + void Lock() { m_Mutex.lock(); } + void Unlock() { m_Mutex.unlock(); } + + private: + std::mutex m_Mutex; + }; + #define D3D12MA_MUTEX Mutex +#endif + +#ifndef D3D12MA_RW_MUTEX +#ifdef _WIN32 + class RWMutex + { + public: + RWMutex() { InitializeSRWLock(&m_Lock); } + void LockRead() { AcquireSRWLockShared(&m_Lock); } + void UnlockRead() { ReleaseSRWLockShared(&m_Lock); } + void LockWrite() { AcquireSRWLockExclusive(&m_Lock); } + void UnlockWrite() { ReleaseSRWLockExclusive(&m_Lock); } + + private: + SRWLOCK m_Lock; + }; +#else // #ifdef _WIN32 + class RWMutex + { + public: + RWMutex() {} + void LockRead() { m_Mutex.lock_shared(); } + void UnlockRead() { m_Mutex.unlock_shared(); } + void LockWrite() { m_Mutex.lock(); } + void UnlockWrite() { m_Mutex.unlock(); } + + private: + std::shared_timed_mutex m_Mutex; + }; +#endif // #ifdef _WIN32 + #define D3D12MA_RW_MUTEX RWMutex +#endif // #ifndef D3D12MA_RW_MUTEX + +// Helper RAII class to lock a mutex in constructor and unlock it in destructor (at the end of scope). +struct MutexLock +{ + D3D12MA_CLASS_NO_COPY(MutexLock); +public: + MutexLock(D3D12MA_MUTEX& mutex, bool useMutex = true) : + m_pMutex(useMutex ? &mutex : NULL) + { + if (m_pMutex) m_pMutex->Lock(); + } + ~MutexLock() { if (m_pMutex) m_pMutex->Unlock(); } + +private: + D3D12MA_MUTEX* m_pMutex; +}; + +// Helper RAII class to lock a RW mutex in constructor and unlock it in destructor (at the end of scope), for reading. +struct MutexLockRead +{ + D3D12MA_CLASS_NO_COPY(MutexLockRead); +public: + MutexLockRead(D3D12MA_RW_MUTEX& mutex, bool useMutex) + : m_pMutex(useMutex ? &mutex : NULL) + { + if(m_pMutex) + { + m_pMutex->LockRead(); + } + } + ~MutexLockRead() { if (m_pMutex) m_pMutex->UnlockRead(); } + +private: + D3D12MA_RW_MUTEX* m_pMutex; +}; + +// Helper RAII class to lock a RW mutex in constructor and unlock it in destructor (at the end of scope), for writing. +struct MutexLockWrite +{ + D3D12MA_CLASS_NO_COPY(MutexLockWrite); +public: + MutexLockWrite(D3D12MA_RW_MUTEX& mutex, bool useMutex) + : m_pMutex(useMutex ? &mutex : NULL) + { + if (m_pMutex) m_pMutex->LockWrite(); + } + ~MutexLockWrite() { if (m_pMutex) m_pMutex->UnlockWrite(); } + +private: + D3D12MA_RW_MUTEX* m_pMutex; +}; + +#if D3D12MA_DEBUG_GLOBAL_MUTEX + static D3D12MA_MUTEX g_DebugGlobalMutex; + #define D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK MutexLock debugGlobalMutexLock(g_DebugGlobalMutex, true); +#else + #define D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK +#endif +#endif // _D3D12MA_MUTEX + +#ifndef _D3D12MA_VECTOR +/* +Dynamically resizing continuous array. Class with interface similar to std::vector. +T must be POD because constructors and destructors are not called and memcpy is +used for these objects. +*/ +template +class Vector +{ +public: + using value_type = T; + using iterator = T*; + using const_iterator = const T*; + + // allocationCallbacks externally owned, must outlive this object. 
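+    //
+    // [Editorial sketch, not part of the upstream file] Minimal intended use,
+    // with callbacks installed via SetupAllocationCallbacks() defined earlier;
+    // note that cb must outlive the vector:
+    //
+    //     ALLOCATION_CALLBACKS cb = {};
+    //     SetupAllocationCallbacks(cb, NULL); // falls back to DefaultAllocate/DefaultFree
+    //     Vector<UINT> v(cb);                 // POD elements only: growth uses memcpy
+    //     v.push_back(42);
+    //     v.InsertSorted(7u, [](UINT a, UINT b) { return a < b; });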
+ Vector(const ALLOCATION_CALLBACKS& allocationCallbacks); + Vector(size_t count, const ALLOCATION_CALLBACKS& allocationCallbacks); + Vector(const Vector& src); + ~Vector(); + + const ALLOCATION_CALLBACKS& GetAllocs() const { return m_AllocationCallbacks; } + bool empty() const { return m_Count == 0; } + size_t size() const { return m_Count; } + T* data() { return m_pArray; } + const T* data() const { return m_pArray; } + void clear(bool freeMemory = false) { resize(0, freeMemory); } + + iterator begin() { return m_pArray; } + iterator end() { return m_pArray + m_Count; } + const_iterator cbegin() const { return m_pArray; } + const_iterator cend() const { return m_pArray + m_Count; } + const_iterator begin() const { return cbegin(); } + const_iterator end() const { return cend(); } + + void push_front(const T& src) { insert(0, src); } + void push_back(const T& src); + void pop_front(); + void pop_back(); + + T& front(); + T& back(); + const T& front() const; + const T& back() const; + + void reserve(size_t newCapacity, bool freeMemory = false); + void resize(size_t newCount, bool freeMemory = false); + void insert(size_t index, const T& src); + void remove(size_t index); + + template + size_t InsertSorted(const T& value, const CmpLess& cmp); + template + bool RemoveSorted(const T& value, const CmpLess& cmp); + + Vector& operator=(const Vector& rhs); + T& operator[](size_t index); + const T& operator[](size_t index) const; + +private: + const ALLOCATION_CALLBACKS& m_AllocationCallbacks; + T* m_pArray; + size_t m_Count; + size_t m_Capacity; +}; + +#ifndef _D3D12MA_VECTOR_FUNCTIONS +template +Vector::Vector(const ALLOCATION_CALLBACKS& allocationCallbacks) + : m_AllocationCallbacks(allocationCallbacks), + m_pArray(NULL), + m_Count(0), + m_Capacity(0) {} + +template +Vector::Vector(size_t count, const ALLOCATION_CALLBACKS& allocationCallbacks) + : m_AllocationCallbacks(allocationCallbacks), + m_pArray(count ? AllocateArray(allocationCallbacks, count) : NULL), + m_Count(count), + m_Capacity(count) {} + +template +Vector::Vector(const Vector& src) + : m_AllocationCallbacks(src.m_AllocationCallbacks), + m_pArray(src.m_Count ? AllocateArray(src.m_AllocationCallbacks, src.m_Count) : NULL), + m_Count(src.m_Count), + m_Capacity(src.m_Count) +{ + if (m_Count > 0) + { + memcpy(m_pArray, src.m_pArray, m_Count * sizeof(T)); + } +} + +template +Vector::~Vector() +{ + Free(m_AllocationCallbacks, m_pArray); +} + +template +void Vector::push_back(const T& src) +{ + const size_t newIndex = size(); + resize(newIndex + 1); + m_pArray[newIndex] = src; +} + +template +void Vector::pop_front() +{ + D3D12MA_HEAVY_ASSERT(m_Count > 0); + remove(0); +} + +template +void Vector::pop_back() +{ + D3D12MA_HEAVY_ASSERT(m_Count > 0); + resize(size() - 1); +} + +template +T& Vector::front() +{ + D3D12MA_HEAVY_ASSERT(m_Count > 0); + return m_pArray[0]; +} + +template +T& Vector::back() +{ + D3D12MA_HEAVY_ASSERT(m_Count > 0); + return m_pArray[m_Count - 1]; +} + +template +const T& Vector::front() const +{ + D3D12MA_HEAVY_ASSERT(m_Count > 0); + return m_pArray[0]; +} + +template +const T& Vector::back() const +{ + D3D12MA_HEAVY_ASSERT(m_Count > 0); + return m_pArray[m_Count - 1]; +} + +template +void Vector::reserve(size_t newCapacity, bool freeMemory) +{ + newCapacity = D3D12MA_MAX(newCapacity, m_Count); + + if ((newCapacity < m_Capacity) && !freeMemory) + { + newCapacity = m_Capacity; + } + + if (newCapacity != m_Capacity) + { + T* const newArray = newCapacity ? 
AllocateArray(m_AllocationCallbacks, newCapacity) : NULL; + if (m_Count != 0) + { + memcpy(newArray, m_pArray, m_Count * sizeof(T)); + } + Free(m_AllocationCallbacks, m_pArray); + m_Capacity = newCapacity; + m_pArray = newArray; + } +} + +template +void Vector::resize(size_t newCount, bool freeMemory) +{ + size_t newCapacity = m_Capacity; + if (newCount > m_Capacity) + { + newCapacity = D3D12MA_MAX(newCount, D3D12MA_MAX(m_Capacity * 3 / 2, (size_t)8)); + } + else if (freeMemory) + { + newCapacity = newCount; + } + + if (newCapacity != m_Capacity) + { + T* const newArray = newCapacity ? AllocateArray(m_AllocationCallbacks, newCapacity) : NULL; + const size_t elementsToCopy = D3D12MA_MIN(m_Count, newCount); + if (elementsToCopy != 0) + { + memcpy(newArray, m_pArray, elementsToCopy * sizeof(T)); + } + Free(m_AllocationCallbacks, m_pArray); + m_Capacity = newCapacity; + m_pArray = newArray; + } + + m_Count = newCount; +} + +template +void Vector::insert(size_t index, const T& src) +{ + D3D12MA_HEAVY_ASSERT(index <= m_Count); + const size_t oldCount = size(); + resize(oldCount + 1); + if (index < oldCount) + { + memmove(m_pArray + (index + 1), m_pArray + index, (oldCount - index) * sizeof(T)); + } + m_pArray[index] = src; +} + +template +void Vector::remove(size_t index) +{ + D3D12MA_HEAVY_ASSERT(index < m_Count); + const size_t oldCount = size(); + if (index < oldCount - 1) + { + memmove(m_pArray + index, m_pArray + (index + 1), (oldCount - index - 1) * sizeof(T)); + } + resize(oldCount - 1); +} + +template template +size_t Vector::InsertSorted(const T& value, const CmpLess& cmp) +{ + const size_t indexToInsert = BinaryFindFirstNotLess( + m_pArray, + m_pArray + m_Count, + value, + cmp) - m_pArray; + insert(indexToInsert, value); + return indexToInsert; +} + +template template +bool Vector::RemoveSorted(const T& value, const CmpLess& cmp) +{ + const iterator it = BinaryFindFirstNotLess( + m_pArray, + m_pArray + m_Count, + value, + cmp); + if ((it != end()) && !cmp(*it, value) && !cmp(value, *it)) + { + size_t indexToRemove = it - begin(); + remove(indexToRemove); + return true; + } + return false; +} + +template +Vector& Vector::operator=(const Vector& rhs) +{ + if (&rhs != this) + { + resize(rhs.m_Count); + if (m_Count != 0) + { + memcpy(m_pArray, rhs.m_pArray, m_Count * sizeof(T)); + } + } + return *this; +} + +template +T& Vector::operator[](size_t index) +{ + D3D12MA_HEAVY_ASSERT(index < m_Count); + return m_pArray[index]; +} + +template +const T& Vector::operator[](size_t index) const +{ + D3D12MA_HEAVY_ASSERT(index < m_Count); + return m_pArray[index]; +} +#endif // _D3D12MA_VECTOR_FUNCTIONS +#endif // _D3D12MA_VECTOR + +#ifndef _D3D12MA_STRING_BUILDER +class StringBuilder +{ +public: + StringBuilder(const ALLOCATION_CALLBACKS& allocationCallbacks) : m_Data(allocationCallbacks) {} + + size_t GetLength() const { return m_Data.size(); } + LPCWSTR GetData() const { return m_Data.data(); } + + void Add(WCHAR ch) { m_Data.push_back(ch); } + void Add(LPCWSTR str); + void AddNewLine() { Add(L'\n'); } + void AddNumber(UINT num); + void AddNumber(UINT64 num); + void AddPointer(const void* ptr); + +private: + Vector m_Data; +}; + +#ifndef _D3D12MA_STRING_BUILDER_FUNCTIONS +void StringBuilder::Add(LPCWSTR str) +{ + const size_t len = wcslen(str); + if (len > 0) + { + const size_t oldCount = m_Data.size(); + m_Data.resize(oldCount + len); + memcpy(m_Data.data() + oldCount, str, len * sizeof(WCHAR)); + } +} + +void StringBuilder::AddNumber(UINT num) +{ + WCHAR buf[11]; + buf[10] = L'\0'; + WCHAR *p = 
&buf[10]; + do + { + *--p = L'0' + (num % 10); + num /= 10; + } + while (num); + Add(p); +} + +void StringBuilder::AddNumber(UINT64 num) +{ + WCHAR buf[21]; + buf[20] = L'\0'; + WCHAR *p = &buf[20]; + do + { + *--p = L'0' + (num % 10); + num /= 10; + } + while (num); + Add(p); +} + +void StringBuilder::AddPointer(const void* ptr) +{ + WCHAR buf[21]; + uintptr_t num = (uintptr_t)ptr; + buf[20] = L'\0'; + WCHAR *p = &buf[20]; + do + { + *--p = HexDigitToChar((UINT8)(num & 0xF)); + num >>= 4; + } + while (num); + Add(p); +} + +#endif // _D3D12MA_STRING_BUILDER_FUNCTIONS +#endif // _D3D12MA_STRING_BUILDER + +#ifndef _D3D12MA_JSON_WRITER +/* +Allows to conveniently build a correct JSON document to be written to the +StringBuilder passed to the constructor. +*/ +class JsonWriter +{ +public: + // stringBuilder - string builder to write the document to. Must remain alive for the whole lifetime of this object. + JsonWriter(const ALLOCATION_CALLBACKS& allocationCallbacks, StringBuilder& stringBuilder); + ~JsonWriter(); + + // Begins object by writing "{". + // Inside an object, you must call pairs of WriteString and a value, e.g.: + // j.BeginObject(true); j.WriteString("A"); j.WriteNumber(1); j.WriteString("B"); j.WriteNumber(2); j.EndObject(); + // Will write: { "A": 1, "B": 2 } + void BeginObject(bool singleLine = false); + // Ends object by writing "}". + void EndObject(); + + // Begins array by writing "[". + // Inside an array, you can write a sequence of any values. + void BeginArray(bool singleLine = false); + // Ends array by writing "[". + void EndArray(); + + // Writes a string value inside "". + // pStr can contain any UTF-16 characters, including '"', new line etc. - they will be properly escaped. + void WriteString(LPCWSTR pStr); + + // Begins writing a string value. + // Call BeginString, ContinueString, ContinueString, ..., EndString instead of + // WriteString to conveniently build the string content incrementally, made of + // parts including numbers. + void BeginString(LPCWSTR pStr = NULL); + // Posts next part of an open string. + void ContinueString(LPCWSTR pStr); + // Posts next part of an open string. The number is converted to decimal characters. + void ContinueString(UINT num); + void ContinueString(UINT64 num); + void ContinueString_Pointer(const void* ptr); + // Posts next part of an open string. Pointer value is converted to characters + // using "%p" formatting - shown as hexadecimal number, e.g.: 000000081276Ad00 + // void ContinueString_Pointer(const void* ptr); + // Ends writing a string value by writing '"'. + void EndString(LPCWSTR pStr = NULL); + + // Writes a number value. + void WriteNumber(UINT num); + void WriteNumber(UINT64 num); + // Writes a boolean value - false or true. + void WriteBool(bool b); + // Writes a null value. 
+ void WriteNull(); + + void AddAllocationToObject(const Allocation& alloc); + void AddDetailedStatisticsInfoObject(const DetailedStatistics& stats); + +private: + static const WCHAR* const INDENT; + + enum CollectionType + { + COLLECTION_TYPE_OBJECT, + COLLECTION_TYPE_ARRAY, + }; + struct StackItem + { + CollectionType type; + UINT valueCount; + bool singleLineMode; + }; + + StringBuilder& m_SB; + Vector m_Stack; + bool m_InsideString; + + void BeginValue(bool isString); + void WriteIndent(bool oneLess = false); +}; + +#ifndef _D3D12MA_JSON_WRITER_FUNCTIONS +const WCHAR* const JsonWriter::INDENT = L" "; + +JsonWriter::JsonWriter(const ALLOCATION_CALLBACKS& allocationCallbacks, StringBuilder& stringBuilder) + : m_SB(stringBuilder), + m_Stack(allocationCallbacks), + m_InsideString(false) {} + +JsonWriter::~JsonWriter() +{ + D3D12MA_ASSERT(!m_InsideString); + D3D12MA_ASSERT(m_Stack.empty()); +} + +void JsonWriter::BeginObject(bool singleLine) +{ + D3D12MA_ASSERT(!m_InsideString); + + BeginValue(false); + m_SB.Add(L'{'); + + StackItem stackItem; + stackItem.type = COLLECTION_TYPE_OBJECT; + stackItem.valueCount = 0; + stackItem.singleLineMode = singleLine; + m_Stack.push_back(stackItem); +} + +void JsonWriter::EndObject() +{ + D3D12MA_ASSERT(!m_InsideString); + D3D12MA_ASSERT(!m_Stack.empty() && m_Stack.back().type == COLLECTION_TYPE_OBJECT); + D3D12MA_ASSERT(m_Stack.back().valueCount % 2 == 0); + + WriteIndent(true); + m_SB.Add(L'}'); + + m_Stack.pop_back(); +} + +void JsonWriter::BeginArray(bool singleLine) +{ + D3D12MA_ASSERT(!m_InsideString); + + BeginValue(false); + m_SB.Add(L'['); + + StackItem stackItem; + stackItem.type = COLLECTION_TYPE_ARRAY; + stackItem.valueCount = 0; + stackItem.singleLineMode = singleLine; + m_Stack.push_back(stackItem); +} + +void JsonWriter::EndArray() +{ + D3D12MA_ASSERT(!m_InsideString); + D3D12MA_ASSERT(!m_Stack.empty() && m_Stack.back().type == COLLECTION_TYPE_ARRAY); + + WriteIndent(true); + m_SB.Add(L']'); + + m_Stack.pop_back(); +} + +void JsonWriter::WriteString(LPCWSTR pStr) +{ + BeginString(pStr); + EndString(); +} + +void JsonWriter::BeginString(LPCWSTR pStr) +{ + D3D12MA_ASSERT(!m_InsideString); + + BeginValue(true); + m_InsideString = true; + m_SB.Add(L'"'); + if (pStr != NULL) + { + ContinueString(pStr); + } +} + +void JsonWriter::ContinueString(LPCWSTR pStr) +{ + D3D12MA_ASSERT(m_InsideString); + D3D12MA_ASSERT(pStr); + + for (const WCHAR *p = pStr; *p; ++p) + { + // the strings we encode are assumed to be in UTF-16LE format, the native + // windows wide character Unicode format. In this encoding Unicode code + // points U+0000 to U+D7FF and U+E000 to U+FFFF are encoded in two bytes, + // and everything else takes more than two bytes. We will reject any + // multi wchar character encodings for simplicity. + UINT val = (UINT)*p; + D3D12MA_ASSERT(((val <= 0xD7FF) || (0xE000 <= val && val <= 0xFFFF)) && + "Character not currently supported."); + switch (*p) + { + case L'"': m_SB.Add(L'\\'); m_SB.Add(L'"'); break; + case L'\\': m_SB.Add(L'\\'); m_SB.Add(L'\\'); break; + case L'/': m_SB.Add(L'\\'); m_SB.Add(L'/'); break; + case L'\b': m_SB.Add(L'\\'); m_SB.Add(L'b'); break; + case L'\f': m_SB.Add(L'\\'); m_SB.Add(L'f'); break; + case L'\n': m_SB.Add(L'\\'); m_SB.Add(L'n'); break; + case L'\r': m_SB.Add(L'\\'); m_SB.Add(L'r'); break; + case L'\t': m_SB.Add(L'\\'); m_SB.Add(L't'); break; + default: + // conservatively use encoding \uXXXX for any Unicode character + // requiring more than one byte. 
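+            // [Editorial note, not in the upstream file] Example: U+2713
+            // (CHECK MARK) is outside the 32..255 fast path below, so the
+            // nibble loop emits "\u2713": each of its four iterations peels
+            // the top 4 bits off the 16-bit value via (val & 0xF000) >> 12,
+            // shifts val left by 4, and appends one hex digit, MSB first.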
+            if (32 <= val && val < 256)
+                m_SB.Add(*p);
+            else
+            {
+                m_SB.Add(L'\\');
+                m_SB.Add(L'u');
+                for (UINT i = 0; i < 4; ++i)
+                {
+                    UINT hexDigit = (val & 0xF000) >> 12;
+                    val <<= 4;
+                    if (hexDigit < 10)
+                        m_SB.Add(L'0' + (WCHAR)hexDigit);
+                    else
+                        m_SB.Add(L'A' + (WCHAR)(hexDigit - 10));
+                }
+            }
+            break;
+        }
+    }
+}
+
+void JsonWriter::ContinueString(UINT num)
+{
+    D3D12MA_ASSERT(m_InsideString);
+    m_SB.AddNumber(num);
+}
+
+void JsonWriter::ContinueString(UINT64 num)
+{
+    D3D12MA_ASSERT(m_InsideString);
+    m_SB.AddNumber(num);
+}
+
+void JsonWriter::ContinueString_Pointer(const void* ptr)
+{
+    D3D12MA_ASSERT(m_InsideString);
+    m_SB.AddPointer(ptr);
+}
+
+void JsonWriter::EndString(LPCWSTR pStr)
+{
+    D3D12MA_ASSERT(m_InsideString);
+
+    if (pStr)
+        ContinueString(pStr);
+    m_SB.Add(L'"');
+    m_InsideString = false;
+}
+
+void JsonWriter::WriteNumber(UINT num)
+{
+    D3D12MA_ASSERT(!m_InsideString);
+    BeginValue(false);
+    m_SB.AddNumber(num);
+}
+
+void JsonWriter::WriteNumber(UINT64 num)
+{
+    D3D12MA_ASSERT(!m_InsideString);
+    BeginValue(false);
+    m_SB.AddNumber(num);
+}
+
+void JsonWriter::WriteBool(bool b)
+{
+    D3D12MA_ASSERT(!m_InsideString);
+    BeginValue(false);
+    if (b)
+        m_SB.Add(L"true");
+    else
+        m_SB.Add(L"false");
+}
+
+void JsonWriter::WriteNull()
+{
+    D3D12MA_ASSERT(!m_InsideString);
+    BeginValue(false);
+    m_SB.Add(L"null");
+}
+
+void JsonWriter::AddAllocationToObject(const Allocation& alloc)
+{
+    WriteString(L"Type");
+    switch (alloc.m_PackedData.GetResourceDimension()) {
+    case D3D12_RESOURCE_DIMENSION_UNKNOWN:
+        WriteString(L"UNKNOWN");
+        break;
+    case D3D12_RESOURCE_DIMENSION_BUFFER:
+        WriteString(L"BUFFER");
+        break;
+    case D3D12_RESOURCE_DIMENSION_TEXTURE1D:
+        WriteString(L"TEXTURE1D");
+        break;
+    case D3D12_RESOURCE_DIMENSION_TEXTURE2D:
+        WriteString(L"TEXTURE2D");
+        break;
+    case D3D12_RESOURCE_DIMENSION_TEXTURE3D:
+        WriteString(L"TEXTURE3D");
+        break;
+    default: D3D12MA_ASSERT(0); break;
+    }
+
+    WriteString(L"Size");
+    WriteNumber(alloc.GetSize());
+    WriteString(L"Usage");
+    WriteNumber((UINT)alloc.m_PackedData.GetResourceFlags());
+
+    void* privateData = alloc.GetPrivateData();
+    if (privateData)
+    {
+        WriteString(L"CustomData");
+        BeginString();
+        ContinueString_Pointer(privateData);
+        EndString();
+    }
+
+    LPCWSTR name = alloc.GetName();
+    if (name != NULL)
+    {
+        WriteString(L"Name");
+        WriteString(name);
+    }
+    if (alloc.m_PackedData.GetTextureLayout())
+    {
+        WriteString(L"Layout");
+        WriteNumber((UINT)alloc.m_PackedData.GetTextureLayout());
+    }
+}
+
+void JsonWriter::AddDetailedStatisticsInfoObject(const DetailedStatistics& stats)
+{
+    BeginObject();
+
+    WriteString(L"BlockCount");
+    WriteNumber(stats.Stats.BlockCount);
+    WriteString(L"BlockBytes");
+    WriteNumber(stats.Stats.BlockBytes);
+    WriteString(L"AllocationCount");
+    WriteNumber(stats.Stats.AllocationCount);
+    WriteString(L"AllocationBytes");
+    WriteNumber(stats.Stats.AllocationBytes);
+    WriteString(L"UnusedRangeCount");
+    WriteNumber(stats.UnusedRangeCount);
+
+    if (stats.Stats.AllocationCount > 1)
+    {
+        WriteString(L"AllocationSizeMin");
+        WriteNumber(stats.AllocationSizeMin);
+        WriteString(L"AllocationSizeMax");
+        WriteNumber(stats.AllocationSizeMax);
+    }
+    if (stats.UnusedRangeCount > 1)
+    {
+        WriteString(L"UnusedRangeSizeMin");
+        WriteNumber(stats.UnusedRangeSizeMin);
+        WriteString(L"UnusedRangeSizeMax");
+        WriteNumber(stats.UnusedRangeSizeMax);
+    }
+    EndObject();
+}
+
+void JsonWriter::BeginValue(bool isString)
+{
+    if (!m_Stack.empty())
+    {
+        StackItem& currItem = m_Stack.back();
+        if (currItem.type ==
COLLECTION_TYPE_OBJECT && currItem.valueCount % 2 == 0) + { + D3D12MA_ASSERT(isString); + } + + if (currItem.type == COLLECTION_TYPE_OBJECT && currItem.valueCount % 2 == 1) + { + m_SB.Add(L':'); m_SB.Add(L' '); + } + else if (currItem.valueCount > 0) + { + m_SB.Add(L','); m_SB.Add(L' '); + WriteIndent(); + } + else + { + WriteIndent(); + } + ++currItem.valueCount; + } +} + +void JsonWriter::WriteIndent(bool oneLess) +{ + if (!m_Stack.empty() && !m_Stack.back().singleLineMode) + { + m_SB.AddNewLine(); + + size_t count = m_Stack.size(); + if (count > 0 && oneLess) + { + --count; + } + for (size_t i = 0; i < count; ++i) + { + m_SB.Add(INDENT); + } + } +} +#endif // _D3D12MA_JSON_WRITER_FUNCTIONS +#endif // _D3D12MA_JSON_WRITER + +#ifndef _D3D12MA_POOL_ALLOCATOR +/* +Allocator for objects of type T using a list of arrays (pools) to speed up +allocation. Number of elements that can be allocated is not bounded because +allocator can create multiple blocks. +T should be POD because constructor and destructor is not called in Alloc or +Free. +*/ +template +class PoolAllocator +{ + D3D12MA_CLASS_NO_COPY(PoolAllocator) +public: + // allocationCallbacks externally owned, must outlive this object. + PoolAllocator(const ALLOCATION_CALLBACKS& allocationCallbacks, UINT firstBlockCapacity); + ~PoolAllocator() { Clear(); } + + void Clear(); + template + T* Alloc(Types... args); + void Free(T* ptr); + +private: + union Item + { + UINT NextFreeIndex; // UINT32_MAX means end of list. + alignas(T) char Value[sizeof(T)]; + }; + + struct ItemBlock + { + Item* pItems; + UINT Capacity; + UINT FirstFreeIndex; + }; + + const ALLOCATION_CALLBACKS& m_AllocationCallbacks; + const UINT m_FirstBlockCapacity; + Vector m_ItemBlocks; + + ItemBlock& CreateNewBlock(); +}; + +#ifndef _D3D12MA_POOL_ALLOCATOR_FUNCTIONS +template +PoolAllocator::PoolAllocator(const ALLOCATION_CALLBACKS& allocationCallbacks, UINT firstBlockCapacity) + : m_AllocationCallbacks(allocationCallbacks), + m_FirstBlockCapacity(firstBlockCapacity), + m_ItemBlocks(allocationCallbacks) +{ + D3D12MA_ASSERT(m_FirstBlockCapacity > 1); +} + +template +void PoolAllocator::Clear() +{ + for(size_t i = m_ItemBlocks.size(); i--; ) + { + D3D12MA_DELETE_ARRAY(m_AllocationCallbacks, m_ItemBlocks[i].pItems, m_ItemBlocks[i].Capacity); + } + m_ItemBlocks.clear(true); +} + +template template +T* PoolAllocator::Alloc(Types... args) +{ + for(size_t i = m_ItemBlocks.size(); i--; ) + { + ItemBlock& block = m_ItemBlocks[i]; + // This block has some free items: Use first one. + if(block.FirstFreeIndex != UINT32_MAX) + { + Item* const pItem = &block.pItems[block.FirstFreeIndex]; + block.FirstFreeIndex = pItem->NextFreeIndex; + T* result = (T*)&pItem->Value; + new(result)T(std::forward(args)...); // Explicit constructor call. + return result; + } + } + + // No block has free item: Create new one and use it. + ItemBlock& newBlock = CreateNewBlock(); + Item* const pItem = &newBlock.pItems[0]; + newBlock.FirstFreeIndex = pItem->NextFreeIndex; + T* result = (T*)pItem->Value; + new(result)T(std::forward(args)...); // Explicit constructor call. + return result; +} + +template +void PoolAllocator::Free(T* ptr) +{ + // Search all memory blocks to find ptr. + for(size_t i = m_ItemBlocks.size(); i--; ) + { + ItemBlock& block = m_ItemBlocks[i]; + + Item* pItemPtr; + memcpy(&pItemPtr, &ptr, sizeof(pItemPtr)); + + // Check if pItemPtr is in address range of this block. + if((pItemPtr >= block.pItems) && (pItemPtr < block.pItems + block.Capacity)) + { + ptr->~T(); // Explicit destructor call. 
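+            // [Editorial note, not in the upstream file] The three statements
+            // below return the slot to the block's singly-linked free list:
+            // the freed Item records the old head in NextFreeIndex and becomes
+            // the new FirstFreeIndex, so the next Alloc() can pop it in O(1)
+            // without touching the system heap. Hypothetical usage:
+            //
+            //     PoolAllocator<MyPodType> pool(cb, 32); // cb: ALLOCATION_CALLBACKS
+            //     MyPodType* p = pool.Alloc();           // pops the free-list head
+            //     pool.Free(p);                          // pushes the slot back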
+ const UINT index = static_cast(pItemPtr - block.pItems); + pItemPtr->NextFreeIndex = block.FirstFreeIndex; + block.FirstFreeIndex = index; + return; + } + } + D3D12MA_ASSERT(0 && "Pointer doesn't belong to this memory pool."); +} + +template +typename PoolAllocator::ItemBlock& PoolAllocator::CreateNewBlock() +{ + const UINT newBlockCapacity = m_ItemBlocks.empty() ? + m_FirstBlockCapacity : m_ItemBlocks.back().Capacity * 3 / 2; + + const ItemBlock newBlock = { + D3D12MA_NEW_ARRAY(m_AllocationCallbacks, Item, newBlockCapacity), + newBlockCapacity, + 0 }; + + m_ItemBlocks.push_back(newBlock); + + // Setup singly-linked list of all free items in this block. + for(UINT i = 0; i < newBlockCapacity - 1; ++i) + { + newBlock.pItems[i].NextFreeIndex = i + 1; + } + newBlock.pItems[newBlockCapacity - 1].NextFreeIndex = UINT32_MAX; + return m_ItemBlocks.back(); +} +#endif // _D3D12MA_POOL_ALLOCATOR_FUNCTIONS +#endif // _D3D12MA_POOL_ALLOCATOR + +#ifndef _D3D12MA_LIST +/* +Doubly linked list, with elements allocated out of PoolAllocator. +Has custom interface, as well as STL-style interface, including iterator and +const_iterator. +*/ +template +class List +{ + D3D12MA_CLASS_NO_COPY(List) +public: + struct Item + { + Item* pPrev; + Item* pNext; + T Value; + }; + + class reverse_iterator; + class const_reverse_iterator; + class iterator + { + friend class List; + friend class const_iterator; + + public: + iterator() = default; + iterator(const reverse_iterator& src) + : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + + T& operator*() const; + T* operator->() const; + + iterator& operator++(); + iterator& operator--(); + iterator operator++(int); + iterator operator--(int); + + bool operator==(const iterator& rhs) const; + bool operator!=(const iterator& rhs) const; + + private: + List* m_pList = NULL; + Item* m_pItem = NULL; + + iterator(List* pList, Item* pItem) : m_pList(pList), m_pItem(pItem) {} + }; + + class reverse_iterator + { + friend class List; + friend class const_reverse_iterator; + + public: + reverse_iterator() = default; + reverse_iterator(const iterator& src) + : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + + T& operator*() const; + T* operator->() const; + + reverse_iterator& operator++(); + reverse_iterator& operator--(); + reverse_iterator operator++(int); + reverse_iterator operator--(int); + + bool operator==(const reverse_iterator& rhs) const; + bool operator!=(const reverse_iterator& rhs) const; + + private: + List* m_pList = NULL; + Item* m_pItem = NULL; + + reverse_iterator(List* pList, Item* pItem) + : m_pList(pList), m_pItem(pItem) {} + }; + + class const_iterator + { + friend class List; + + public: + const_iterator() = default; + const_iterator(const iterator& src) + : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + const_iterator(const reverse_iterator& src) + : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + const_iterator(const const_reverse_iterator& src) + : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + + iterator dropConst() const; + const T& operator*() const; + const T* operator->() const; + + const_iterator& operator++(); + const_iterator& operator--(); + const_iterator operator++(int); + const_iterator operator--(int); + + bool operator==(const const_iterator& rhs) const; + bool operator!=(const const_iterator& rhs) const; + + private: + const List* m_pList = NULL; + const Item* m_pItem = NULL; + + const_iterator(const List* pList, const Item* pItem) + : m_pList(pList), m_pItem(pItem) {} + }; + + class const_reverse_iterator + { + friend class List; + + 
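+        // [Editorial note, not in the upstream file] As with const_iterator,
+        // this class only holds const List/Item pointers; dropConst() below is
+        // the internal escape hatch that turns a const traversal result back
+        // into a mutable reverse_iterator when the allocator needs to unlink
+        // or insert around a node it found via const lookup.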
public: + const_reverse_iterator() = default; + const_reverse_iterator(const iterator& src) + : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + const_reverse_iterator(const reverse_iterator& src) + : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + const_reverse_iterator(const const_iterator& src) + : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + + reverse_iterator dropConst() const; + const T& operator*() const; + const T* operator->() const; + + const_reverse_iterator& operator++(); + const_reverse_iterator& operator--(); + const_reverse_iterator operator++(int); + const_reverse_iterator operator--(int); + + bool operator==(const const_reverse_iterator& rhs) const; + bool operator!=(const const_reverse_iterator& rhs) const; + + private: + const List* m_pList = NULL; + const Item* m_pItem = NULL; + + const_reverse_iterator(const List* pList, const Item* pItem) + : m_pList(pList), m_pItem(pItem) {} + }; + + // allocationCallbacks externally owned, must outlive this object. + List(const ALLOCATION_CALLBACKS& allocationCallbacks); + // Intentionally not calling Clear, because that would be unnecessary + // computations to return all items to m_ItemAllocator as free. + ~List() = default; + + size_t GetCount() const { return m_Count; } + bool IsEmpty() const { return m_Count == 0; } + + Item* Front() { return m_pFront; } + const Item* Front() const { return m_pFront; } + Item* Back() { return m_pBack; } + const Item* Back() const { return m_pBack; } + + bool empty() const { return IsEmpty(); } + size_t size() const { return GetCount(); } + void push_back(const T& value) { PushBack(value); } + iterator insert(iterator it, const T& value) { return iterator(this, InsertBefore(it.m_pItem, value)); } + void clear() { Clear(); } + void erase(iterator it) { Remove(it.m_pItem); } + + iterator begin() { return iterator(this, Front()); } + iterator end() { return iterator(this, NULL); } + reverse_iterator rbegin() { return reverse_iterator(this, Back()); } + reverse_iterator rend() { return reverse_iterator(this, NULL); } + + const_iterator cbegin() const { return const_iterator(this, Front()); } + const_iterator cend() const { return const_iterator(this, NULL); } + const_iterator begin() const { return cbegin(); } + const_iterator end() const { return cend(); } + + const_reverse_iterator crbegin() const { return const_reverse_iterator(this, Back()); } + const_reverse_iterator crend() const { return const_reverse_iterator(this, NULL); } + const_reverse_iterator rbegin() const { return crbegin(); } + const_reverse_iterator rend() const { return crend(); } + + Item* PushBack(); + Item* PushFront(); + Item* PushBack(const T& value); + Item* PushFront(const T& value); + void PopBack(); + void PopFront(); + + // Item can be null - it means PushBack. + Item* InsertBefore(Item* pItem); + // Item can be null - it means PushFront. 
+ Item* InsertAfter(Item* pItem); + Item* InsertBefore(Item* pItem, const T& value); + Item* InsertAfter(Item* pItem, const T& value); + + void Clear(); + void Remove(Item* pItem); + +private: + const ALLOCATION_CALLBACKS& m_AllocationCallbacks; + PoolAllocator m_ItemAllocator; + Item* m_pFront; + Item* m_pBack; + size_t m_Count; +}; + +#ifndef _D3D12MA_LIST_ITERATOR_FUNCTIONS +template +T& List::iterator::operator*() const +{ + D3D12MA_HEAVY_ASSERT(m_pItem != NULL); + return m_pItem->Value; +} + +template +T* List::iterator::operator->() const +{ + D3D12MA_HEAVY_ASSERT(m_pItem != NULL); + return &m_pItem->Value; +} + +template +typename List::iterator& List::iterator::operator++() +{ + D3D12MA_HEAVY_ASSERT(m_pItem != NULL); + m_pItem = m_pItem->pNext; + return *this; +} + +template +typename List::iterator& List::iterator::operator--() +{ + if (m_pItem != NULL) + { + m_pItem = m_pItem->pPrev; + } + else + { + D3D12MA_HEAVY_ASSERT(!m_pList->IsEmpty()); + m_pItem = m_pList->Back(); + } + return *this; +} + +template +typename List::iterator List::iterator::operator++(int) +{ + iterator result = *this; + ++* this; + return result; +} + +template +typename List::iterator List::iterator::operator--(int) +{ + iterator result = *this; + --* this; + return result; +} + +template +bool List::iterator::operator==(const iterator& rhs) const +{ + D3D12MA_HEAVY_ASSERT(m_pList == rhs.m_pList); + return m_pItem == rhs.m_pItem; +} + +template +bool List::iterator::operator!=(const iterator& rhs) const +{ + D3D12MA_HEAVY_ASSERT(m_pList == rhs.m_pList); + return m_pItem != rhs.m_pItem; +} +#endif // _D3D12MA_LIST_ITERATOR_FUNCTIONS + +#ifndef _D3D12MA_LIST_REVERSE_ITERATOR_FUNCTIONS +template +T& List::reverse_iterator::operator*() const +{ + D3D12MA_HEAVY_ASSERT(m_pItem != NULL); + return m_pItem->Value; +} + +template +T* List::reverse_iterator::operator->() const +{ + D3D12MA_HEAVY_ASSERT(m_pItem != NULL); + return &m_pItem->Value; +} + +template +typename List::reverse_iterator& List::reverse_iterator::operator++() +{ + D3D12MA_HEAVY_ASSERT(m_pItem != NULL); + m_pItem = m_pItem->pPrev; + return *this; +} + +template +typename List::reverse_iterator& List::reverse_iterator::operator--() +{ + if (m_pItem != NULL) + { + m_pItem = m_pItem->pNext; + } + else + { + D3D12MA_HEAVY_ASSERT(!m_pList->IsEmpty()); + m_pItem = m_pList->Front(); + } + return *this; +} + +template +typename List::reverse_iterator List::reverse_iterator::operator++(int) +{ + reverse_iterator result = *this; + ++* this; + return result; +} + +template +typename List::reverse_iterator List::reverse_iterator::operator--(int) +{ + reverse_iterator result = *this; + --* this; + return result; +} + +template +bool List::reverse_iterator::operator==(const reverse_iterator& rhs) const +{ + D3D12MA_HEAVY_ASSERT(m_pList == rhs.m_pList); + return m_pItem == rhs.m_pItem; +} + +template +bool List::reverse_iterator::operator!=(const reverse_iterator& rhs) const +{ + D3D12MA_HEAVY_ASSERT(m_pList == rhs.m_pList); + return m_pItem != rhs.m_pItem; +} +#endif // _D3D12MA_LIST_REVERSE_ITERATOR_FUNCTIONS + +#ifndef _D3D12MA_LIST_CONST_ITERATOR_FUNCTIONS +template +typename List::iterator List::const_iterator::dropConst() const +{ + return iterator(const_cast*>(m_pList), const_cast(m_pItem)); +} + +template +const T& List::const_iterator::operator*() const +{ + D3D12MA_HEAVY_ASSERT(m_pItem != NULL); + return m_pItem->Value; +} + +template +const T* List::const_iterator::operator->() const +{ + D3D12MA_HEAVY_ASSERT(m_pItem != NULL); + return 
&m_pItem->Value; +} + +template +typename List::const_iterator& List::const_iterator::operator++() +{ + D3D12MA_HEAVY_ASSERT(m_pItem != NULL); + m_pItem = m_pItem->pNext; + return *this; +} + +template +typename List::const_iterator& List::const_iterator::operator--() +{ + if (m_pItem != NULL) + { + m_pItem = m_pItem->pPrev; + } + else + { + D3D12MA_HEAVY_ASSERT(!m_pList->IsEmpty()); + m_pItem = m_pList->Back(); + } + return *this; +} + +template +typename List::const_iterator List::const_iterator::operator++(int) +{ + const_iterator result = *this; + ++* this; + return result; +} + +template +typename List::const_iterator List::const_iterator::operator--(int) +{ + const_iterator result = *this; + --* this; + return result; +} + +template +bool List::const_iterator::operator==(const const_iterator& rhs) const +{ + D3D12MA_HEAVY_ASSERT(m_pList == rhs.m_pList); + return m_pItem == rhs.m_pItem; +} + +template +bool List::const_iterator::operator!=(const const_iterator& rhs) const +{ + D3D12MA_HEAVY_ASSERT(m_pList == rhs.m_pList); + return m_pItem != rhs.m_pItem; +} +#endif // _D3D12MA_LIST_CONST_ITERATOR_FUNCTIONS + +#ifndef _D3D12MA_LIST_CONST_REVERSE_ITERATOR_FUNCTIONS +template +typename List::reverse_iterator List::const_reverse_iterator::dropConst() const +{ + return reverse_iterator(const_cast*>(m_pList), const_cast(m_pItem)); +} + +template +const T& List::const_reverse_iterator::operator*() const +{ + D3D12MA_HEAVY_ASSERT(m_pItem != NULL); + return m_pItem->Value; +} + +template +const T* List::const_reverse_iterator::operator->() const +{ + D3D12MA_HEAVY_ASSERT(m_pItem != NULL); + return &m_pItem->Value; +} + +template +typename List::const_reverse_iterator& List::const_reverse_iterator::operator++() +{ + D3D12MA_HEAVY_ASSERT(m_pItem != NULL); + m_pItem = m_pItem->pPrev; + return *this; +} + +template +typename List::const_reverse_iterator& List::const_reverse_iterator::operator--() +{ + if (m_pItem != NULL) + { + m_pItem = m_pItem->pNext; + } + else + { + D3D12MA_HEAVY_ASSERT(!m_pList->IsEmpty()); + m_pItem = m_pList->Front(); + } + return *this; +} + +template +typename List::const_reverse_iterator List::const_reverse_iterator::operator++(int) +{ + const_reverse_iterator result = *this; + ++* this; + return result; +} + +template +typename List::const_reverse_iterator List::const_reverse_iterator::operator--(int) +{ + const_reverse_iterator result = *this; + --* this; + return result; +} + +template +bool List::const_reverse_iterator::operator==(const const_reverse_iterator& rhs) const +{ + D3D12MA_HEAVY_ASSERT(m_pList == rhs.m_pList); + return m_pItem == rhs.m_pItem; +} + +template +bool List::const_reverse_iterator::operator!=(const const_reverse_iterator& rhs) const +{ + D3D12MA_HEAVY_ASSERT(m_pList == rhs.m_pList); + return m_pItem != rhs.m_pItem; +} +#endif // _D3D12MA_LIST_CONST_REVERSE_ITERATOR_FUNCTIONS + +#ifndef _D3D12MA_LIST_FUNCTIONS +template +List::List(const ALLOCATION_CALLBACKS& allocationCallbacks) + : m_AllocationCallbacks(allocationCallbacks), + m_ItemAllocator(allocationCallbacks, 128), + m_pFront(NULL), + m_pBack(NULL), + m_Count(0) {} + +template +void List::Clear() +{ + if(!IsEmpty()) + { + Item* pItem = m_pBack; + while(pItem != NULL) + { + Item* const pPrevItem = pItem->pPrev; + m_ItemAllocator.Free(pItem); + pItem = pPrevItem; + } + m_pFront = NULL; + m_pBack = NULL; + m_Count = 0; + } +} + +template +typename List::Item* List::PushBack() +{ + Item* const pNewItem = m_ItemAllocator.Alloc(); + pNewItem->pNext = NULL; + if(IsEmpty()) + { + pNewItem->pPrev = 
NULL; + m_pFront = pNewItem; + m_pBack = pNewItem; + m_Count = 1; + } + else + { + pNewItem->pPrev = m_pBack; + m_pBack->pNext = pNewItem; + m_pBack = pNewItem; + ++m_Count; + } + return pNewItem; +} + +template +typename List::Item* List::PushFront() +{ + Item* const pNewItem = m_ItemAllocator.Alloc(); + pNewItem->pPrev = NULL; + if(IsEmpty()) + { + pNewItem->pNext = NULL; + m_pFront = pNewItem; + m_pBack = pNewItem; + m_Count = 1; + } + else + { + pNewItem->pNext = m_pFront; + m_pFront->pPrev = pNewItem; + m_pFront = pNewItem; + ++m_Count; + } + return pNewItem; +} + +template +typename List::Item* List::PushBack(const T& value) +{ + Item* const pNewItem = PushBack(); + pNewItem->Value = value; + return pNewItem; +} + +template +typename List::Item* List::PushFront(const T& value) +{ + Item* const pNewItem = PushFront(); + pNewItem->Value = value; + return pNewItem; +} + +template +void List::PopBack() +{ + D3D12MA_HEAVY_ASSERT(m_Count > 0); + Item* const pBackItem = m_pBack; + Item* const pPrevItem = pBackItem->pPrev; + if(pPrevItem != NULL) + { + pPrevItem->pNext = NULL; + } + m_pBack = pPrevItem; + m_ItemAllocator.Free(pBackItem); + --m_Count; +} + +template +void List::PopFront() +{ + D3D12MA_HEAVY_ASSERT(m_Count > 0); + Item* const pFrontItem = m_pFront; + Item* const pNextItem = pFrontItem->pNext; + if(pNextItem != NULL) + { + pNextItem->pPrev = NULL; + } + m_pFront = pNextItem; + m_ItemAllocator.Free(pFrontItem); + --m_Count; +} + +template +void List::Remove(Item* pItem) +{ + D3D12MA_HEAVY_ASSERT(pItem != NULL); + D3D12MA_HEAVY_ASSERT(m_Count > 0); + + if(pItem->pPrev != NULL) + { + pItem->pPrev->pNext = pItem->pNext; + } + else + { + D3D12MA_HEAVY_ASSERT(m_pFront == pItem); + m_pFront = pItem->pNext; + } + + if(pItem->pNext != NULL) + { + pItem->pNext->pPrev = pItem->pPrev; + } + else + { + D3D12MA_HEAVY_ASSERT(m_pBack == pItem); + m_pBack = pItem->pPrev; + } + + m_ItemAllocator.Free(pItem); + --m_Count; +} + +template +typename List::Item* List::InsertBefore(Item* pItem) +{ + if(pItem != NULL) + { + Item* const prevItem = pItem->pPrev; + Item* const newItem = m_ItemAllocator.Alloc(); + newItem->pPrev = prevItem; + newItem->pNext = pItem; + pItem->pPrev = newItem; + if(prevItem != NULL) + { + prevItem->pNext = newItem; + } + else + { + D3D12MA_HEAVY_ASSERT(m_pFront == pItem); + m_pFront = newItem; + } + ++m_Count; + return newItem; + } + else + { + return PushBack(); + } +} + +template +typename List::Item* List::InsertAfter(Item* pItem) +{ + if(pItem != NULL) + { + Item* const nextItem = pItem->pNext; + Item* const newItem = m_ItemAllocator.Alloc(); + newItem->pNext = nextItem; + newItem->pPrev = pItem; + pItem->pNext = newItem; + if(nextItem != NULL) + { + nextItem->pPrev = newItem; + } + else + { + D3D12MA_HEAVY_ASSERT(m_pBack == pItem); + m_pBack = newItem; + } + ++m_Count; + return newItem; + } + else + return PushFront(); +} + +template +typename List::Item* List::InsertBefore(Item* pItem, const T& value) +{ + Item* const newItem = InsertBefore(pItem); + newItem->Value = value; + return newItem; +} + +template +typename List::Item* List::InsertAfter(Item* pItem, const T& value) +{ + Item* const newItem = InsertAfter(pItem); + newItem->Value = value; + return newItem; +} +#endif // _D3D12MA_LIST_FUNCTIONS +#endif // _D3D12MA_LIST + +#ifndef _D3D12MA_INTRUSIVE_LINKED_LIST +/* +Expected interface of ItemTypeTraits: +struct MyItemTypeTraits +{ + using ItemType = MyItem; + static ItemType* GetPrev(const ItemType* item) { return item->myPrevPtr; } + static ItemType* 
GetNext(const ItemType* item) { return item->myNextPtr; } + static ItemType*& AccessPrev(ItemType* item) { return item->myPrevPtr; } + static ItemType*& AccessNext(ItemType* item) { return item->myNextPtr; } +}; +*/ +template +class IntrusiveLinkedList +{ +public: + using ItemType = typename ItemTypeTraits::ItemType; + static ItemType* GetPrev(const ItemType* item) { return ItemTypeTraits::GetPrev(item); } + static ItemType* GetNext(const ItemType* item) { return ItemTypeTraits::GetNext(item); } + + // Movable, not copyable. + IntrusiveLinkedList() = default; + IntrusiveLinkedList(const IntrusiveLinkedList&) = delete; + IntrusiveLinkedList(IntrusiveLinkedList&& src); + IntrusiveLinkedList& operator=(const IntrusiveLinkedList&) = delete; + IntrusiveLinkedList& operator=(IntrusiveLinkedList&& src); + ~IntrusiveLinkedList() { D3D12MA_HEAVY_ASSERT(IsEmpty()); } + + size_t GetCount() const { return m_Count; } + bool IsEmpty() const { return m_Count == 0; } + + ItemType* Front() { return m_Front; } + ItemType* Back() { return m_Back; } + const ItemType* Front() const { return m_Front; } + const ItemType* Back() const { return m_Back; } + + void PushBack(ItemType* item); + void PushFront(ItemType* item); + ItemType* PopBack(); + ItemType* PopFront(); + + // MyItem can be null - it means PushBack. + void InsertBefore(ItemType* existingItem, ItemType* newItem); + // MyItem can be null - it means PushFront. + void InsertAfter(ItemType* existingItem, ItemType* newItem); + + void Remove(ItemType* item); + void RemoveAll(); + +private: + ItemType* m_Front = NULL; + ItemType* m_Back = NULL; + size_t m_Count = 0; +}; + +#ifndef _D3D12MA_INTRUSIVE_LINKED_LIST_FUNCTIONS +template +IntrusiveLinkedList::IntrusiveLinkedList(IntrusiveLinkedList&& src) + : m_Front(src.m_Front), m_Back(src.m_Back), m_Count(src.m_Count) +{ + src.m_Front = src.m_Back = NULL; + src.m_Count = 0; +} + +template +IntrusiveLinkedList& IntrusiveLinkedList::operator=(IntrusiveLinkedList&& src) +{ + if (&src != this) + { + D3D12MA_HEAVY_ASSERT(IsEmpty()); + m_Front = src.m_Front; + m_Back = src.m_Back; + m_Count = src.m_Count; + src.m_Front = src.m_Back = NULL; + src.m_Count = 0; + } + return *this; +} + +template +void IntrusiveLinkedList::PushBack(ItemType* item) +{ + D3D12MA_HEAVY_ASSERT(ItemTypeTraits::GetPrev(item) == NULL && ItemTypeTraits::GetNext(item) == NULL); + if (IsEmpty()) + { + m_Front = item; + m_Back = item; + m_Count = 1; + } + else + { + ItemTypeTraits::AccessPrev(item) = m_Back; + ItemTypeTraits::AccessNext(m_Back) = item; + m_Back = item; + ++m_Count; + } +} + +template +void IntrusiveLinkedList::PushFront(ItemType* item) +{ + D3D12MA_HEAVY_ASSERT(ItemTypeTraits::GetPrev(item) == NULL && ItemTypeTraits::GetNext(item) == NULL); + if (IsEmpty()) + { + m_Front = item; + m_Back = item; + m_Count = 1; + } + else + { + ItemTypeTraits::AccessNext(item) = m_Front; + ItemTypeTraits::AccessPrev(m_Front) = item; + m_Front = item; + ++m_Count; + } +} + +template +typename IntrusiveLinkedList::ItemType* IntrusiveLinkedList::PopBack() +{ + D3D12MA_HEAVY_ASSERT(m_Count > 0); + ItemType* const backItem = m_Back; + ItemType* const prevItem = ItemTypeTraits::GetPrev(backItem); + if (prevItem != NULL) + { + ItemTypeTraits::AccessNext(prevItem) = NULL; + } + m_Back = prevItem; + --m_Count; + ItemTypeTraits::AccessPrev(backItem) = NULL; + ItemTypeTraits::AccessNext(backItem) = NULL; + return backItem; +} + +template +typename IntrusiveLinkedList::ItemType* IntrusiveLinkedList::PopFront() +{ + D3D12MA_HEAVY_ASSERT(m_Count > 0); + ItemType* 
const frontItem = m_Front; + ItemType* const nextItem = ItemTypeTraits::GetNext(frontItem); + if (nextItem != NULL) + { + ItemTypeTraits::AccessPrev(nextItem) = NULL; + } + m_Front = nextItem; + --m_Count; + ItemTypeTraits::AccessPrev(frontItem) = NULL; + ItemTypeTraits::AccessNext(frontItem) = NULL; + return frontItem; +} + +template +void IntrusiveLinkedList::InsertBefore(ItemType* existingItem, ItemType* newItem) +{ + D3D12MA_HEAVY_ASSERT(newItem != NULL && ItemTypeTraits::GetPrev(newItem) == NULL && ItemTypeTraits::GetNext(newItem) == NULL); + if (existingItem != NULL) + { + ItemType* const prevItem = ItemTypeTraits::GetPrev(existingItem); + ItemTypeTraits::AccessPrev(newItem) = prevItem; + ItemTypeTraits::AccessNext(newItem) = existingItem; + ItemTypeTraits::AccessPrev(existingItem) = newItem; + if (prevItem != NULL) + { + ItemTypeTraits::AccessNext(prevItem) = newItem; + } + else + { + D3D12MA_HEAVY_ASSERT(m_Front == existingItem); + m_Front = newItem; + } + ++m_Count; + } + else + PushBack(newItem); +} + +template +void IntrusiveLinkedList::InsertAfter(ItemType* existingItem, ItemType* newItem) +{ + D3D12MA_HEAVY_ASSERT(newItem != NULL && ItemTypeTraits::GetPrev(newItem) == NULL && ItemTypeTraits::GetNext(newItem) == NULL); + if (existingItem != NULL) + { + ItemType* const nextItem = ItemTypeTraits::GetNext(existingItem); + ItemTypeTraits::AccessNext(newItem) = nextItem; + ItemTypeTraits::AccessPrev(newItem) = existingItem; + ItemTypeTraits::AccessNext(existingItem) = newItem; + if (nextItem != NULL) + { + ItemTypeTraits::AccessPrev(nextItem) = newItem; + } + else + { + D3D12MA_HEAVY_ASSERT(m_Back == existingItem); + m_Back = newItem; + } + ++m_Count; + } + else + return PushFront(newItem); +} + +template +void IntrusiveLinkedList::Remove(ItemType* item) +{ + D3D12MA_HEAVY_ASSERT(item != NULL && m_Count > 0); + if (ItemTypeTraits::GetPrev(item) != NULL) + { + ItemTypeTraits::AccessNext(ItemTypeTraits::AccessPrev(item)) = ItemTypeTraits::GetNext(item); + } + else + { + D3D12MA_HEAVY_ASSERT(m_Front == item); + m_Front = ItemTypeTraits::GetNext(item); + } + + if (ItemTypeTraits::GetNext(item) != NULL) + { + ItemTypeTraits::AccessPrev(ItemTypeTraits::AccessNext(item)) = ItemTypeTraits::GetPrev(item); + } + else + { + D3D12MA_HEAVY_ASSERT(m_Back == item); + m_Back = ItemTypeTraits::GetPrev(item); + } + ItemTypeTraits::AccessPrev(item) = NULL; + ItemTypeTraits::AccessNext(item) = NULL; + --m_Count; +} + +template +void IntrusiveLinkedList::RemoveAll() +{ + if (!IsEmpty()) + { + ItemType* item = m_Back; + while (item != NULL) + { + ItemType* const prevItem = ItemTypeTraits::AccessPrev(item); + ItemTypeTraits::AccessPrev(item) = NULL; + ItemTypeTraits::AccessNext(item) = NULL; + item = prevItem; + } + m_Front = NULL; + m_Back = NULL; + m_Count = 0; + } +} +#endif // _D3D12MA_INTRUSIVE_LINKED_LIST_FUNCTIONS +#endif // _D3D12MA_INTRUSIVE_LINKED_LIST + +#ifndef _D3D12MA_ALLOCATION_OBJECT_ALLOCATOR +/* +Thread-safe wrapper over PoolAllocator free list, for allocation of Allocation objects. +*/ +class AllocationObjectAllocator +{ + D3D12MA_CLASS_NO_COPY(AllocationObjectAllocator); +public: + AllocationObjectAllocator(const ALLOCATION_CALLBACKS& allocationCallbacks, bool useMutex) + : m_Allocator(allocationCallbacks, 1024), m_UseMutex(useMutex) {} + + template + Allocation* Allocate(Types... 
args);
+    void Free(Allocation* alloc);
+
+private:
+    D3D12MA_MUTEX m_Mutex;
+    bool m_UseMutex;
+    PoolAllocator<Allocation> m_Allocator;
+};
+
+#ifndef _D3D12MA_ALLOCATION_OBJECT_ALLOCATOR_FUNCTIONS
+template<typename... Types>
+Allocation* AllocationObjectAllocator::Allocate(Types... args)
+{
+    MutexLock mutexLock(m_Mutex, m_UseMutex);
+    return m_Allocator.Alloc(std::forward<Types>(args)...);
+}
+
+void AllocationObjectAllocator::Free(Allocation* alloc)
+{
+    MutexLock mutexLock(m_Mutex, m_UseMutex);
+    m_Allocator.Free(alloc);
+}
+#endif // _D3D12MA_ALLOCATION_OBJECT_ALLOCATOR_FUNCTIONS
+#endif // _D3D12MA_ALLOCATION_OBJECT_ALLOCATOR
+
+#ifndef _D3D12MA_SUBALLOCATION
+/*
+Represents a region of NormalBlock that is either assigned and returned as
+allocated memory block or free.
+*/
+struct Suballocation
+{
+    UINT64 offset;
+    UINT64 size;
+    void* privateData;
+    SuballocationType type;
+};
+using SuballocationList = List<Suballocation>;
+
+// Comparator for offsets.
+struct SuballocationOffsetLess
+{
+    bool operator()(const Suballocation& lhs, const Suballocation& rhs) const
+    {
+        return lhs.offset < rhs.offset;
+    }
+};
+
+struct SuballocationOffsetGreater
+{
+    bool operator()(const Suballocation& lhs, const Suballocation& rhs) const
+    {
+        return lhs.offset > rhs.offset;
+    }
+};
+
+struct SuballocationItemSizeLess
+{
+    bool operator()(const SuballocationList::iterator lhs, const SuballocationList::iterator rhs) const
+    {
+        return lhs->size < rhs->size;
+    }
+    bool operator()(const SuballocationList::iterator lhs, UINT64 rhsSize) const
+    {
+        return lhs->size < rhsSize;
+    }
+};
+#endif // _D3D12MA_SUBALLOCATION
+
+#ifndef _D3D12MA_ALLOCATION_REQUEST
+/*
+Parameters of planned allocation inside a NormalBlock.
+*/
+struct AllocationRequest
+{
+    AllocHandle allocHandle;
+    UINT64 size;
+    UINT64 algorithmData;
+    UINT64 sumFreeSize; // Sum size of free items that overlap with proposed allocation.
+    UINT64 sumItemSize; // Sum size of items to make lost that overlap with proposed allocation.
+    SuballocationList::iterator item;
+};
+#endif // _D3D12MA_ALLOCATION_REQUEST
+
+#ifndef _D3D12MA_BLOCK_METADATA
+/*
+Data structure used for bookkeeping of allocations and unused ranges of memory
+in a single ID3D12Heap memory block.
+*/
+class BlockMetadata
+{
+public:
+    BlockMetadata(const ALLOCATION_CALLBACKS* allocationCallbacks, bool isVirtual);
+    virtual ~BlockMetadata() = default;
+
+    virtual void Init(UINT64 size) { m_Size = size; }
+    // Validates all data structures inside this object. If not valid, returns false.
+    virtual bool Validate() const = 0;
+    UINT64 GetSize() const { return m_Size; }
+    bool IsVirtual() const { return m_IsVirtual; }
+    virtual size_t GetAllocationCount() const = 0;
+    virtual size_t GetFreeRegionsCount() const = 0;
+    virtual UINT64 GetSumFreeSize() const = 0;
+    virtual UINT64 GetAllocationOffset(AllocHandle allocHandle) const = 0;
+    // Returns true if this block is empty - contains only single free suballocation.
+    virtual bool IsEmpty() const = 0;
+
+    virtual void GetAllocationInfo(AllocHandle allocHandle, VIRTUAL_ALLOCATION_INFO& outInfo) const = 0;
+
+    // Tries to find a place for suballocation with given parameters inside this block.
+    // If succeeded, fills pAllocationRequest and returns true.
+    // If failed, returns false.
+    virtual bool CreateAllocationRequest(
+        UINT64 allocSize,
+        UINT64 allocAlignment,
+        bool upperAddress,
+        UINT32 strategy,
+        AllocationRequest* pAllocationRequest) = 0;
+
+    // Makes actual allocation based on request. Request must already be checked and valid.
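+    // (A sketch of the intended two-phase call pattern - illustrative only, with
+    // `meta`, `size`, `alignment` and `myPrivateData` as assumed caller-side names:
+    //     AllocationRequest request = {};
+    //     if (meta->CreateAllocationRequest(size, alignment, false /*upperAddress*/,
+    //             0 /*strategy*/, &request))
+    //         meta->Alloc(request, size, myPrivateData);
+    // )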
+    virtual void Alloc(
+        const AllocationRequest& request,
+        UINT64 allocSize,
+        void* PrivateData) = 0;
+
+    virtual void Free(AllocHandle allocHandle) = 0;
+    // Frees all allocations.
+    // Careful! Don't call it if there are Allocation objects owned by pPrivateData of cleared allocations!
+    virtual void Clear() = 0;
+
+    virtual AllocHandle GetAllocationListBegin() const = 0;
+    virtual AllocHandle GetNextAllocation(AllocHandle prevAlloc) const = 0;
+    virtual UINT64 GetNextFreeRegionSize(AllocHandle alloc) const = 0;
+    virtual void* GetAllocationPrivateData(AllocHandle allocHandle) const = 0;
+    virtual void SetAllocationPrivateData(AllocHandle allocHandle, void* privateData) = 0;
+
+    virtual void AddStatistics(Statistics& inoutStats) const = 0;
+    virtual void AddDetailedStatistics(DetailedStatistics& inoutStats) const = 0;
+    virtual void WriteAllocationInfoToJson(JsonWriter& json) const = 0;
+    virtual void DebugLogAllAllocations() const = 0;
+
+protected:
+    const ALLOCATION_CALLBACKS* GetAllocs() const { return m_pAllocationCallbacks; }
+    UINT64 GetDebugMargin() const { return IsVirtual() ? 0 : D3D12MA_DEBUG_MARGIN; }
+
+    void DebugLogAllocation(UINT64 offset, UINT64 size, void* privateData) const;
+    void PrintDetailedMap_Begin(JsonWriter& json,
+        UINT64 unusedBytes,
+        size_t allocationCount,
+        size_t unusedRangeCount) const;
+    void PrintDetailedMap_Allocation(JsonWriter& json,
+        UINT64 offset, UINT64 size, void* privateData) const;
+    void PrintDetailedMap_UnusedRange(JsonWriter& json,
+        UINT64 offset, UINT64 size) const;
+    void PrintDetailedMap_End(JsonWriter& json) const;
+
+private:
+    UINT64 m_Size;
+    bool m_IsVirtual;
+    const ALLOCATION_CALLBACKS* m_pAllocationCallbacks;
+
+    D3D12MA_CLASS_NO_COPY(BlockMetadata);
+};
+
+#ifndef _D3D12MA_BLOCK_METADATA_FUNCTIONS
+BlockMetadata::BlockMetadata(const ALLOCATION_CALLBACKS* allocationCallbacks, bool isVirtual)
+    : m_Size(0),
+    m_IsVirtual(isVirtual),
+    m_pAllocationCallbacks(allocationCallbacks)
+{
+    D3D12MA_ASSERT(allocationCallbacks);
+}
+
+void BlockMetadata::DebugLogAllocation(UINT64 offset, UINT64 size, void* privateData) const
+{
+    if (IsVirtual())
+    {
+        D3D12MA_DEBUG_LOG(L"UNFREED VIRTUAL ALLOCATION; Offset: %llu; Size: %llu; PrivateData: %p", offset, size, privateData);
+    }
+    else
+    {
+        D3D12MA_ASSERT(privateData != NULL);
+        Allocation* allocation = reinterpret_cast<Allocation*>(privateData);
+
+        privateData = allocation->GetPrivateData();
+        LPCWSTR name = allocation->GetName();
+
+        D3D12MA_DEBUG_LOG(L"UNFREED ALLOCATION; Offset: %llu; Size: %llu; PrivateData: %p; Name: %s",
+            offset, size, privateData, name ?
name : L"D3D12MA_Empty"); + } +} + +void BlockMetadata::PrintDetailedMap_Begin(JsonWriter& json, + UINT64 unusedBytes, size_t allocationCount, size_t unusedRangeCount) const +{ + json.WriteString(L"TotalBytes"); + json.WriteNumber(GetSize()); + + json.WriteString(L"UnusedBytes"); + json.WriteNumber(unusedBytes); + + json.WriteString(L"Allocations"); + json.WriteNumber((UINT64)allocationCount); + + json.WriteString(L"UnusedRanges"); + json.WriteNumber((UINT64)unusedRangeCount); + + json.WriteString(L"Suballocations"); + json.BeginArray(); +} + +void BlockMetadata::PrintDetailedMap_Allocation(JsonWriter& json, + UINT64 offset, UINT64 size, void* privateData) const +{ + json.BeginObject(true); + + json.WriteString(L"Offset"); + json.WriteNumber(offset); + + if (IsVirtual()) + { + json.WriteString(L"Size"); + json.WriteNumber(size); + if (privateData) + { + json.WriteString(L"CustomData"); + json.WriteNumber((uintptr_t)privateData); + } + } + else + { + const Allocation* const alloc = (const Allocation*)privateData; + D3D12MA_ASSERT(alloc); + json.AddAllocationToObject(*alloc); + } + json.EndObject(); +} + +void BlockMetadata::PrintDetailedMap_UnusedRange(JsonWriter& json, + UINT64 offset, UINT64 size) const +{ + json.BeginObject(true); + + json.WriteString(L"Offset"); + json.WriteNumber(offset); + + json.WriteString(L"Type"); + json.WriteString(L"FREE"); + + json.WriteString(L"Size"); + json.WriteNumber(size); + + json.EndObject(); +} + +void BlockMetadata::PrintDetailedMap_End(JsonWriter& json) const +{ + json.EndArray(); +} +#endif // _D3D12MA_BLOCK_METADATA_FUNCTIONS +#endif // _D3D12MA_BLOCK_METADATA + +#ifndef _D3D12MA_BLOCK_METADATA_LINEAR +class BlockMetadata_Linear : public BlockMetadata +{ +public: + BlockMetadata_Linear(const ALLOCATION_CALLBACKS* allocationCallbacks, bool isVirtual); + virtual ~BlockMetadata_Linear() = default; + + UINT64 GetSumFreeSize() const override { return m_SumFreeSize; } + bool IsEmpty() const override { return GetAllocationCount() == 0; } + UINT64 GetAllocationOffset(AllocHandle allocHandle) const override { return (UINT64)allocHandle - 1; }; + + void Init(UINT64 size) override; + bool Validate() const override; + size_t GetAllocationCount() const override; + size_t GetFreeRegionsCount() const override; + void GetAllocationInfo(AllocHandle allocHandle, VIRTUAL_ALLOCATION_INFO& outInfo) const override; + + bool CreateAllocationRequest( + UINT64 allocSize, + UINT64 allocAlignment, + bool upperAddress, + UINT32 strategy, + AllocationRequest* pAllocationRequest) override; + + void Alloc( + const AllocationRequest& request, + UINT64 allocSize, + void* privateData) override; + + void Free(AllocHandle allocHandle) override; + void Clear() override; + + AllocHandle GetAllocationListBegin() const override; + AllocHandle GetNextAllocation(AllocHandle prevAlloc) const override; + UINT64 GetNextFreeRegionSize(AllocHandle alloc) const override; + void* GetAllocationPrivateData(AllocHandle allocHandle) const override; + void SetAllocationPrivateData(AllocHandle allocHandle, void* privateData) override; + + void AddStatistics(Statistics& inoutStats) const override; + void AddDetailedStatistics(DetailedStatistics& inoutStats) const override; + void WriteAllocationInfoToJson(JsonWriter& json) const override; + void DebugLogAllAllocations() const override; + +private: + /* + There are two suballocation vectors, used in ping-pong way. + The one with index m_1stVectorIndex is called 1st. + The one with index (m_1stVectorIndex ^ 1) is called 2nd. 
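+    For example (an illustrative layout, not data taken from this code), in
+    ring-buffer mode a block of size 100 may look like:
+
+        [2nd: offsets 0..20][free][1st: offsets 60..90][free]
+
+    where the items of 2nd were created later than those of 1st, but wrapped
+    around to smaller offsets.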
+    2nd can be non-empty only when 1st is not empty.
+    When 2nd is not empty, m_2ndVectorMode indicates its mode of operation.
+    */
+    typedef Vector<Suballocation> SuballocationVectorType;
+
+    enum ALLOC_REQUEST_TYPE
+    {
+        ALLOC_REQUEST_UPPER_ADDRESS,
+        ALLOC_REQUEST_END_OF_1ST,
+        ALLOC_REQUEST_END_OF_2ND,
+    };
+
+    enum SECOND_VECTOR_MODE
+    {
+        SECOND_VECTOR_EMPTY,
+        /*
+        Suballocations in 2nd vector are created later than the ones in 1st, but they
+        all have smaller offset.
+        */
+        SECOND_VECTOR_RING_BUFFER,
+        /*
+        Suballocations in 2nd vector are upper side of double stack.
+        They all have offsets higher than those in 1st vector.
+        Top of this stack means smaller offsets, but higher indices in this vector.
+        */
+        SECOND_VECTOR_DOUBLE_STACK,
+    };
+
+    UINT64 m_SumFreeSize;
+    SuballocationVectorType m_Suballocations0, m_Suballocations1;
+    UINT32 m_1stVectorIndex;
+    SECOND_VECTOR_MODE m_2ndVectorMode;
+    // Number of items in 1st vector with hAllocation = null at the beginning.
+    size_t m_1stNullItemsBeginCount;
+    // Number of other items in 1st vector with hAllocation = null somewhere in the middle.
+    size_t m_1stNullItemsMiddleCount;
+    // Number of items in 2nd vector with hAllocation = null.
+    size_t m_2ndNullItemsCount;
+
+    SuballocationVectorType& AccessSuballocations1st() { return m_1stVectorIndex ? m_Suballocations1 : m_Suballocations0; }
+    SuballocationVectorType& AccessSuballocations2nd() { return m_1stVectorIndex ? m_Suballocations0 : m_Suballocations1; }
+    const SuballocationVectorType& AccessSuballocations1st() const { return m_1stVectorIndex ? m_Suballocations1 : m_Suballocations0; }
+    const SuballocationVectorType& AccessSuballocations2nd() const { return m_1stVectorIndex ? m_Suballocations0 : m_Suballocations1; }
+
+    Suballocation& FindSuballocation(UINT64 offset) const;
+    bool ShouldCompact1st() const;
+    void CleanupAfterFree();
+
+    bool CreateAllocationRequest_LowerAddress(
+        UINT64 allocSize,
+        UINT64 allocAlignment,
+        AllocationRequest* pAllocationRequest);
+    bool CreateAllocationRequest_UpperAddress(
+        UINT64 allocSize,
+        UINT64 allocAlignment,
+        AllocationRequest* pAllocationRequest);
+
+    D3D12MA_CLASS_NO_COPY(BlockMetadata_Linear)
+};
+
+#ifndef _D3D12MA_BLOCK_METADATA_LINEAR_FUNCTIONS
+BlockMetadata_Linear::BlockMetadata_Linear(const ALLOCATION_CALLBACKS* allocationCallbacks, bool isVirtual)
+    : BlockMetadata(allocationCallbacks, isVirtual),
+    m_SumFreeSize(0),
+    m_Suballocations0(*allocationCallbacks),
+    m_Suballocations1(*allocationCallbacks),
+    m_1stVectorIndex(0),
+    m_2ndVectorMode(SECOND_VECTOR_EMPTY),
+    m_1stNullItemsBeginCount(0),
+    m_1stNullItemsMiddleCount(0),
+    m_2ndNullItemsCount(0)
+{
+    D3D12MA_ASSERT(allocationCallbacks);
+}
+
+void BlockMetadata_Linear::Init(UINT64 size)
+{
+    BlockMetadata::Init(size);
+    m_SumFreeSize = size;
+}
+
+bool BlockMetadata_Linear::Validate() const
+{
+    D3D12MA_VALIDATE(GetSumFreeSize() <= GetSize());
+    const SuballocationVectorType& suballocations1st = AccessSuballocations1st();
+    const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd();
+
+    D3D12MA_VALIDATE(suballocations2nd.empty() == (m_2ndVectorMode == SECOND_VECTOR_EMPTY));
+    D3D12MA_VALIDATE(!suballocations1st.empty() ||
+        suballocations2nd.empty() ||
+        m_2ndVectorMode != SECOND_VECTOR_RING_BUFFER);
+
+    if (!suballocations1st.empty())
+    {
+        // Null item at the beginning should be accounted into m_1stNullItemsBeginCount.
+        D3D12MA_VALIDATE(suballocations1st[m_1stNullItemsBeginCount].type != SUBALLOCATION_TYPE_FREE);
+        // Null item at the end should be just pop_back().
+ D3D12MA_VALIDATE(suballocations1st.back().type != SUBALLOCATION_TYPE_FREE); + } + if (!suballocations2nd.empty()) + { + // Null item at the end should be just pop_back(). + D3D12MA_VALIDATE(suballocations2nd.back().type != SUBALLOCATION_TYPE_FREE); + } + + D3D12MA_VALIDATE(m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount <= suballocations1st.size()); + D3D12MA_VALIDATE(m_2ndNullItemsCount <= suballocations2nd.size()); + + UINT64 sumUsedSize = 0; + const size_t suballoc1stCount = suballocations1st.size(); + UINT64 offset = 0; + + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + const size_t suballoc2ndCount = suballocations2nd.size(); + size_t nullItem2ndCount = 0; + for (size_t i = 0; i < suballoc2ndCount; ++i) + { + const Suballocation& suballoc = suballocations2nd[i]; + const bool currFree = (suballoc.type == SUBALLOCATION_TYPE_FREE); + + const Allocation* alloc = (Allocation*)suballoc.privateData; + if (!IsVirtual()) + { + D3D12MA_VALIDATE(currFree == (alloc == NULL)); + } + D3D12MA_VALIDATE(suballoc.offset >= offset); + + if (!currFree) + { + if (!IsVirtual()) + { + D3D12MA_VALIDATE(GetAllocationOffset(alloc->GetAllocHandle()) == suballoc.offset); + D3D12MA_VALIDATE(alloc->GetSize() == suballoc.size); + } + sumUsedSize += suballoc.size; + } + else + { + ++nullItem2ndCount; + } + + offset = suballoc.offset + suballoc.size + GetDebugMargin(); + } + + D3D12MA_VALIDATE(nullItem2ndCount == m_2ndNullItemsCount); + } + + for (size_t i = 0; i < m_1stNullItemsBeginCount; ++i) + { + const Suballocation& suballoc = suballocations1st[i]; + D3D12MA_VALIDATE(suballoc.type == SUBALLOCATION_TYPE_FREE && + suballoc.privateData == NULL); + } + + size_t nullItem1stCount = m_1stNullItemsBeginCount; + + for (size_t i = m_1stNullItemsBeginCount; i < suballoc1stCount; ++i) + { + const Suballocation& suballoc = suballocations1st[i]; + const bool currFree = (suballoc.type == SUBALLOCATION_TYPE_FREE); + + const Allocation* alloc = (Allocation*)suballoc.privateData; + if (!IsVirtual()) + { + D3D12MA_VALIDATE(currFree == (alloc == NULL)); + } + D3D12MA_VALIDATE(suballoc.offset >= offset); + D3D12MA_VALIDATE(i >= m_1stNullItemsBeginCount || currFree); + + if (!currFree) + { + if (!IsVirtual()) + { + D3D12MA_VALIDATE(GetAllocationOffset(alloc->GetAllocHandle()) == suballoc.offset); + D3D12MA_VALIDATE(alloc->GetSize() == suballoc.size); + } + sumUsedSize += suballoc.size; + } + else + { + ++nullItem1stCount; + } + + offset = suballoc.offset + suballoc.size + GetDebugMargin(); + } + D3D12MA_VALIDATE(nullItem1stCount == m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount); + + if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + const size_t suballoc2ndCount = suballocations2nd.size(); + size_t nullItem2ndCount = 0; + for (size_t i = suballoc2ndCount; i--; ) + { + const Suballocation& suballoc = suballocations2nd[i]; + const bool currFree = (suballoc.type == SUBALLOCATION_TYPE_FREE); + + const Allocation* alloc = (Allocation*)suballoc.privateData; + if (!IsVirtual()) + { + D3D12MA_VALIDATE(currFree == (alloc == NULL)); + } + D3D12MA_VALIDATE(suballoc.offset >= offset); + + if (!currFree) + { + if (!IsVirtual()) + { + D3D12MA_VALIDATE(GetAllocationOffset(alloc->GetAllocHandle()) == suballoc.offset); + D3D12MA_VALIDATE(alloc->GetSize() == suballoc.size); + } + sumUsedSize += suballoc.size; + } + else + { + ++nullItem2ndCount; + } + + offset = suballoc.offset + suballoc.size + GetDebugMargin(); + } + + D3D12MA_VALIDATE(nullItem2ndCount == m_2ndNullItemsCount); + } + + D3D12MA_VALIDATE(offset <= 
GetSize()); + D3D12MA_VALIDATE(m_SumFreeSize == GetSize() - sumUsedSize); + + return true; +} + +size_t BlockMetadata_Linear::GetAllocationCount() const +{ + return AccessSuballocations1st().size() - m_1stNullItemsBeginCount - m_1stNullItemsMiddleCount + + AccessSuballocations2nd().size() - m_2ndNullItemsCount; +} + +size_t BlockMetadata_Linear::GetFreeRegionsCount() const +{ + // Function only used for defragmentation, which is disabled for this algorithm + D3D12MA_ASSERT(0); + return SIZE_MAX; +} + +void BlockMetadata_Linear::GetAllocationInfo(AllocHandle allocHandle, VIRTUAL_ALLOCATION_INFO& outInfo) const +{ + const Suballocation& suballoc = FindSuballocation((UINT64)allocHandle - 1); + outInfo.Offset = suballoc.offset; + outInfo.Size = suballoc.size; + outInfo.pPrivateData = suballoc.privateData; +} + +bool BlockMetadata_Linear::CreateAllocationRequest( + UINT64 allocSize, + UINT64 allocAlignment, + bool upperAddress, + UINT32 strategy, + AllocationRequest* pAllocationRequest) +{ + D3D12MA_ASSERT(allocSize > 0 && "Cannot allocate empty block!"); + D3D12MA_ASSERT(pAllocationRequest != NULL); + D3D12MA_HEAVY_ASSERT(Validate()); + + if(allocSize > GetSize()) + return false; + + pAllocationRequest->size = allocSize; + return upperAddress ? + CreateAllocationRequest_UpperAddress( + allocSize, allocAlignment, pAllocationRequest) : + CreateAllocationRequest_LowerAddress( + allocSize, allocAlignment, pAllocationRequest); +} + +void BlockMetadata_Linear::Alloc( + const AllocationRequest& request, + UINT64 allocSize, + void* privateData) +{ + UINT64 offset = (UINT64)request.allocHandle - 1; + const Suballocation newSuballoc = { offset, request.size, privateData, SUBALLOCATION_TYPE_ALLOCATION }; + + switch (request.algorithmData) + { + case ALLOC_REQUEST_UPPER_ADDRESS: + { + D3D12MA_ASSERT(m_2ndVectorMode != SECOND_VECTOR_RING_BUFFER && + "CRITICAL ERROR: Trying to use linear allocator as double stack while it was already used as ring buffer."); + SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + suballocations2nd.push_back(newSuballoc); + m_2ndVectorMode = SECOND_VECTOR_DOUBLE_STACK; + break; + } + case ALLOC_REQUEST_END_OF_1ST: + { + SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + + D3D12MA_ASSERT(suballocations1st.empty() || + offset >= suballocations1st.back().offset + suballocations1st.back().size); + // Check if it fits before the end of the block. + D3D12MA_ASSERT(offset + request.size <= GetSize()); + + suballocations1st.push_back(newSuballoc); + break; + } + case ALLOC_REQUEST_END_OF_2ND: + { + SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + // New allocation at the end of 2-part ring buffer, so before first allocation from 1st vector. + D3D12MA_ASSERT(!suballocations1st.empty() && + offset + request.size <= suballocations1st[m_1stNullItemsBeginCount].offset); + SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + + switch (m_2ndVectorMode) + { + case SECOND_VECTOR_EMPTY: + // First allocation from second part ring buffer. + D3D12MA_ASSERT(suballocations2nd.empty()); + m_2ndVectorMode = SECOND_VECTOR_RING_BUFFER; + break; + case SECOND_VECTOR_RING_BUFFER: + // 2-part ring buffer is already started. 
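+            // (Illustrative numbers, matching the layout example in the class comment:
+            // with 2nd ending at offset 20 and 1st starting at offset 60, a new
+            // END_OF_2ND suballocation of size 30 would occupy offsets [20..50), still
+            // satisfying offset + size <= first used offset of 1st, as asserted above.)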
+ D3D12MA_ASSERT(!suballocations2nd.empty()); + break; + case SECOND_VECTOR_DOUBLE_STACK: + D3D12MA_ASSERT(0 && "CRITICAL ERROR: Trying to use linear allocator as ring buffer while it was already used as double stack."); + break; + default: + D3D12MA_ASSERT(0); + } + + suballocations2nd.push_back(newSuballoc); + break; + } + default: + D3D12MA_ASSERT(0 && "CRITICAL INTERNAL ERROR."); + } + m_SumFreeSize -= newSuballoc.size; +} + +void BlockMetadata_Linear::Free(AllocHandle allocHandle) +{ + SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + UINT64 offset = (UINT64)allocHandle - 1; + + if (!suballocations1st.empty()) + { + // First allocation: Mark it as next empty at the beginning. + Suballocation& firstSuballoc = suballocations1st[m_1stNullItemsBeginCount]; + if (firstSuballoc.offset == offset) + { + firstSuballoc.type = SUBALLOCATION_TYPE_FREE; + firstSuballoc.privateData = NULL; + m_SumFreeSize += firstSuballoc.size; + ++m_1stNullItemsBeginCount; + CleanupAfterFree(); + return; + } + } + + // Last allocation in 2-part ring buffer or top of upper stack (same logic). + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER || + m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + Suballocation& lastSuballoc = suballocations2nd.back(); + if (lastSuballoc.offset == offset) + { + m_SumFreeSize += lastSuballoc.size; + suballocations2nd.pop_back(); + CleanupAfterFree(); + return; + } + } + // Last allocation in 1st vector. + else if (m_2ndVectorMode == SECOND_VECTOR_EMPTY) + { + Suballocation& lastSuballoc = suballocations1st.back(); + if (lastSuballoc.offset == offset) + { + m_SumFreeSize += lastSuballoc.size; + suballocations1st.pop_back(); + CleanupAfterFree(); + return; + } + } + + Suballocation refSuballoc; + refSuballoc.offset = offset; + // Rest of members stays uninitialized intentionally for better performance. + + // Item from the middle of 1st vector. + { + const SuballocationVectorType::iterator it = BinaryFindSorted( + suballocations1st.begin() + m_1stNullItemsBeginCount, + suballocations1st.end(), + refSuballoc, + SuballocationOffsetLess()); + if (it != suballocations1st.end()) + { + it->type = SUBALLOCATION_TYPE_FREE; + it->privateData = NULL; + ++m_1stNullItemsMiddleCount; + m_SumFreeSize += it->size; + CleanupAfterFree(); + return; + } + } + + if (m_2ndVectorMode != SECOND_VECTOR_EMPTY) + { + // Item from the middle of 2nd vector. + const SuballocationVectorType::iterator it = m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER ? + BinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, SuballocationOffsetLess()) : + BinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, SuballocationOffsetGreater()); + if (it != suballocations2nd.end()) + { + it->type = SUBALLOCATION_TYPE_FREE; + it->privateData = NULL; + ++m_2ndNullItemsCount; + m_SumFreeSize += it->size; + CleanupAfterFree(); + return; + } + } + + D3D12MA_ASSERT(0 && "Allocation to free not found in linear allocator!"); +} + +void BlockMetadata_Linear::Clear() +{ + m_SumFreeSize = GetSize(); + m_Suballocations0.clear(); + m_Suballocations1.clear(); + // Leaving m_1stVectorIndex unchanged - it doesn't matter. 
+ m_2ndVectorMode = SECOND_VECTOR_EMPTY; + m_1stNullItemsBeginCount = 0; + m_1stNullItemsMiddleCount = 0; + m_2ndNullItemsCount = 0; +} + +AllocHandle BlockMetadata_Linear::GetAllocationListBegin() const +{ + // Function only used for defragmentation, which is disabled for this algorithm + D3D12MA_ASSERT(0); + return (AllocHandle)0; +} + +AllocHandle BlockMetadata_Linear::GetNextAllocation(AllocHandle prevAlloc) const +{ + // Function only used for defragmentation, which is disabled for this algorithm + D3D12MA_ASSERT(0); + return (AllocHandle)0; +} + +UINT64 BlockMetadata_Linear::GetNextFreeRegionSize(AllocHandle alloc) const +{ + // Function only used for defragmentation, which is disabled for this algorithm + D3D12MA_ASSERT(0); + return 0; +} + +void* BlockMetadata_Linear::GetAllocationPrivateData(AllocHandle allocHandle) const +{ + return FindSuballocation((UINT64)allocHandle - 1).privateData; +} + +void BlockMetadata_Linear::SetAllocationPrivateData(AllocHandle allocHandle, void* privateData) +{ + Suballocation& suballoc = FindSuballocation((UINT64)allocHandle - 1); + suballoc.privateData = privateData; +} + +void BlockMetadata_Linear::AddStatistics(Statistics& inoutStats) const +{ + inoutStats.BlockCount++; + inoutStats.AllocationCount += (UINT)GetAllocationCount(); + inoutStats.BlockBytes += GetSize(); + inoutStats.AllocationBytes += GetSize() - m_SumFreeSize; +} + +void BlockMetadata_Linear::AddDetailedStatistics(DetailedStatistics& inoutStats) const +{ + inoutStats.Stats.BlockCount++; + inoutStats.Stats.BlockBytes += GetSize(); + + const UINT64 size = GetSize(); + const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + const size_t suballoc1stCount = suballocations1st.size(); + const size_t suballoc2ndCount = suballocations2nd.size(); + + UINT64 lastOffset = 0; + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + const UINT64 freeSpace2ndTo1stEnd = suballocations1st[m_1stNullItemsBeginCount].offset; + size_t nextAlloc2ndIndex = 0; + while (lastOffset < freeSpace2ndTo1stEnd) + { + // Find next non-null allocation or move nextAllocIndex to the end. + while (nextAlloc2ndIndex < suballoc2ndCount && + suballocations2nd[nextAlloc2ndIndex].privateData == NULL) + { + ++nextAlloc2ndIndex; + } + + // Found non-null allocation. + if (nextAlloc2ndIndex < suballoc2ndCount) + { + const Suballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + const UINT64 unusedRangeSize = suballoc.offset - lastOffset; + AddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + AddDetailedStatisticsAllocation(inoutStats, suballoc.size); + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc2ndIndex; + } + // We are at the end. + else + { + // There is free space from lastOffset to freeSpace2ndTo1stEnd. + if (lastOffset < freeSpace2ndTo1stEnd) + { + const UINT64 unusedRangeSize = freeSpace2ndTo1stEnd - lastOffset; + AddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); + } + + // End of loop. + lastOffset = freeSpace2ndTo1stEnd; + } + } + } + + size_t nextAlloc1stIndex = m_1stNullItemsBeginCount; + const UINT64 freeSpace1stTo2ndEnd = + m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK ? 
suballocations2nd.back().offset : size; + while (lastOffset < freeSpace1stTo2ndEnd) + { + // Find next non-null allocation or move nextAllocIndex to the end. + while (nextAlloc1stIndex < suballoc1stCount && + suballocations1st[nextAlloc1stIndex].privateData == NULL) + { + ++nextAlloc1stIndex; + } + + // Found non-null allocation. + if (nextAlloc1stIndex < suballoc1stCount) + { + const Suballocation& suballoc = suballocations1st[nextAlloc1stIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + const UINT64 unusedRangeSize = suballoc.offset - lastOffset; + AddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + AddDetailedStatisticsAllocation(inoutStats, suballoc.size); + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc1stIndex; + } + // We are at the end. + else + { + // There is free space from lastOffset to freeSpace1stTo2ndEnd. + if (lastOffset < freeSpace1stTo2ndEnd) + { + const UINT64 unusedRangeSize = freeSpace1stTo2ndEnd - lastOffset; + AddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); + } + + // End of loop. + lastOffset = freeSpace1stTo2ndEnd; + } + } + + if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + size_t nextAlloc2ndIndex = suballocations2nd.size() - 1; + while (lastOffset < size) + { + // Find next non-null allocation or move nextAllocIndex to the end. + while (nextAlloc2ndIndex != SIZE_MAX && + suballocations2nd[nextAlloc2ndIndex].privateData == NULL) + { + --nextAlloc2ndIndex; + } + + // Found non-null allocation. + if (nextAlloc2ndIndex != SIZE_MAX) + { + const Suballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + const UINT64 unusedRangeSize = suballoc.offset - lastOffset; + AddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + AddDetailedStatisticsAllocation(inoutStats, suballoc.size); + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + --nextAlloc2ndIndex; + } + // We are at the end. + else + { + // There is free space from lastOffset to size. + if (lastOffset < size) + { + const UINT64 unusedRangeSize = size - lastOffset; + AddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); + } + + // End of loop. + lastOffset = size; + } + } + } +} + +void BlockMetadata_Linear::WriteAllocationInfoToJson(JsonWriter& json) const +{ + const UINT64 size = GetSize(); + const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + const size_t suballoc1stCount = suballocations1st.size(); + const size_t suballoc2ndCount = suballocations2nd.size(); + + // FIRST PASS + + size_t unusedRangeCount = 0; + UINT64 usedBytes = 0; + + UINT64 lastOffset = 0; + + size_t alloc2ndCount = 0; + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + const UINT64 freeSpace2ndTo1stEnd = suballocations1st[m_1stNullItemsBeginCount].offset; + size_t nextAlloc2ndIndex = 0; + while (lastOffset < freeSpace2ndTo1stEnd) + { + // Find next non-null allocation or move nextAlloc2ndIndex to the end. 
+ while (nextAlloc2ndIndex < suballoc2ndCount && + suballocations2nd[nextAlloc2ndIndex].privateData == NULL) + { + ++nextAlloc2ndIndex; + } + + // Found non-null allocation. + if (nextAlloc2ndIndex < suballoc2ndCount) + { + const Suballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + ++unusedRangeCount; + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + ++alloc2ndCount; + usedBytes += suballoc.size; + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc2ndIndex; + } + // We are at the end. + else + { + if (lastOffset < freeSpace2ndTo1stEnd) + { + // There is free space from lastOffset to freeSpace2ndTo1stEnd. + ++unusedRangeCount; + } + + // End of loop. + lastOffset = freeSpace2ndTo1stEnd; + } + } + } + + size_t nextAlloc1stIndex = m_1stNullItemsBeginCount; + size_t alloc1stCount = 0; + const UINT64 freeSpace1stTo2ndEnd = + m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK ? suballocations2nd.back().offset : size; + while (lastOffset < freeSpace1stTo2ndEnd) + { + // Find next non-null allocation or move nextAllocIndex to the end. + while (nextAlloc1stIndex < suballoc1stCount && + suballocations1st[nextAlloc1stIndex].privateData == NULL) + { + ++nextAlloc1stIndex; + } + + // Found non-null allocation. + if (nextAlloc1stIndex < suballoc1stCount) + { + const Suballocation& suballoc = suballocations1st[nextAlloc1stIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + ++unusedRangeCount; + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + ++alloc1stCount; + usedBytes += suballoc.size; + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc1stIndex; + } + // We are at the end. + else + { + if (lastOffset < size) + { + // There is free space from lastOffset to freeSpace1stTo2ndEnd. + ++unusedRangeCount; + } + + // End of loop. + lastOffset = freeSpace1stTo2ndEnd; + } + } + + if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + size_t nextAlloc2ndIndex = suballocations2nd.size() - 1; + while (lastOffset < size) + { + // Find next non-null allocation or move nextAlloc2ndIndex to the end. + while (nextAlloc2ndIndex != SIZE_MAX && + suballocations2nd[nextAlloc2ndIndex].privateData == NULL) + { + --nextAlloc2ndIndex; + } + + // Found non-null allocation. + if (nextAlloc2ndIndex != SIZE_MAX) + { + const Suballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + ++unusedRangeCount; + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + ++alloc2ndCount; + usedBytes += suballoc.size; + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + --nextAlloc2ndIndex; + } + // We are at the end. + else + { + if (lastOffset < size) + { + // There is free space from lastOffset to size. + ++unusedRangeCount; + } + + // End of loop. 
+ lastOffset = size; + } + } + } + + const UINT64 unusedBytes = size - usedBytes; + PrintDetailedMap_Begin(json, unusedBytes, alloc1stCount + alloc2ndCount, unusedRangeCount); + + // SECOND PASS + lastOffset = 0; + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + const UINT64 freeSpace2ndTo1stEnd = suballocations1st[m_1stNullItemsBeginCount].offset; + size_t nextAlloc2ndIndex = 0; + while (lastOffset < freeSpace2ndTo1stEnd) + { + // Find next non-null allocation or move nextAlloc2ndIndex to the end. + while (nextAlloc2ndIndex < suballoc2ndCount && + suballocations2nd[nextAlloc2ndIndex].privateData == NULL) + { + ++nextAlloc2ndIndex; + } + + // Found non-null allocation. + if (nextAlloc2ndIndex < suballoc2ndCount) + { + const Suballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + const UINT64 unusedRangeSize = suballoc.offset - lastOffset; + PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + PrintDetailedMap_Allocation(json, suballoc.offset, suballoc.size, suballoc.privateData); + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc2ndIndex; + } + // We are at the end. + else + { + if (lastOffset < freeSpace2ndTo1stEnd) + { + // There is free space from lastOffset to freeSpace2ndTo1stEnd. + const UINT64 unusedRangeSize = freeSpace2ndTo1stEnd - lastOffset; + PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); + } + + // End of loop. + lastOffset = freeSpace2ndTo1stEnd; + } + } + } + + nextAlloc1stIndex = m_1stNullItemsBeginCount; + while (lastOffset < freeSpace1stTo2ndEnd) + { + // Find next non-null allocation or move nextAllocIndex to the end. + while (nextAlloc1stIndex < suballoc1stCount && + suballocations1st[nextAlloc1stIndex].privateData == NULL) + { + ++nextAlloc1stIndex; + } + + // Found non-null allocation. + if (nextAlloc1stIndex < suballoc1stCount) + { + const Suballocation& suballoc = suballocations1st[nextAlloc1stIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + const UINT64 unusedRangeSize = suballoc.offset - lastOffset; + PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + PrintDetailedMap_Allocation(json, suballoc.offset, suballoc.size, suballoc.privateData); + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc1stIndex; + } + // We are at the end. + else + { + if (lastOffset < freeSpace1stTo2ndEnd) + { + // There is free space from lastOffset to freeSpace1stTo2ndEnd. + const UINT64 unusedRangeSize = freeSpace1stTo2ndEnd - lastOffset; + PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); + } + + // End of loop. + lastOffset = freeSpace1stTo2ndEnd; + } + } + + if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + size_t nextAlloc2ndIndex = suballocations2nd.size() - 1; + while (lastOffset < size) + { + // Find next non-null allocation or move nextAlloc2ndIndex to the end. + while (nextAlloc2ndIndex != SIZE_MAX && + suballocations2nd[nextAlloc2ndIndex].privateData == NULL) + { + --nextAlloc2ndIndex; + } + + // Found non-null allocation. 
+            if (nextAlloc2ndIndex != SIZE_MAX)
+            {
+                const Suballocation& suballoc = suballocations2nd[nextAlloc2ndIndex];
+
+                // 1. Process free space before this allocation.
+                if (lastOffset < suballoc.offset)
+                {
+                    // There is free space from lastOffset to suballoc.offset.
+                    const UINT64 unusedRangeSize = suballoc.offset - lastOffset;
+                    PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize);
+                }
+
+                // 2. Process this allocation.
+                // There is allocation with suballoc.offset, suballoc.size.
+                PrintDetailedMap_Allocation(json, suballoc.offset, suballoc.size, suballoc.privateData);
+
+                // 3. Prepare for next iteration.
+                lastOffset = suballoc.offset + suballoc.size;
+                --nextAlloc2ndIndex;
+            }
+            // We are at the end.
+            else
+            {
+                if (lastOffset < size)
+                {
+                    // There is free space from lastOffset to size.
+                    const UINT64 unusedRangeSize = size - lastOffset;
+                    PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize);
+                }
+
+                // End of loop.
+                lastOffset = size;
+            }
+        }
+    }
+
+    PrintDetailedMap_End(json);
+}
+
+void BlockMetadata_Linear::DebugLogAllAllocations() const
+{
+    const SuballocationVectorType& suballocations1st = AccessSuballocations1st();
+    for (auto it = suballocations1st.begin() + m_1stNullItemsBeginCount; it != suballocations1st.end(); ++it)
+        if (it->type != SUBALLOCATION_TYPE_FREE)
+            DebugLogAllocation(it->offset, it->size, it->privateData);
+
+    const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd();
+    for (auto it = suballocations2nd.begin(); it != suballocations2nd.end(); ++it)
+        if (it->type != SUBALLOCATION_TYPE_FREE)
+            DebugLogAllocation(it->offset, it->size, it->privateData);
+}
+
+Suballocation& BlockMetadata_Linear::FindSuballocation(UINT64 offset) const
+{
+    const SuballocationVectorType& suballocations1st = AccessSuballocations1st();
+    const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd();
+
+    Suballocation refSuballoc;
+    refSuballoc.offset = offset;
+    // Rest of members stays uninitialized intentionally for better performance.
+
+    // Item from the 1st vector.
+    {
+        const SuballocationVectorType::const_iterator it = BinaryFindSorted(
+            suballocations1st.begin() + m_1stNullItemsBeginCount,
+            suballocations1st.end(),
+            refSuballoc,
+            SuballocationOffsetLess());
+        if (it != suballocations1st.end())
+        {
+            return const_cast<Suballocation&>(*it);
+        }
+    }
+
+    if (m_2ndVectorMode != SECOND_VECTOR_EMPTY)
+    {
+        // Rest of members stays uninitialized intentionally for better performance.
+        const SuballocationVectorType::const_iterator it = m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER ?
+            BinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, SuballocationOffsetLess()) :
+            BinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, SuballocationOffsetGreater());
+        if (it != suballocations2nd.end())
+        {
+            return const_cast<Suballocation&>(*it);
+        }
+    }
+
+    D3D12MA_ASSERT(0 && "Allocation not found in linear allocator!");
+    return const_cast<Suballocation&>(suballocations1st.back()); // Should never occur.
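+    // Both lookups above rely on BinaryFindSorted being given the comparator that
+    // matches the vector's ordering: 1st and a ring-buffer 2nd are sorted by
+    // increasing offset (SuballocationOffsetLess), while a double-stack 2nd is
+    // sorted by decreasing offset (SuballocationOffsetGreater).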
+} + +bool BlockMetadata_Linear::ShouldCompact1st() const +{ + const size_t nullItemCount = m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount; + const size_t suballocCount = AccessSuballocations1st().size(); + return suballocCount > 32 && nullItemCount * 2 >= (suballocCount - nullItemCount) * 3; +} + +void BlockMetadata_Linear::CleanupAfterFree() +{ + SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + + if (IsEmpty()) + { + suballocations1st.clear(); + suballocations2nd.clear(); + m_1stNullItemsBeginCount = 0; + m_1stNullItemsMiddleCount = 0; + m_2ndNullItemsCount = 0; + m_2ndVectorMode = SECOND_VECTOR_EMPTY; + } + else + { + const size_t suballoc1stCount = suballocations1st.size(); + const size_t nullItem1stCount = m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount; + D3D12MA_ASSERT(nullItem1stCount <= suballoc1stCount); + + // Find more null items at the beginning of 1st vector. + while (m_1stNullItemsBeginCount < suballoc1stCount && + suballocations1st[m_1stNullItemsBeginCount].type == SUBALLOCATION_TYPE_FREE) + { + ++m_1stNullItemsBeginCount; + --m_1stNullItemsMiddleCount; + } + + // Find more null items at the end of 1st vector. + while (m_1stNullItemsMiddleCount > 0 && + suballocations1st.back().type == SUBALLOCATION_TYPE_FREE) + { + --m_1stNullItemsMiddleCount; + suballocations1st.pop_back(); + } + + // Find more null items at the end of 2nd vector. + while (m_2ndNullItemsCount > 0 && + suballocations2nd.back().type == SUBALLOCATION_TYPE_FREE) + { + --m_2ndNullItemsCount; + suballocations2nd.pop_back(); + } + + // Find more null items at the beginning of 2nd vector. + while (m_2ndNullItemsCount > 0 && + suballocations2nd[0].type == SUBALLOCATION_TYPE_FREE) + { + --m_2ndNullItemsCount; + suballocations2nd.remove(0); + } + + if (ShouldCompact1st()) + { + const size_t nonNullItemCount = suballoc1stCount - nullItem1stCount; + size_t srcIndex = m_1stNullItemsBeginCount; + for (size_t dstIndex = 0; dstIndex < nonNullItemCount; ++dstIndex) + { + while (suballocations1st[srcIndex].type == SUBALLOCATION_TYPE_FREE) + { + ++srcIndex; + } + if (dstIndex != srcIndex) + { + suballocations1st[dstIndex] = suballocations1st[srcIndex]; + } + ++srcIndex; + } + suballocations1st.resize(nonNullItemCount); + m_1stNullItemsBeginCount = 0; + m_1stNullItemsMiddleCount = 0; + } + + // 2nd vector became empty. + if (suballocations2nd.empty()) + { + m_2ndVectorMode = SECOND_VECTOR_EMPTY; + } + + // 1st vector became empty. + if (suballocations1st.size() - m_1stNullItemsBeginCount == 0) + { + suballocations1st.clear(); + m_1stNullItemsBeginCount = 0; + + if (!suballocations2nd.empty() && m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + // Swap 1st with 2nd. Now 2nd is empty. 
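+                // The swap below is performed by flipping m_1stVectorIndex rather than
+                // copying elements; the exhausted 1st vector becomes the (empty) 2nd vector.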
+ m_2ndVectorMode = SECOND_VECTOR_EMPTY; + m_1stNullItemsMiddleCount = m_2ndNullItemsCount; + while (m_1stNullItemsBeginCount < suballocations2nd.size() && + suballocations2nd[m_1stNullItemsBeginCount].type == SUBALLOCATION_TYPE_FREE) + { + ++m_1stNullItemsBeginCount; + --m_1stNullItemsMiddleCount; + } + m_2ndNullItemsCount = 0; + m_1stVectorIndex ^= 1; + } + } + } + + D3D12MA_HEAVY_ASSERT(Validate()); +} + +bool BlockMetadata_Linear::CreateAllocationRequest_LowerAddress( + UINT64 allocSize, + UINT64 allocAlignment, + AllocationRequest* pAllocationRequest) +{ + const UINT64 blockSize = GetSize(); + SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + + if (m_2ndVectorMode == SECOND_VECTOR_EMPTY || m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + // Try to allocate at the end of 1st vector. + + UINT64 resultBaseOffset = 0; + if (!suballocations1st.empty()) + { + const Suballocation& lastSuballoc = suballocations1st.back(); + resultBaseOffset = lastSuballoc.offset + lastSuballoc.size + GetDebugMargin(); + } + + // Start from offset equal to beginning of free space. + UINT64 resultOffset = resultBaseOffset; + // Apply alignment. + resultOffset = AlignUp(resultOffset, allocAlignment); + + const UINT64 freeSpaceEnd = m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK ? + suballocations2nd.back().offset : blockSize; + + // There is enough free space at the end after alignment. + if (resultOffset + allocSize + GetDebugMargin() <= freeSpaceEnd) + { + // All tests passed: Success. + pAllocationRequest->allocHandle = (AllocHandle)(resultOffset + 1); + // pAllocationRequest->item, customData unused. + pAllocationRequest->algorithmData = ALLOC_REQUEST_END_OF_1ST; + return true; + } + } + + // Wrap-around to end of 2nd vector. Try to allocate there, watching for the + // beginning of 1st vector as the end of free space. + if (m_2ndVectorMode == SECOND_VECTOR_EMPTY || m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + D3D12MA_ASSERT(!suballocations1st.empty()); + + UINT64 resultBaseOffset = 0; + if (!suballocations2nd.empty()) + { + const Suballocation& lastSuballoc = suballocations2nd.back(); + resultBaseOffset = lastSuballoc.offset + lastSuballoc.size + GetDebugMargin(); + } + + // Start from offset equal to beginning of free space. + UINT64 resultOffset = resultBaseOffset; + + // Apply alignment. + resultOffset = AlignUp(resultOffset, allocAlignment); + + size_t index1st = m_1stNullItemsBeginCount; + // There is enough free space at the end after alignment. + if ((index1st == suballocations1st.size() && resultOffset + allocSize + GetDebugMargin() <= blockSize) || + (index1st < suballocations1st.size() && resultOffset + allocSize + GetDebugMargin() <= suballocations1st[index1st].offset)) + { + // All tests passed: Success. + pAllocationRequest->allocHandle = (AllocHandle)(resultOffset + 1); + pAllocationRequest->algorithmData = ALLOC_REQUEST_END_OF_2ND; + // pAllocationRequest->item, customData unused. 
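+            // Linear-algorithm handles encode offset + 1, so a valid offset of 0 still yields a non-null AllocHandle.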
+ return true; + } + } + return false; +} + +bool BlockMetadata_Linear::CreateAllocationRequest_UpperAddress( + UINT64 allocSize, + UINT64 allocAlignment, + AllocationRequest* pAllocationRequest) +{ + const UINT64 blockSize = GetSize(); + SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + D3D12MA_ASSERT(0 && "Trying to use pool with linear algorithm as double stack, while it is already being used as ring buffer."); + return false; + } + + // Try to allocate before 2nd.back(), or end of block if 2nd.empty(). + if (allocSize > blockSize) + { + return false; + } + UINT64 resultBaseOffset = blockSize - allocSize; + if (!suballocations2nd.empty()) + { + const Suballocation& lastSuballoc = suballocations2nd.back(); + resultBaseOffset = lastSuballoc.offset - allocSize; + if (allocSize > lastSuballoc.offset) + { + return false; + } + } + + // Start from offset equal to end of free space. + UINT64 resultOffset = resultBaseOffset; + // Apply debugMargin at the end. + if (GetDebugMargin() > 0) + { + if (resultOffset < GetDebugMargin()) + { + return false; + } + resultOffset -= GetDebugMargin(); + } + + // Apply alignment. + resultOffset = AlignDown(resultOffset, allocAlignment); + // There is enough free space. + const UINT64 endOf1st = !suballocations1st.empty() ? + suballocations1st.back().offset + suballocations1st.back().size : 0; + + if (endOf1st + GetDebugMargin() <= resultOffset) + { + // All tests passed: Success. + pAllocationRequest->allocHandle = (AllocHandle)(resultOffset + 1); + // pAllocationRequest->item unused. + pAllocationRequest->algorithmData = ALLOC_REQUEST_UPPER_ADDRESS; + return true; + } + return false; +} +#endif // _D3D12MA_BLOCK_METADATA_LINEAR_FUNCTIONS +#endif // _D3D12MA_BLOCK_METADATA_LINEAR + +#ifndef _D3D12MA_BLOCK_METADATA_TLSF +class BlockMetadata_TLSF : public BlockMetadata +{ +public: + BlockMetadata_TLSF(const ALLOCATION_CALLBACKS* allocationCallbacks, bool isVirtual); + virtual ~BlockMetadata_TLSF(); + + size_t GetAllocationCount() const override { return m_AllocCount; } + size_t GetFreeRegionsCount() const override { return m_BlocksFreeCount + 1; } + UINT64 GetSumFreeSize() const override { return m_BlocksFreeSize + m_NullBlock->size; } + bool IsEmpty() const override { return m_NullBlock->offset == 0; } + UINT64 GetAllocationOffset(AllocHandle allocHandle) const override { return ((Block*)allocHandle)->offset; }; + + void Init(UINT64 size) override; + bool Validate() const override; + void GetAllocationInfo(AllocHandle allocHandle, VIRTUAL_ALLOCATION_INFO& outInfo) const override; + + bool CreateAllocationRequest( + UINT64 allocSize, + UINT64 allocAlignment, + bool upperAddress, + UINT32 strategy, + AllocationRequest* pAllocationRequest) override; + + void Alloc( + const AllocationRequest& request, + UINT64 allocSize, + void* privateData) override; + + void Free(AllocHandle allocHandle) override; + void Clear() override; + + AllocHandle GetAllocationListBegin() const override; + AllocHandle GetNextAllocation(AllocHandle prevAlloc) const override; + UINT64 GetNextFreeRegionSize(AllocHandle alloc) const override; + void* GetAllocationPrivateData(AllocHandle allocHandle) const override; + void SetAllocationPrivateData(AllocHandle allocHandle, void* privateData) override; + + void AddStatistics(Statistics& inoutStats) const override; + void AddDetailedStatistics(DetailedStatistics& inoutStats) const override; + 
void WriteAllocationInfoToJson(JsonWriter& json) const override;
+    void DebugLogAllAllocations() const override;
+
+private:
+    // According to the original paper, the preferable value is 4 or 5:
+    // M. Masmano, I. Ripoll, A. Crespo, and J. Real "TLSF: a New Dynamic Memory Allocator for Real-Time Systems"
+    // http://www.gii.upv.es/tlsf/files/ecrts04_tlsf.pdf
+    static const UINT8 SECOND_LEVEL_INDEX = 5;
+    static const UINT16 SMALL_BUFFER_SIZE = 256;
+    static const UINT INITIAL_BLOCK_ALLOC_COUNT = 16;
+    static const UINT8 MEMORY_CLASS_SHIFT = 7;
+    static const UINT8 MAX_MEMORY_CLASSES = 65 - MEMORY_CLASS_SHIFT;
+
+    class Block
+    {
+    public:
+        UINT64 offset;
+        UINT64 size;
+        Block* prevPhysical;
+        Block* nextPhysical;
+
+        void MarkFree() { prevFree = NULL; }
+        void MarkTaken() { prevFree = this; }
+        bool IsFree() const { return prevFree != this; }
+        void*& PrivateData() { D3D12MA_HEAVY_ASSERT(!IsFree()); return privateData; }
+        Block*& PrevFree() { return prevFree; }
+        Block*& NextFree() { D3D12MA_HEAVY_ASSERT(IsFree()); return nextFree; }
+
+    private:
+        Block* prevFree; // Address of the same block here indicates that block is taken
+        union
+        {
+            Block* nextFree;
+            void* privateData;
+        };
+    };
+
+    size_t m_AllocCount = 0;
+    // Total number of free blocks besides null block
+    size_t m_BlocksFreeCount = 0;
+    // Total size of free blocks excluding null block
+    UINT64 m_BlocksFreeSize = 0;
+    UINT32 m_IsFreeBitmap = 0;
+    UINT8 m_MemoryClasses = 0;
+    UINT32 m_InnerIsFreeBitmap[MAX_MEMORY_CLASSES];
+    UINT32 m_ListsCount = 0;
+    /*
+    * 0: 0-3 lists for small buffers
+    * 1+: 0-(2^SLI-1) lists for normal buffers
+    */
+    Block** m_FreeList = NULL;
+    PoolAllocator<Block> m_BlockAllocator;
+    Block* m_NullBlock = NULL;
+
+    UINT8 SizeToMemoryClass(UINT64 size) const;
+    UINT16 SizeToSecondIndex(UINT64 size, UINT8 memoryClass) const;
+    UINT32 GetListIndex(UINT8 memoryClass, UINT16 secondIndex) const;
+    UINT32 GetListIndex(UINT64 size) const;
+
+    void RemoveFreeBlock(Block* block);
+    void InsertFreeBlock(Block* block);
+    void MergeBlock(Block* block, Block* prev);
+
+    Block* FindFreeBlock(UINT64 size, UINT32& listIndex) const;
+    bool CheckBlock(
+        Block& block,
+        UINT32 listIndex,
+        UINT64 allocSize,
+        UINT64 allocAlignment,
+        AllocationRequest* pAllocationRequest);
+
+    D3D12MA_CLASS_NO_COPY(BlockMetadata_TLSF)
+};
+
+#ifndef _D3D12MA_BLOCK_METADATA_TLSF_FUNCTIONS
+BlockMetadata_TLSF::BlockMetadata_TLSF(const ALLOCATION_CALLBACKS* allocationCallbacks, bool isVirtual)
+    : BlockMetadata(allocationCallbacks, isVirtual),
+    m_BlockAllocator(*allocationCallbacks, INITIAL_BLOCK_ALLOC_COUNT)
+{
+    D3D12MA_ASSERT(allocationCallbacks);
+}
+
+BlockMetadata_TLSF::~BlockMetadata_TLSF()
+{
+    D3D12MA_DELETE_ARRAY(*GetAllocs(), m_FreeList, m_ListsCount);
+}
+
+void BlockMetadata_TLSF::Init(UINT64 size)
+{
+    BlockMetadata::Init(size);
+
+    m_NullBlock = m_BlockAllocator.Alloc();
+    m_NullBlock->size = size;
+    m_NullBlock->offset = 0;
+    m_NullBlock->prevPhysical = NULL;
+    m_NullBlock->nextPhysical = NULL;
+    m_NullBlock->MarkFree();
+    m_NullBlock->NextFree() = NULL;
+    m_NullBlock->PrevFree() = NULL;
+    UINT8 memoryClass = SizeToMemoryClass(size);
+    UINT16 sli = SizeToSecondIndex(size, memoryClass);
+    m_ListsCount = (memoryClass == 0 ?
0 : (memoryClass - 1) * (1UL << SECOND_LEVEL_INDEX) + sli) + 1; + if (IsVirtual()) + m_ListsCount += 1UL << SECOND_LEVEL_INDEX; + else + m_ListsCount += 4; + + m_MemoryClasses = memoryClass + 2; + memset(m_InnerIsFreeBitmap, 0, MAX_MEMORY_CLASSES * sizeof(UINT32)); + + m_FreeList = D3D12MA_NEW_ARRAY(*GetAllocs(), Block*, m_ListsCount); + memset(m_FreeList, 0, m_ListsCount * sizeof(Block*)); +} + +bool BlockMetadata_TLSF::Validate() const +{ + D3D12MA_VALIDATE(GetSumFreeSize() <= GetSize()); + + UINT64 calculatedSize = m_NullBlock->size; + UINT64 calculatedFreeSize = m_NullBlock->size; + size_t allocCount = 0; + size_t freeCount = 0; + + // Check integrity of free lists + for (UINT32 list = 0; list < m_ListsCount; ++list) + { + Block* block = m_FreeList[list]; + if (block != NULL) + { + D3D12MA_VALIDATE(block->IsFree()); + D3D12MA_VALIDATE(block->PrevFree() == NULL); + while (block->NextFree()) + { + D3D12MA_VALIDATE(block->NextFree()->IsFree()); + D3D12MA_VALIDATE(block->NextFree()->PrevFree() == block); + block = block->NextFree(); + } + } + } + + D3D12MA_VALIDATE(m_NullBlock->nextPhysical == NULL); + if (m_NullBlock->prevPhysical) + { + D3D12MA_VALIDATE(m_NullBlock->prevPhysical->nextPhysical == m_NullBlock); + } + + // Check all blocks + UINT64 nextOffset = m_NullBlock->offset; + for (Block* prev = m_NullBlock->prevPhysical; prev != NULL; prev = prev->prevPhysical) + { + D3D12MA_VALIDATE(prev->offset + prev->size == nextOffset); + nextOffset = prev->offset; + calculatedSize += prev->size; + + UINT32 listIndex = GetListIndex(prev->size); + if (prev->IsFree()) + { + ++freeCount; + // Check if free block belongs to free list + Block* freeBlock = m_FreeList[listIndex]; + D3D12MA_VALIDATE(freeBlock != NULL); + + bool found = false; + do + { + if (freeBlock == prev) + found = true; + + freeBlock = freeBlock->NextFree(); + } while (!found && freeBlock != NULL); + + D3D12MA_VALIDATE(found); + calculatedFreeSize += prev->size; + } + else + { + ++allocCount; + // Check if taken block is not on a free list + Block* freeBlock = m_FreeList[listIndex]; + while (freeBlock) + { + D3D12MA_VALIDATE(freeBlock != prev); + freeBlock = freeBlock->NextFree(); + } + } + + if (prev->prevPhysical) + { + D3D12MA_VALIDATE(prev->prevPhysical->nextPhysical == prev); + } + } + + D3D12MA_VALIDATE(nextOffset == 0); + D3D12MA_VALIDATE(calculatedSize == GetSize()); + D3D12MA_VALIDATE(calculatedFreeSize == GetSumFreeSize()); + D3D12MA_VALIDATE(allocCount == m_AllocCount); + D3D12MA_VALIDATE(freeCount == m_BlocksFreeCount); + + return true; +} + +void BlockMetadata_TLSF::GetAllocationInfo(AllocHandle allocHandle, VIRTUAL_ALLOCATION_INFO& outInfo) const +{ + Block* block = (Block*)allocHandle; + D3D12MA_ASSERT(!block->IsFree() && "Cannot get allocation info for free block!"); + outInfo.Offset = block->offset; + outInfo.Size = block->size; + outInfo.pPrivateData = block->PrivateData(); +} + +bool BlockMetadata_TLSF::CreateAllocationRequest( + UINT64 allocSize, + UINT64 allocAlignment, + bool upperAddress, + UINT32 strategy, + AllocationRequest* pAllocationRequest) +{ + D3D12MA_ASSERT(allocSize > 0 && "Cannot allocate empty block!"); + D3D12MA_ASSERT(!upperAddress && "ALLOCATION_FLAG_UPPER_ADDRESS can be used only with linear algorithm."); + D3D12MA_ASSERT(pAllocationRequest != NULL); + D3D12MA_HEAVY_ASSERT(Validate()); + + allocSize += GetDebugMargin(); + // Quick check for too small pool + if (allocSize > GetSumFreeSize()) + return false; + + // If no free blocks in pool then check only null block + if (m_BlocksFreeCount == 
0)
+        return CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, pAllocationRequest);
+
+    // Round up to the next block
+    UINT64 sizeForNextList = allocSize;
+    UINT16 smallSizeStep = SMALL_BUFFER_SIZE / (IsVirtual() ? 1 << SECOND_LEVEL_INDEX : 4);
+    if (allocSize > SMALL_BUFFER_SIZE)
+    {
+        sizeForNextList += (1ULL << (BitScanMSB(allocSize) - SECOND_LEVEL_INDEX));
+    }
+    else if (allocSize > SMALL_BUFFER_SIZE - smallSizeStep)
+        sizeForNextList = SMALL_BUFFER_SIZE + 1;
+    else
+        sizeForNextList += smallSizeStep;
+
+    UINT32 nextListIndex = 0;
+    UINT32 prevListIndex = 0;
+    Block* nextListBlock = NULL;
+    Block* prevListBlock = NULL;
+
+    // Check blocks according to strategies
+    if (strategy & ALLOCATION_FLAG_STRATEGY_MIN_TIME)
+    {
+        // Quick check for larger block first
+        nextListBlock = FindFreeBlock(sizeForNextList, nextListIndex);
+        if (nextListBlock != NULL && CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, pAllocationRequest))
+            return true;
+
+        // If not fitted then null block
+        if (CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, pAllocationRequest))
+            return true;
+
+        // Null block failed, search larger bucket
+        while (nextListBlock)
+        {
+            if (CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, pAllocationRequest))
+                return true;
+            nextListBlock = nextListBlock->NextFree();
+        }
+
+        // Failed again, check best fit bucket
+        prevListBlock = FindFreeBlock(allocSize, prevListIndex);
+        while (prevListBlock)
+        {
+            if (CheckBlock(*prevListBlock, prevListIndex, allocSize, allocAlignment, pAllocationRequest))
+                return true;
+            prevListBlock = prevListBlock->NextFree();
+        }
+    }
+    else if (strategy & ALLOCATION_FLAG_STRATEGY_MIN_MEMORY)
+    {
+        // Check best fit bucket
+        prevListBlock = FindFreeBlock(allocSize, prevListIndex);
+        while (prevListBlock)
+        {
+            if (CheckBlock(*prevListBlock, prevListIndex, allocSize, allocAlignment, pAllocationRequest))
+                return true;
+            prevListBlock = prevListBlock->NextFree();
+        }
+
+        // If failed check null block
+        if (CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, pAllocationRequest))
+            return true;
+
+        // Check larger bucket
+        nextListBlock = FindFreeBlock(sizeForNextList, nextListIndex);
+        while (nextListBlock)
+        {
+            if (CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, pAllocationRequest))
+                return true;
+            nextListBlock = nextListBlock->NextFree();
+        }
+    }
+    else if (strategy & ALLOCATION_FLAG_STRATEGY_MIN_OFFSET)
+    {
+        // Perform search from the start
+        Vector<Block*> blockList(m_BlocksFreeCount, *GetAllocs());
+
+        size_t i = m_BlocksFreeCount;
+        for (Block* block = m_NullBlock->prevPhysical; block != NULL; block = block->prevPhysical)
+        {
+            if (block->IsFree() && block->size >= allocSize)
+                blockList[--i] = block;
+        }
+
+        for (; i < m_BlocksFreeCount; ++i)
+        {
+            Block& block = *blockList[i];
+            if (CheckBlock(block, GetListIndex(block.size), allocSize, allocAlignment, pAllocationRequest))
+                return true;
+        }
+
+        // If failed check null block
+        if (CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, pAllocationRequest))
+            return true;
+
+        // Whole range searched, no more memory
+        return false;
+    }
+    else
+    {
+        // Check larger bucket
+        nextListBlock = FindFreeBlock(sizeForNextList, nextListIndex);
+        while (nextListBlock)
+        {
+            if (CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, pAllocationRequest))
+                return true;
+            nextListBlock = nextListBlock->NextFree();
+        }
+
+        // If failed check null block
+        if (CheckBlock(*m_NullBlock, m_ListsCount,
allocSize, allocAlignment, pAllocationRequest))
+            return true;
+
+        // Check best fit bucket
+        prevListBlock = FindFreeBlock(allocSize, prevListIndex);
+        while (prevListBlock)
+        {
+            if (CheckBlock(*prevListBlock, prevListIndex, allocSize, allocAlignment, pAllocationRequest))
+                return true;
+            prevListBlock = prevListBlock->NextFree();
+        }
+    }
+
+    // Worst case, full search has to be done
+    while (++nextListIndex < m_ListsCount)
+    {
+        nextListBlock = m_FreeList[nextListIndex];
+        while (nextListBlock)
+        {
+            if (CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, pAllocationRequest))
+                return true;
+            nextListBlock = nextListBlock->NextFree();
+        }
+    }
+
+    // No more memory sadly
+    return false;
+}
+
+void BlockMetadata_TLSF::Alloc(
+    const AllocationRequest& request,
+    UINT64 allocSize,
+    void* privateData)
+{
+    // Get block and pop it from the free list
+    Block* currentBlock = (Block*)request.allocHandle;
+    UINT64 offset = request.algorithmData;
+    D3D12MA_ASSERT(currentBlock != NULL);
+    D3D12MA_ASSERT(currentBlock->offset <= offset);
+
+    if (currentBlock != m_NullBlock)
+        RemoveFreeBlock(currentBlock);
+
+    // Append missing alignment to prev block or create new one
+    UINT64 missingAlignment = offset - currentBlock->offset;
+    if (missingAlignment)
+    {
+        Block* prevBlock = currentBlock->prevPhysical;
+        D3D12MA_ASSERT(prevBlock != NULL && "There should be no missing alignment at offset 0!");
+
+        if (prevBlock->IsFree() && prevBlock->size != GetDebugMargin())
+        {
+            UINT32 oldList = GetListIndex(prevBlock->size);
+            prevBlock->size += missingAlignment;
+            // Check if new size crosses list bucket
+            if (oldList != GetListIndex(prevBlock->size))
+            {
+                prevBlock->size -= missingAlignment;
+                RemoveFreeBlock(prevBlock);
+                prevBlock->size += missingAlignment;
+                InsertFreeBlock(prevBlock);
+            }
+            else
+                m_BlocksFreeSize += missingAlignment;
+        }
+        else
+        {
+            Block* newBlock = m_BlockAllocator.Alloc();
+            currentBlock->prevPhysical = newBlock;
+            prevBlock->nextPhysical = newBlock;
+            newBlock->prevPhysical = prevBlock;
+            newBlock->nextPhysical = currentBlock;
+            newBlock->size = missingAlignment;
+            newBlock->offset = currentBlock->offset;
+            newBlock->MarkTaken();
+
+            InsertFreeBlock(newBlock);
+        }
+
+        currentBlock->size -= missingAlignment;
+        currentBlock->offset += missingAlignment;
+    }
+
+    UINT64 size = request.size + GetDebugMargin();
+    if (currentBlock->size == size)
+    {
+        if (currentBlock == m_NullBlock)
+        {
+            // Setup new null block
+            m_NullBlock = m_BlockAllocator.Alloc();
+            m_NullBlock->size = 0;
+            m_NullBlock->offset = currentBlock->offset + size;
+            m_NullBlock->prevPhysical = currentBlock;
+            m_NullBlock->nextPhysical = NULL;
+            m_NullBlock->MarkFree();
+            m_NullBlock->PrevFree() = NULL;
+            m_NullBlock->NextFree() = NULL;
+            currentBlock->nextPhysical = m_NullBlock;
+            currentBlock->MarkTaken();
+        }
+    }
+    else
+    {
+        D3D12MA_ASSERT(currentBlock->size > size && "Proper block already found, shouldn't find smaller one!");
+
+        // Create new free block
+        Block* newBlock = m_BlockAllocator.Alloc();
+        newBlock->size = currentBlock->size - size;
+        newBlock->offset = currentBlock->offset + size;
+        newBlock->prevPhysical = currentBlock;
+        newBlock->nextPhysical = currentBlock->nextPhysical;
+        currentBlock->nextPhysical = newBlock;
+        currentBlock->size = size;
+
+        if (currentBlock == m_NullBlock)
+        {
+            m_NullBlock = newBlock;
+            m_NullBlock->MarkFree();
+            m_NullBlock->NextFree() = NULL;
+            m_NullBlock->PrevFree() = NULL;
+            currentBlock->MarkTaken();
+        }
+        else
+        {
+
newBlock->nextPhysical->prevPhysical = newBlock; + newBlock->MarkTaken(); + InsertFreeBlock(newBlock); + } + } + currentBlock->PrivateData() = privateData; + + if (GetDebugMargin() > 0) + { + currentBlock->size -= GetDebugMargin(); + Block* newBlock = m_BlockAllocator.Alloc(); + newBlock->size = GetDebugMargin(); + newBlock->offset = currentBlock->offset + currentBlock->size; + newBlock->prevPhysical = currentBlock; + newBlock->nextPhysical = currentBlock->nextPhysical; + newBlock->MarkTaken(); + currentBlock->nextPhysical->prevPhysical = newBlock; + currentBlock->nextPhysical = newBlock; + InsertFreeBlock(newBlock); + } + ++m_AllocCount; +} + +void BlockMetadata_TLSF::Free(AllocHandle allocHandle) +{ + Block* block = (Block*)allocHandle; + Block* next = block->nextPhysical; + D3D12MA_ASSERT(!block->IsFree() && "Block is already free!"); + + --m_AllocCount; + if (GetDebugMargin() > 0) + { + RemoveFreeBlock(next); + MergeBlock(next, block); + block = next; + next = next->nextPhysical; + } + + // Try merging + Block* prev = block->prevPhysical; + if (prev != NULL && prev->IsFree() && prev->size != GetDebugMargin()) + { + RemoveFreeBlock(prev); + MergeBlock(block, prev); + } + + if (!next->IsFree()) + InsertFreeBlock(block); + else if (next == m_NullBlock) + MergeBlock(m_NullBlock, block); + else + { + RemoveFreeBlock(next); + MergeBlock(next, block); + InsertFreeBlock(next); + } +} + +void BlockMetadata_TLSF::Clear() +{ + m_AllocCount = 0; + m_BlocksFreeCount = 0; + m_BlocksFreeSize = 0; + m_IsFreeBitmap = 0; + m_NullBlock->offset = 0; + m_NullBlock->size = GetSize(); + Block* block = m_NullBlock->prevPhysical; + m_NullBlock->prevPhysical = NULL; + while (block) + { + Block* prev = block->prevPhysical; + m_BlockAllocator.Free(block); + block = prev; + } + memset(m_FreeList, 0, m_ListsCount * sizeof(Block*)); + memset(m_InnerIsFreeBitmap, 0, m_MemoryClasses * sizeof(UINT32)); +} + +AllocHandle BlockMetadata_TLSF::GetAllocationListBegin() const +{ + if (m_AllocCount == 0) + return (AllocHandle)0; + + for (Block* block = m_NullBlock->prevPhysical; block; block = block->prevPhysical) + { + if (!block->IsFree()) + return (AllocHandle)block; + } + D3D12MA_ASSERT(false && "If m_AllocCount > 0 then should find any allocation!"); + return (AllocHandle)0; +} + +AllocHandle BlockMetadata_TLSF::GetNextAllocation(AllocHandle prevAlloc) const +{ + Block* startBlock = (Block*)prevAlloc; + D3D12MA_ASSERT(!startBlock->IsFree() && "Incorrect block!"); + + for (Block* block = startBlock->prevPhysical; block; block = block->prevPhysical) + { + if (!block->IsFree()) + return (AllocHandle)block; + } + return (AllocHandle)0; +} + +UINT64 BlockMetadata_TLSF::GetNextFreeRegionSize(AllocHandle alloc) const +{ + Block* block = (Block*)alloc; + D3D12MA_ASSERT(!block->IsFree() && "Incorrect block!"); + + if (block->prevPhysical) + return block->prevPhysical->IsFree() ? 
block->prevPhysical->size : 0;
+    return 0;
+}
+
+void* BlockMetadata_TLSF::GetAllocationPrivateData(AllocHandle allocHandle) const
+{
+    Block* block = (Block*)allocHandle;
+    D3D12MA_ASSERT(!block->IsFree() && "Cannot get user data for free block!");
+    return block->PrivateData();
+}
+
+void BlockMetadata_TLSF::SetAllocationPrivateData(AllocHandle allocHandle, void* privateData)
+{
+    Block* block = (Block*)allocHandle;
+    D3D12MA_ASSERT(!block->IsFree() && "Trying to set user data for a block that is not allocated!");
+    block->PrivateData() = privateData;
+}
+
+void BlockMetadata_TLSF::AddStatistics(Statistics& inoutStats) const
+{
+    inoutStats.BlockCount++;
+    inoutStats.AllocationCount += static_cast<UINT>(m_AllocCount);
+    inoutStats.BlockBytes += GetSize();
+    inoutStats.AllocationBytes += GetSize() - GetSumFreeSize();
+}
+
+void BlockMetadata_TLSF::AddDetailedStatistics(DetailedStatistics& inoutStats) const
+{
+    inoutStats.Stats.BlockCount++;
+    inoutStats.Stats.BlockBytes += GetSize();
+
+    for (Block* block = m_NullBlock->prevPhysical; block != NULL; block = block->prevPhysical)
+    {
+        if (block->IsFree())
+            AddDetailedStatisticsUnusedRange(inoutStats, block->size);
+        else
+            AddDetailedStatisticsAllocation(inoutStats, block->size);
+    }
+
+    if (m_NullBlock->size > 0)
+        AddDetailedStatisticsUnusedRange(inoutStats, m_NullBlock->size);
+}
+
+void BlockMetadata_TLSF::WriteAllocationInfoToJson(JsonWriter& json) const
+{
+    size_t blockCount = m_AllocCount + m_BlocksFreeCount;
+    Vector<Block*> blockList(blockCount, *GetAllocs());
+
+    size_t i = blockCount;
+    if (m_NullBlock->size > 0)
+    {
+        ++blockCount;
+        blockList.push_back(m_NullBlock);
+    }
+    for (Block* block = m_NullBlock->prevPhysical; block != NULL; block = block->prevPhysical)
+    {
+        blockList[--i] = block;
+    }
+    D3D12MA_ASSERT(i == 0);
+
+    PrintDetailedMap_Begin(json, GetSumFreeSize(), GetAllocationCount(), m_BlocksFreeCount + static_cast<bool>(m_NullBlock->size));
+    for (; i < blockCount; ++i)
+    {
+        Block* block = blockList[i];
+        if (block->IsFree())
+            PrintDetailedMap_UnusedRange(json, block->offset, block->size);
+        else
+            PrintDetailedMap_Allocation(json, block->offset, block->size, block->PrivateData());
+    }
+    PrintDetailedMap_End(json);
+}
+
+void BlockMetadata_TLSF::DebugLogAllAllocations() const
+{
+    for (Block* block = m_NullBlock->prevPhysical; block != NULL; block = block->prevPhysical)
+    {
+        if (!block->IsFree())
+        {
+            DebugLogAllocation(block->offset, block->size, block->PrivateData());
+        }
+    }
+}
+
+UINT8 BlockMetadata_TLSF::SizeToMemoryClass(UINT64 size) const
+{
+    if (size > SMALL_BUFFER_SIZE)
+        return BitScanMSB(size) - MEMORY_CLASS_SHIFT;
+    return 0;
+}
+
+UINT16 BlockMetadata_TLSF::SizeToSecondIndex(UINT64 size, UINT8 memoryClass) const
+{
+    if (memoryClass == 0)
+    {
+        if (IsVirtual())
+            return static_cast<UINT16>((size - 1) / 8);
+        else
+            return static_cast<UINT16>((size - 1) / 64);
+    }
+    return static_cast<UINT16>((size >> (memoryClass + MEMORY_CLASS_SHIFT - SECOND_LEVEL_INDEX)) ^ (1U << SECOND_LEVEL_INDEX));
+}
+
+UINT32 BlockMetadata_TLSF::GetListIndex(UINT8 memoryClass, UINT16 secondIndex) const
+{
+    if (memoryClass == 0)
+        return secondIndex;
+
+    const UINT32 index = static_cast<UINT32>(memoryClass - 1) * (1 << SECOND_LEVEL_INDEX) + secondIndex;
+    if (IsVirtual())
+        return index + (1 << SECOND_LEVEL_INDEX);
+    else
+        return index + 4;
+}
+
+UINT32 BlockMetadata_TLSF::GetListIndex(UINT64 size) const
+{
+    UINT8 memoryClass = SizeToMemoryClass(size);
+    return GetListIndex(memoryClass, SizeToSecondIndex(size, memoryClass));
+}
+
+void BlockMetadata_TLSF::RemoveFreeBlock(Block*
block)
+{
+    D3D12MA_ASSERT(block != m_NullBlock);
+    D3D12MA_ASSERT(block->IsFree());
+
+    if (block->NextFree() != NULL)
+        block->NextFree()->PrevFree() = block->PrevFree();
+    if (block->PrevFree() != NULL)
+        block->PrevFree()->NextFree() = block->NextFree();
+    else
+    {
+        UINT8 memClass = SizeToMemoryClass(block->size);
+        UINT16 secondIndex = SizeToSecondIndex(block->size, memClass);
+        UINT32 index = GetListIndex(memClass, secondIndex);
+        m_FreeList[index] = block->NextFree();
+        if (block->NextFree() == NULL)
+        {
+            m_InnerIsFreeBitmap[memClass] &= ~(1U << secondIndex);
+            if (m_InnerIsFreeBitmap[memClass] == 0)
+                m_IsFreeBitmap &= ~(1UL << memClass);
+        }
+    }
+    block->MarkTaken();
+    block->PrivateData() = NULL;
+    --m_BlocksFreeCount;
+    m_BlocksFreeSize -= block->size;
+}
+
+void BlockMetadata_TLSF::InsertFreeBlock(Block* block)
+{
+    D3D12MA_ASSERT(block != m_NullBlock);
+    D3D12MA_ASSERT(!block->IsFree() && "Cannot insert block twice!");
+
+    UINT8 memClass = SizeToMemoryClass(block->size);
+    UINT16 secondIndex = SizeToSecondIndex(block->size, memClass);
+    UINT32 index = GetListIndex(memClass, secondIndex);
+    block->PrevFree() = NULL;
+    block->NextFree() = m_FreeList[index];
+    m_FreeList[index] = block;
+    if (block->NextFree() != NULL)
+        block->NextFree()->PrevFree() = block;
+    else
+    {
+        m_InnerIsFreeBitmap[memClass] |= 1U << secondIndex;
+        m_IsFreeBitmap |= 1UL << memClass;
+    }
+    ++m_BlocksFreeCount;
+    m_BlocksFreeSize += block->size;
+}
+
+void BlockMetadata_TLSF::MergeBlock(Block* block, Block* prev)
+{
+    D3D12MA_ASSERT(block->prevPhysical == prev && "Cannot merge separate physical regions!");
+    D3D12MA_ASSERT(!prev->IsFree() && "Cannot merge block that belongs to free list!");
+
+    block->offset = prev->offset;
+    block->size += prev->size;
+    block->prevPhysical = prev->prevPhysical;
+    if (block->prevPhysical)
+        block->prevPhysical->nextPhysical = block;
+    m_BlockAllocator.Free(prev);
+}
+
+BlockMetadata_TLSF::Block* BlockMetadata_TLSF::FindFreeBlock(UINT64 size, UINT32& listIndex) const
+{
+    UINT8 memoryClass = SizeToMemoryClass(size);
+    UINT32 innerFreeMap = m_InnerIsFreeBitmap[memoryClass] & (~0U << SizeToSecondIndex(size, memoryClass));
+    if (!innerFreeMap)
+    {
+        // Check higher levels for available blocks
+        UINT32 freeMap = m_IsFreeBitmap & (~0UL << (memoryClass + 1));
+        if (!freeMap)
+            return NULL; // No more memory available
+
+        // Find lowest free region
+        memoryClass = BitScanLSB(freeMap);
+        innerFreeMap = m_InnerIsFreeBitmap[memoryClass];
+        D3D12MA_ASSERT(innerFreeMap != 0);
+    }
+    // Find lowest free subregion
+    listIndex = GetListIndex(memoryClass, BitScanLSB(innerFreeMap));
+    return m_FreeList[listIndex];
+}
+
+bool BlockMetadata_TLSF::CheckBlock(
+    Block& block,
+    UINT32 listIndex,
+    UINT64 allocSize,
+    UINT64 allocAlignment,
+    AllocationRequest* pAllocationRequest)
+{
+    D3D12MA_ASSERT(block.IsFree() && "Block is already taken!");
+
+    UINT64 alignedOffset = AlignUp(block.offset, allocAlignment);
+    if (block.size < allocSize + alignedOffset - block.offset)
+        return false;
+
+    // Alloc successful
+    pAllocationRequest->allocHandle = (AllocHandle)&block;
+    pAllocationRequest->size = allocSize - GetDebugMargin();
+    pAllocationRequest->algorithmData = alignedOffset;
+
+    // Place block at the start of the list if it's a normal block
+    if (listIndex != m_ListsCount && block.PrevFree())
+    {
+        block.PrevFree()->NextFree() = block.NextFree();
+        if (block.NextFree())
+            block.NextFree()->PrevFree() = block.PrevFree();
+        block.PrevFree() = NULL;
+        block.NextFree() = m_FreeList[listIndex];
+        m_FreeList[listIndex] = &block;
+        if (block.NextFree())
+            block.NextFree()->PrevFree() = &block;
+    }
+
+    return true;
+}
+#endif // _D3D12MA_BLOCK_METADATA_TLSF_FUNCTIONS
+#endif // _D3D12MA_BLOCK_METADATA_TLSF
+
+#ifndef _D3D12MA_MEMORY_BLOCK
+/*
+Represents a single block of device memory (heap).
+Base class for inheritance.
+Thread-safety: This class must be externally synchronized.
+*/
+class MemoryBlock
+{
+public:
+    // Creates the ID3D12Heap.
+    MemoryBlock(
+        AllocatorPimpl* allocator,
+        const D3D12_HEAP_PROPERTIES& heapProps,
+        D3D12_HEAP_FLAGS heapFlags,
+        UINT64 size,
+        UINT id);
+    virtual ~MemoryBlock();
+
+    const D3D12_HEAP_PROPERTIES& GetHeapProperties() const { return m_HeapProps; }
+    D3D12_HEAP_FLAGS GetHeapFlags() const { return m_HeapFlags; }
+    UINT64 GetSize() const { return m_Size; }
+    UINT GetId() const { return m_Id; }
+    ID3D12Heap* GetHeap() const { return m_Heap; }
+
+protected:
+    AllocatorPimpl* const m_Allocator;
+    const D3D12_HEAP_PROPERTIES m_HeapProps;
+    const D3D12_HEAP_FLAGS m_HeapFlags;
+    const UINT64 m_Size;
+    const UINT m_Id;
+
+    HRESULT Init(ID3D12ProtectedResourceSession* pProtectedSession, bool denyMsaaTextures);
+
+private:
+    ID3D12Heap* m_Heap = NULL;
+
+    D3D12MA_CLASS_NO_COPY(MemoryBlock)
+};
+#endif // _D3D12MA_MEMORY_BLOCK
+
+#ifndef _D3D12MA_NORMAL_BLOCK
+/*
+Represents a single block of device memory (heap) with all the data about its
+regions (aka suballocations, Allocation), assigned and free.
+Thread-safety: This class must be externally synchronized.
+*/
+class NormalBlock : public MemoryBlock
+{
+public:
+    BlockMetadata* m_pMetadata;
+
+    NormalBlock(
+        AllocatorPimpl* allocator,
+        BlockVector* blockVector,
+        const D3D12_HEAP_PROPERTIES& heapProps,
+        D3D12_HEAP_FLAGS heapFlags,
+        UINT64 size,
+        UINT id);
+    virtual ~NormalBlock();
+
+    BlockVector* GetBlockVector() const { return m_BlockVector; }
+
+    // 'algorithm' should be one of the *_ALGORITHM_* flags in enums POOL_FLAGS or VIRTUAL_BLOCK_FLAGS
+    HRESULT Init(UINT32 algorithm, ID3D12ProtectedResourceSession* pProtectedSession, bool denyMsaaTextures);
+
+    // Validates all data structures inside this object. If not valid, returns false.
+    bool Validate() const;
+
+private:
+    BlockVector* m_BlockVector;
+
+    D3D12MA_CLASS_NO_COPY(NormalBlock)
+};
+#endif // _D3D12MA_NORMAL_BLOCK
+
+#ifndef _D3D12MA_COMMITTED_ALLOCATION_LIST_ITEM_TRAITS
+struct CommittedAllocationListItemTraits
+{
+    using ItemType = Allocation;
+
+    static ItemType* GetPrev(const ItemType* item)
+    {
+        D3D12MA_ASSERT(item->m_PackedData.GetType() == Allocation::TYPE_COMMITTED || item->m_PackedData.GetType() == Allocation::TYPE_HEAP);
+        return item->m_Committed.prev;
+    }
+    static ItemType* GetNext(const ItemType* item)
+    {
+        D3D12MA_ASSERT(item->m_PackedData.GetType() == Allocation::TYPE_COMMITTED || item->m_PackedData.GetType() == Allocation::TYPE_HEAP);
+        return item->m_Committed.next;
+    }
+    static ItemType*& AccessPrev(ItemType* item)
+    {
+        D3D12MA_ASSERT(item->m_PackedData.GetType() == Allocation::TYPE_COMMITTED || item->m_PackedData.GetType() == Allocation::TYPE_HEAP);
+        return item->m_Committed.prev;
+    }
+    static ItemType*& AccessNext(ItemType* item)
+    {
+        D3D12MA_ASSERT(item->m_PackedData.GetType() == Allocation::TYPE_COMMITTED || item->m_PackedData.GetType() == Allocation::TYPE_HEAP);
+        return item->m_Committed.next;
+    }
+};
+#endif // _D3D12MA_COMMITTED_ALLOCATION_LIST_ITEM_TRAITS
+
+#ifndef _D3D12MA_COMMITTED_ALLOCATION_LIST
+/*
+Stores linked list of Allocation objects that are of TYPE_COMMITTED or TYPE_HEAP.
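+Used for the allocator's per-heap-type default lists as well as for custom pools that support committed allocations.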
+Thread-safe, synchronized internally.
+*/
+class CommittedAllocationList
+{
+public:
+    CommittedAllocationList() = default;
+    void Init(bool useMutex, D3D12_HEAP_TYPE heapType, PoolPimpl* pool);
+    ~CommittedAllocationList();
+
+    D3D12_HEAP_TYPE GetHeapType() const { return m_HeapType; }
+    PoolPimpl* GetPool() const { return m_Pool; }
+    UINT GetMemorySegmentGroup(AllocatorPimpl* allocator) const;
+
+    void AddStatistics(Statistics& inoutStats);
+    void AddDetailedStatistics(DetailedStatistics& inoutStats);
+    // Writes JSON array with the list of allocations.
+    void BuildStatsString(JsonWriter& json);
+
+    void Register(Allocation* alloc);
+    void Unregister(Allocation* alloc);
+
+private:
+    using CommittedAllocationLinkedList = IntrusiveLinkedList<CommittedAllocationListItemTraits>;
+
+    bool m_UseMutex = true;
+    D3D12_HEAP_TYPE m_HeapType = D3D12_HEAP_TYPE_CUSTOM;
+    PoolPimpl* m_Pool = NULL;
+
+    D3D12MA_RW_MUTEX m_Mutex;
+    CommittedAllocationLinkedList m_AllocationList;
+};
+#endif // _D3D12MA_COMMITTED_ALLOCATION_LIST
+
+#ifndef _D3D12MA_COMMITTED_ALLOCATION_PARAMETERS
+struct CommittedAllocationParameters
+{
+    CommittedAllocationList* m_List = NULL;
+    D3D12_HEAP_PROPERTIES m_HeapProperties = {};
+    D3D12_HEAP_FLAGS m_HeapFlags = D3D12_HEAP_FLAG_NONE;
+    ID3D12ProtectedResourceSession* m_ProtectedSession = NULL;
+    bool m_CanAlias = false;
+    D3D12_RESIDENCY_PRIORITY m_ResidencyPriority = D3D12_RESIDENCY_PRIORITY_NONE;
+
+    bool IsValid() const { return m_List != NULL; }
+};
+#endif // _D3D12MA_COMMITTED_ALLOCATION_PARAMETERS
+
+// Simple variant data structure to hold all possible variations of ID3D12Device*::CreateCommittedResource* and ID3D12Device*::CreatePlacedResource* arguments
+struct CREATE_RESOURCE_PARAMS
+{
+    CREATE_RESOURCE_PARAMS() = delete;
+    CREATE_RESOURCE_PARAMS(
+        const D3D12_RESOURCE_DESC* pResourceDesc,
+        D3D12_RESOURCE_STATES InitialResourceState,
+        const D3D12_CLEAR_VALUE* pOptimizedClearValue)
+        : Variant(VARIANT_WITH_STATE)
+        , pResourceDesc(pResourceDesc)
+        , InitialResourceState(InitialResourceState)
+        , pOptimizedClearValue(pOptimizedClearValue)
+    {
+    }
+#ifdef __ID3D12Device8_INTERFACE_DEFINED__
+    CREATE_RESOURCE_PARAMS(
+        const D3D12_RESOURCE_DESC1* pResourceDesc,
+        D3D12_RESOURCE_STATES InitialResourceState,
+        const D3D12_CLEAR_VALUE* pOptimizedClearValue)
+        : Variant(VARIANT_WITH_STATE_AND_DESC1)
+        , pResourceDesc1(pResourceDesc)
+        , InitialResourceState(InitialResourceState)
+        , pOptimizedClearValue(pOptimizedClearValue)
+    {
+    }
+#endif
+#ifdef __ID3D12Device10_INTERFACE_DEFINED__
+    CREATE_RESOURCE_PARAMS(
+        const D3D12_RESOURCE_DESC1* pResourceDesc,
+        D3D12_BARRIER_LAYOUT InitialLayout,
+        const D3D12_CLEAR_VALUE* pOptimizedClearValue,
+        UINT32 NumCastableFormats,
+        DXGI_FORMAT* pCastableFormats)
+        : Variant(VARIANT_WITH_LAYOUT)
+        , pResourceDesc1(pResourceDesc)
+        , InitialLayout(InitialLayout)
+        , pOptimizedClearValue(pOptimizedClearValue)
+        , NumCastableFormats(NumCastableFormats)
+        , pCastableFormats(pCastableFormats)
+    {
+    }
+#endif
+
+    enum VARIANT
+    {
+        VARIANT_INVALID = 0,
+        VARIANT_WITH_STATE,
+        VARIANT_WITH_STATE_AND_DESC1,
+        VARIANT_WITH_LAYOUT
+    };
+
+    VARIANT Variant = VARIANT_INVALID;
+
+    const D3D12_RESOURCE_DESC* GetResourceDesc() const
+    {
+        D3D12MA_ASSERT(Variant == VARIANT_WITH_STATE);
+        return pResourceDesc;
+    }
+    const D3D12_RESOURCE_DESC*& AccessResourceDesc()
+    {
+        D3D12MA_ASSERT(Variant == VARIANT_WITH_STATE);
+        return pResourceDesc;
+    }
+    const D3D12_RESOURCE_DESC* GetBaseResourceDesc() const
+    {
+        // D3D12_RESOURCE_DESC1 can be cast to D3D12_RESOURCE_DESC by discarding the new
members at the end. + return pResourceDesc; + } + D3D12_RESOURCE_STATES GetInitialResourceState() const + { + D3D12MA_ASSERT(Variant < VARIANT_WITH_LAYOUT); + return InitialResourceState; + } + const D3D12_CLEAR_VALUE* GetOptimizedClearValue() const + { + return pOptimizedClearValue; + } + +#ifdef __ID3D12Device8_INTERFACE_DEFINED__ + const D3D12_RESOURCE_DESC1* GetResourceDesc1() const + { + D3D12MA_ASSERT(Variant >= VARIANT_WITH_STATE_AND_DESC1); + return pResourceDesc1; + } + const D3D12_RESOURCE_DESC1*& AccessResourceDesc1() + { + D3D12MA_ASSERT(Variant >= VARIANT_WITH_STATE_AND_DESC1); + return pResourceDesc1; + } +#endif + +#ifdef __ID3D12Device10_INTERFACE_DEFINED__ + D3D12_BARRIER_LAYOUT GetInitialLayout() const + { + D3D12MA_ASSERT(Variant >= VARIANT_WITH_LAYOUT); + return InitialLayout; + } + UINT32 GetNumCastableFormats() const + { + D3D12MA_ASSERT(Variant >= VARIANT_WITH_LAYOUT); + return NumCastableFormats; + } + DXGI_FORMAT* GetCastableFormats() const + { + D3D12MA_ASSERT(Variant >= VARIANT_WITH_LAYOUT); + return pCastableFormats; + } +#endif + +private: + union + { + const D3D12_RESOURCE_DESC* pResourceDesc; +#ifdef __ID3D12Device8_INTERFACE_DEFINED__ + const D3D12_RESOURCE_DESC1* pResourceDesc1; +#endif + }; + union + { + D3D12_RESOURCE_STATES InitialResourceState; +#ifdef __ID3D12Device10_INTERFACE_DEFINED__ + D3D12_BARRIER_LAYOUT InitialLayout; +#endif + }; + const D3D12_CLEAR_VALUE* pOptimizedClearValue; +#ifdef __ID3D12Device10_INTERFACE_DEFINED__ + UINT32 NumCastableFormats; + DXGI_FORMAT* pCastableFormats; +#endif +}; + +#ifndef _D3D12MA_BLOCK_VECTOR +/* +Sequence of NormalBlock. Represents memory blocks allocated for a specific +heap type and possibly resource type (if only Tier 1 is supported). + +Synchronized internally with a mutex. +*/ +class BlockVector +{ + friend class DefragmentationContextPimpl; + D3D12MA_CLASS_NO_COPY(BlockVector) +public: + BlockVector( + AllocatorPimpl* hAllocator, + const D3D12_HEAP_PROPERTIES& heapProps, + D3D12_HEAP_FLAGS heapFlags, + UINT64 preferredBlockSize, + size_t minBlockCount, + size_t maxBlockCount, + bool explicitBlockSize, + UINT64 minAllocationAlignment, + UINT32 algorithm, + bool denyMsaaTextures, + ID3D12ProtectedResourceSession* pProtectedSession, + D3D12_RESIDENCY_PRIORITY residencyPriority); + ~BlockVector(); + D3D12_RESIDENCY_PRIORITY GetResidencyPriority() const { return m_ResidencyPriority; } + + const D3D12_HEAP_PROPERTIES& GetHeapProperties() const { return m_HeapProps; } + D3D12_HEAP_FLAGS GetHeapFlags() const { return m_HeapFlags; } + UINT64 GetPreferredBlockSize() const { return m_PreferredBlockSize; } + UINT32 GetAlgorithm() const { return m_Algorithm; } + bool DeniesMsaaTextures() const { return m_DenyMsaaTextures; } + // To be used only while the m_Mutex is locked. Used during defragmentation. + size_t GetBlockCount() const { return m_Blocks.size(); } + // To be used only while the m_Mutex is locked. Used during defragmentation. 
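+    // (DefragmentationContextPimpl is declared a friend above and walks blocks by index while holding this mutex.)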
+    NormalBlock* GetBlock(size_t index) const { return m_Blocks[index]; }
+    D3D12MA_RW_MUTEX& GetMutex() { return m_Mutex; }
+
+    HRESULT CreateMinBlocks();
+    bool IsEmpty();
+
+    HRESULT Allocate(
+        UINT64 size,
+        UINT64 alignment,
+        const ALLOCATION_DESC& allocDesc,
+        size_t allocationCount,
+        Allocation** pAllocations);
+
+    void Free(Allocation* hAllocation);
+
+    HRESULT CreateResource(
+        UINT64 size,
+        UINT64 alignment,
+        const ALLOCATION_DESC& allocDesc,
+        const CREATE_RESOURCE_PARAMS& createParams,
+        Allocation** ppAllocation,
+        REFIID riidResource,
+        void** ppvResource);
+
+    void AddStatistics(Statistics& inoutStats);
+    void AddDetailedStatistics(DetailedStatistics& inoutStats);
+
+    void WriteBlockInfoToJson(JsonWriter& json);
+
+private:
+    AllocatorPimpl* const m_hAllocator;
+    const D3D12_HEAP_PROPERTIES m_HeapProps;
+    const D3D12_HEAP_FLAGS m_HeapFlags;
+    const UINT64 m_PreferredBlockSize;
+    const size_t m_MinBlockCount;
+    const size_t m_MaxBlockCount;
+    const bool m_ExplicitBlockSize;
+    const UINT64 m_MinAllocationAlignment;
+    const UINT32 m_Algorithm;
+    const bool m_DenyMsaaTextures;
+    ID3D12ProtectedResourceSession* const m_ProtectedSession;
+    const D3D12_RESIDENCY_PRIORITY m_ResidencyPriority;
+    /* There can be at most one block that is completely empty - a
+    hysteresis to avoid pessimistic case of alternating creation and destruction
+    of an ID3D12Heap. */
+    bool m_HasEmptyBlock;
+    D3D12MA_RW_MUTEX m_Mutex;
+    // Incrementally sorted by sumFreeSize, ascending.
+    Vector<NormalBlock*> m_Blocks;
+    UINT m_NextBlockId;
+    bool m_IncrementalSort = true;
+
+    // Disable incremental sorting when freeing allocations
+    void SetIncrementalSort(bool val) { m_IncrementalSort = val; }
+
+    UINT64 CalcSumBlockSize() const;
+    UINT64 CalcMaxBlockSize() const;
+
+    // Finds and removes given block from vector.
+    void Remove(NormalBlock* pBlock);
+
+    // Performs single step in sorting m_Blocks. They may not be fully sorted
+    // after this call.
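+    // (Intended as one cheap step per call; repeated calls gradually converge toward fully sorted order.)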
+ void IncrementallySortBlocks(); + void SortByFreeSize(); + + HRESULT AllocatePage( + UINT64 size, + UINT64 alignment, + const ALLOCATION_DESC& allocDesc, + Allocation** pAllocation); + + HRESULT AllocateFromBlock( + NormalBlock* pBlock, + UINT64 size, + UINT64 alignment, + ALLOCATION_FLAGS allocFlags, + void* pPrivateData, + UINT32 strategy, + Allocation** pAllocation); + + HRESULT CommitAllocationRequest( + AllocationRequest& allocRequest, + NormalBlock* pBlock, + UINT64 size, + UINT64 alignment, + void* pPrivateData, + Allocation** pAllocation); + + HRESULT CreateBlock( + UINT64 blockSize, + size_t* pNewBlockIndex); +}; +#endif // _D3D12MA_BLOCK_VECTOR + +#ifndef _D3D12MA_CURRENT_BUDGET_DATA +class CurrentBudgetData +{ +public: + bool ShouldUpdateBudget() const { return m_OperationsSinceBudgetFetch >= 30; } + + void GetStatistics(Statistics& outStats, UINT group) const; + void GetBudget(bool useMutex, + UINT64* outLocalUsage, UINT64* outLocalBudget, + UINT64* outNonLocalUsage, UINT64* outNonLocalBudget); + +#if D3D12MA_DXGI_1_4 + HRESULT UpdateBudget(IDXGIAdapter3* adapter3, bool useMutex); +#endif + + void AddAllocation(UINT group, UINT64 allocationBytes); + void RemoveAllocation(UINT group, UINT64 allocationBytes); + + void AddBlock(UINT group, UINT64 blockBytes); + void RemoveBlock(UINT group, UINT64 blockBytes); + +private: + D3D12MA_ATOMIC_UINT32 m_BlockCount[DXGI_MEMORY_SEGMENT_GROUP_COUNT] = {}; + D3D12MA_ATOMIC_UINT32 m_AllocationCount[DXGI_MEMORY_SEGMENT_GROUP_COUNT] = {}; + D3D12MA_ATOMIC_UINT64 m_BlockBytes[DXGI_MEMORY_SEGMENT_GROUP_COUNT] = {}; + D3D12MA_ATOMIC_UINT64 m_AllocationBytes[DXGI_MEMORY_SEGMENT_GROUP_COUNT] = {}; + + D3D12MA_ATOMIC_UINT32 m_OperationsSinceBudgetFetch = {0}; + D3D12MA_RW_MUTEX m_BudgetMutex; + UINT64 m_D3D12Usage[DXGI_MEMORY_SEGMENT_GROUP_COUNT] = {}; + UINT64 m_D3D12Budget[DXGI_MEMORY_SEGMENT_GROUP_COUNT] = {}; + UINT64 m_BlockBytesAtD3D12Fetch[DXGI_MEMORY_SEGMENT_GROUP_COUNT] = {}; +}; + +#ifndef _D3D12MA_CURRENT_BUDGET_DATA_FUNCTIONS +void CurrentBudgetData::GetStatistics(Statistics& outStats, UINT group) const +{ + outStats.BlockCount = m_BlockCount[group]; + outStats.AllocationCount = m_AllocationCount[group]; + outStats.BlockBytes = m_BlockBytes[group]; + outStats.AllocationBytes = m_AllocationBytes[group]; +} + +void CurrentBudgetData::GetBudget(bool useMutex, + UINT64* outLocalUsage, UINT64* outLocalBudget, + UINT64* outNonLocalUsage, UINT64* outNonLocalBudget) +{ + MutexLockRead lockRead(m_BudgetMutex, useMutex); + + if (outLocalUsage) + { + const UINT64 D3D12Usage = m_D3D12Usage[DXGI_MEMORY_SEGMENT_GROUP_LOCAL_COPY]; + const UINT64 blockBytes = m_BlockBytes[DXGI_MEMORY_SEGMENT_GROUP_LOCAL_COPY]; + const UINT64 blockBytesAtD3D12Fetch = m_BlockBytesAtD3D12Fetch[DXGI_MEMORY_SEGMENT_GROUP_LOCAL_COPY]; + *outLocalUsage = D3D12Usage + blockBytes > blockBytesAtD3D12Fetch ? + D3D12Usage + blockBytes - blockBytesAtD3D12Fetch : 0; + } + if (outLocalBudget) + *outLocalBudget = m_D3D12Budget[DXGI_MEMORY_SEGMENT_GROUP_LOCAL_COPY]; + + if (outNonLocalUsage) + { + const UINT64 D3D12Usage = m_D3D12Usage[DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL_COPY]; + const UINT64 blockBytes = m_BlockBytes[DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL_COPY]; + const UINT64 blockBytesAtD3D12Fetch = m_BlockBytesAtD3D12Fetch[DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL_COPY]; + *outNonLocalUsage = D3D12Usage + blockBytes > blockBytesAtD3D12Fetch ? 
+        D3D12Usage + blockBytes - blockBytesAtD3D12Fetch : 0;
+    }
+    if (outNonLocalBudget)
+        *outNonLocalBudget = m_D3D12Budget[DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL_COPY];
+}
+
+#if D3D12MA_DXGI_1_4
+HRESULT CurrentBudgetData::UpdateBudget(IDXGIAdapter3* adapter3, bool useMutex)
+{
+    D3D12MA_ASSERT(adapter3);
+
+    DXGI_QUERY_VIDEO_MEMORY_INFO infoLocal = {};
+    DXGI_QUERY_VIDEO_MEMORY_INFO infoNonLocal = {};
+    const HRESULT hrLocal = adapter3->QueryVideoMemoryInfo(0, DXGI_MEMORY_SEGMENT_GROUP_LOCAL, &infoLocal);
+    const HRESULT hrNonLocal = adapter3->QueryVideoMemoryInfo(0, DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL, &infoNonLocal);
+
+    if (SUCCEEDED(hrLocal) || SUCCEEDED(hrNonLocal))
+    {
+        MutexLockWrite lockWrite(m_BudgetMutex, useMutex);
+
+        if (SUCCEEDED(hrLocal))
+        {
+            m_D3D12Usage[0] = infoLocal.CurrentUsage;
+            m_D3D12Budget[0] = infoLocal.Budget;
+        }
+        if (SUCCEEDED(hrNonLocal))
+        {
+            m_D3D12Usage[1] = infoNonLocal.CurrentUsage;
+            m_D3D12Budget[1] = infoNonLocal.Budget;
+        }
+
+        m_BlockBytesAtD3D12Fetch[0] = m_BlockBytes[0];
+        m_BlockBytesAtD3D12Fetch[1] = m_BlockBytes[1];
+        m_OperationsSinceBudgetFetch = 0;
+    }
+
+    return FAILED(hrLocal) ? hrLocal : hrNonLocal;
+}
+#endif // #if D3D12MA_DXGI_1_4
+
+void CurrentBudgetData::AddAllocation(UINT group, UINT64 allocationBytes)
+{
+    ++m_AllocationCount[group];
+    m_AllocationBytes[group] += allocationBytes;
+    ++m_OperationsSinceBudgetFetch;
+}
+
+void CurrentBudgetData::RemoveAllocation(UINT group, UINT64 allocationBytes)
+{
+    D3D12MA_ASSERT(m_AllocationBytes[group] >= allocationBytes);
+    D3D12MA_ASSERT(m_AllocationCount[group] > 0);
+    m_AllocationBytes[group] -= allocationBytes;
+    --m_AllocationCount[group];
+    ++m_OperationsSinceBudgetFetch;
+}
+
+void CurrentBudgetData::AddBlock(UINT group, UINT64 blockBytes)
+{
+    ++m_BlockCount[group];
+    m_BlockBytes[group] += blockBytes;
+    ++m_OperationsSinceBudgetFetch;
+}
+
+void CurrentBudgetData::RemoveBlock(UINT group, UINT64 blockBytes)
+{
+    D3D12MA_ASSERT(m_BlockBytes[group] >= blockBytes);
+    D3D12MA_ASSERT(m_BlockCount[group] > 0);
+    m_BlockBytes[group] -= blockBytes;
+    --m_BlockCount[group];
+    ++m_OperationsSinceBudgetFetch;
+}
+#endif // _D3D12MA_CURRENT_BUDGET_DATA_FUNCTIONS
+#endif // _D3D12MA_CURRENT_BUDGET_DATA
+
+#ifndef _D3D12MA_DEFRAGMENTATION_CONTEXT_PIMPL
+class DefragmentationContextPimpl
+{
+    D3D12MA_CLASS_NO_COPY(DefragmentationContextPimpl)
+public:
+    DefragmentationContextPimpl(
+        AllocatorPimpl* hAllocator,
+        const DEFRAGMENTATION_DESC& desc,
+        BlockVector* poolVector);
+    ~DefragmentationContextPimpl();
+
+    void GetStats(DEFRAGMENTATION_STATS& outStats) { outStats = m_GlobalStats; }
+    const ALLOCATION_CALLBACKS& GetAllocs() const { return m_Moves.GetAllocs(); }
+
+    HRESULT DefragmentPassBegin(DEFRAGMENTATION_PASS_MOVE_INFO& moveInfo);
+    HRESULT DefragmentPassEnd(DEFRAGMENTATION_PASS_MOVE_INFO& moveInfo);
+
+private:
+    // Max number of allocations to ignore due to size constraints before ending single pass
+    static const UINT8 MAX_ALLOCS_TO_IGNORE = 16;
+    enum class CounterStatus { Pass, Ignore, End };
+
+    struct FragmentedBlock
+    {
+        UINT32 data;
+        NormalBlock* block;
+    };
+    struct StateBalanced
+    {
+        UINT64 avgFreeSize = 0;
+        UINT64 avgAllocSize = UINT64_MAX;
+    };
+    struct MoveAllocationData
+    {
+        UINT64 size;
+        UINT64 alignment;
+        ALLOCATION_FLAGS flags;
+        DEFRAGMENTATION_MOVE move = {};
+    };
+
+    const UINT64 m_MaxPassBytes;
+    const UINT32 m_MaxPassAllocations;
+
+    Vector<DEFRAGMENTATION_MOVE> m_Moves;
+
+    UINT8 m_IgnoredAllocs = 0;
+    UINT32 m_Algorithm;
+    UINT32 m_BlockVectorCount;
+    BlockVector*
m_PoolBlockVector; + BlockVector** m_pBlockVectors; + size_t m_ImmovableBlockCount = 0; + DEFRAGMENTATION_STATS m_GlobalStats = { 0 }; + DEFRAGMENTATION_STATS m_PassStats = { 0 }; + void* m_AlgorithmState = NULL; + + static MoveAllocationData GetMoveData(AllocHandle handle, BlockMetadata* metadata); + CounterStatus CheckCounters(UINT64 bytes); + bool IncrementCounters(UINT64 bytes); + bool ReallocWithinBlock(BlockVector& vector, NormalBlock* block); + bool AllocInOtherBlock(size_t start, size_t end, MoveAllocationData& data, BlockVector& vector); + + bool ComputeDefragmentation(BlockVector& vector, size_t index); + bool ComputeDefragmentation_Fast(BlockVector& vector); + bool ComputeDefragmentation_Balanced(BlockVector& vector, size_t index, bool update); + bool ComputeDefragmentation_Full(BlockVector& vector); + + void UpdateVectorStatistics(BlockVector& vector, StateBalanced& state); +}; +#endif // _D3D12MA_DEFRAGMENTATION_CONTEXT_PIMPL + +#ifndef _D3D12MA_POOL_PIMPL +class PoolPimpl +{ + friend class Allocator; + friend struct PoolListItemTraits; +public: + PoolPimpl(AllocatorPimpl* allocator, const POOL_DESC& desc); + ~PoolPimpl(); + + AllocatorPimpl* GetAllocator() const { return m_Allocator; } + const POOL_DESC& GetDesc() const { return m_Desc; } + bool SupportsCommittedAllocations() const { return m_Desc.BlockSize == 0; } + LPCWSTR GetName() const { return m_Name; } + + BlockVector* GetBlockVector() { return m_BlockVector; } + CommittedAllocationList* GetCommittedAllocationList() { return SupportsCommittedAllocations() ? &m_CommittedAllocations : NULL; } + + HRESULT Init(); + void GetStatistics(Statistics& outStats); + void CalculateStatistics(DetailedStatistics& outStats); + void AddDetailedStatistics(DetailedStatistics& inoutStats); + void SetName(LPCWSTR Name); + +private: + AllocatorPimpl* m_Allocator; // Externally owned object. + POOL_DESC m_Desc; + BlockVector* m_BlockVector; // Owned object. + CommittedAllocationList m_CommittedAllocations; + wchar_t* m_Name; + PoolPimpl* m_PrevPool = NULL; + PoolPimpl* m_NextPool = NULL; + + void FreeName(); +}; + +struct PoolListItemTraits +{ + using ItemType = PoolPimpl; + static ItemType* GetPrev(const ItemType* item) { return item->m_PrevPool; } + static ItemType* GetNext(const ItemType* item) { return item->m_NextPool; } + static ItemType*& AccessPrev(ItemType* item) { return item->m_PrevPool; } + static ItemType*& AccessNext(ItemType* item) { return item->m_NextPool; } +}; +#endif // _D3D12MA_POOL_PIMPL + + +#ifndef _D3D12MA_ALLOCATOR_PIMPL +class AllocatorPimpl +{ + friend class Allocator; + friend class Pool; +public: + std::atomic_uint32_t m_RefCount = {1}; + CurrentBudgetData m_Budget; + + AllocatorPimpl(const ALLOCATION_CALLBACKS& allocationCallbacks, const ALLOCATOR_DESC& desc); + ~AllocatorPimpl(); + + ID3D12Device* GetDevice() const { return m_Device; } +#ifdef __ID3D12Device1_INTERFACE_DEFINED__ + ID3D12Device1* GetDevice1() const { return m_Device1; } +#endif +#ifdef __ID3D12Device4_INTERFACE_DEFINED__ + ID3D12Device4* GetDevice4() const { return m_Device4; } +#endif +#ifdef __ID3D12Device8_INTERFACE_DEFINED__ + ID3D12Device8* GetDevice8() const { return m_Device8; } +#endif + // Shortcut for "Allocation Callbacks", because this function is called so often. 
+ const ALLOCATION_CALLBACKS& GetAllocs() const { return m_AllocationCallbacks; } + const D3D12_FEATURE_DATA_D3D12_OPTIONS& GetD3D12Options() const { return m_D3D12Options; } + BOOL IsUMA() const { return m_D3D12Architecture.UMA; } + BOOL IsCacheCoherentUMA() const { return m_D3D12Architecture.CacheCoherentUMA; } + bool SupportsResourceHeapTier2() const { return m_D3D12Options.ResourceHeapTier >= D3D12_RESOURCE_HEAP_TIER_2; } + bool IsGPUUploadHeapSupported() const { return m_GPUUploadHeapSupported != FALSE; } + bool UseMutex() const { return m_UseMutex; } + AllocationObjectAllocator& GetAllocationObjectAllocator() { return m_AllocationObjectAllocator; } + /* + If SupportsResourceHeapTier2(): + 0: D3D12_HEAP_TYPE_DEFAULT + 1: D3D12_HEAP_TYPE_UPLOAD + 2: D3D12_HEAP_TYPE_READBACK + 3: D3D12_HEAP_TYPE_GPU_UPLOAD + else: + 0: D3D12_HEAP_TYPE_DEFAULT + buffer + 1: D3D12_HEAP_TYPE_DEFAULT + texture + 2: D3D12_HEAP_TYPE_DEFAULT + texture RT or DS + 3: D3D12_HEAP_TYPE_UPLOAD + buffer + 4: D3D12_HEAP_TYPE_UPLOAD + texture + 5: D3D12_HEAP_TYPE_UPLOAD + texture RT or DS + 6: D3D12_HEAP_TYPE_READBACK + buffer + 7: D3D12_HEAP_TYPE_READBACK + texture + 8: D3D12_HEAP_TYPE_READBACK + texture RT or DS + 9: D3D12_HEAP_TYPE_GPU_UPLOAD + buffer + 10: D3D12_HEAP_TYPE_GPU_UPLOAD + texture + 11: D3D12_HEAP_TYPE_GPU_UPLOAD + texture RT or DS + */ + UINT GetDefaultPoolCount() const { return STANDARD_HEAP_TYPE_COUNT * (SupportsResourceHeapTier2() ? 1 : 3); } + BlockVector** GetDefaultPools() { return m_BlockVectors; } + + HRESULT Init(const ALLOCATOR_DESC& desc); + bool HeapFlagsFulfillResourceHeapTier(D3D12_HEAP_FLAGS flags) const; + UINT StandardHeapTypeToMemorySegmentGroup(D3D12_HEAP_TYPE heapType) const; + UINT HeapPropertiesToMemorySegmentGroup(const D3D12_HEAP_PROPERTIES& heapProps) const; + UINT64 GetMemoryCapacity(UINT memorySegmentGroup) const; + + HRESULT CreatePlacedResourceWrap( + ID3D12Heap *pHeap, + UINT64 HeapOffset, + const CREATE_RESOURCE_PARAMS& createParams, + REFIID riidResource, + void** ppvResource); + + HRESULT CreateResource( + const ALLOCATION_DESC* pAllocDesc, + const CREATE_RESOURCE_PARAMS& createParams, + Allocation** ppAllocation, + REFIID riidResource, + void** ppvResource); + + HRESULT CreateAliasingResource( + Allocation* pAllocation, + UINT64 AllocationLocalOffset, + const CREATE_RESOURCE_PARAMS& createParams, + REFIID riidResource, + void** ppvResource); + + HRESULT AllocateMemory( + const ALLOCATION_DESC* pAllocDesc, + const D3D12_RESOURCE_ALLOCATION_INFO* pAllocInfo, + Allocation** ppAllocation); + + // Unregisters allocation from the collection of dedicated allocations. + // Allocation object must be deleted externally afterwards. + void FreeCommittedMemory(Allocation* allocation); + // Unregisters allocation from the collection of placed allocations. + // Allocation object must be deleted externally afterwards. + void FreePlacedMemory(Allocation* allocation); + // Unregisters allocation from the collection of dedicated allocations and destroys associated heap. + // Allocation object must be deleted externally afterwards. 
+ void FreeHeapMemory(Allocation* allocation);
+
+ void SetResidencyPriority(ID3D12Pageable* obj, D3D12_RESIDENCY_PRIORITY priority) const;
+
+ // For more detailed stats use outCustomHeaps to access statistics divided into L0 and L1 groups
+ void CalculateStatistics(TotalStatistics& outStats, DetailedStatistics outCustomHeaps[2] = NULL);
+
+ void GetBudget(Budget* outLocalBudget, Budget* outNonLocalBudget);
+ void GetBudgetForHeapType(Budget& outBudget, D3D12_HEAP_TYPE heapType);
+
+ void BuildStatsString(WCHAR** ppStatsString, BOOL detailedMap);
+ void FreeStatsString(WCHAR* pStatsString);
+
+private:
+ using PoolList = IntrusiveLinkedList<PoolListItemTraits>;
+
+ const bool m_UseMutex;
+ const bool m_AlwaysCommitted;
+ const bool m_MsaaAlwaysCommitted;
+ const bool m_PreferSmallBuffersCommitted;
+ bool m_DefaultPoolsNotZeroed = false;
+ ID3D12Device* m_Device; // AddRef
+#ifdef __ID3D12Device1_INTERFACE_DEFINED__
+ ID3D12Device1* m_Device1 = NULL; // AddRef, optional
+#endif
+#ifdef __ID3D12Device4_INTERFACE_DEFINED__
+ ID3D12Device4* m_Device4 = NULL; // AddRef, optional
+#endif
+#ifdef __ID3D12Device8_INTERFACE_DEFINED__
+ ID3D12Device8* m_Device8 = NULL; // AddRef, optional
+#endif
+#ifdef __ID3D12Device10_INTERFACE_DEFINED__
+ ID3D12Device10* m_Device10 = NULL; // AddRef, optional
+#endif
+#ifdef __ID3D12Device12_INTERFACE_DEFINED__
+ ID3D12Device12* m_Device12 = NULL; // AddRef, optional
+#endif
+ IDXGIAdapter* m_Adapter; // AddRef
+#if D3D12MA_DXGI_1_4
+ IDXGIAdapter3* m_Adapter3 = NULL; // AddRef, optional
+#endif
+ UINT64 m_PreferredBlockSize;
+ ALLOCATION_CALLBACKS m_AllocationCallbacks;
+ DXGI_ADAPTER_DESC m_AdapterDesc;
+ D3D12_FEATURE_DATA_D3D12_OPTIONS m_D3D12Options;
+ BOOL m_GPUUploadHeapSupported = FALSE;
+ D3D12_FEATURE_DATA_ARCHITECTURE m_D3D12Architecture;
+ AllocationObjectAllocator m_AllocationObjectAllocator;
+
+ D3D12MA_RW_MUTEX m_PoolsMutex[HEAP_TYPE_COUNT];
+ PoolList m_Pools[HEAP_TYPE_COUNT];
+ // Default pools.
+ BlockVector* m_BlockVectors[DEFAULT_POOL_MAX_COUNT];
+ CommittedAllocationList m_CommittedAllocations[STANDARD_HEAP_TYPE_COUNT];
+
+ /*
+ Heuristic that decides whether a resource should better be placed in its own,
+ dedicated allocation (committed resource rather than placed resource).
+ */
+ template<typename D3D12_RESOURCE_DESC_T>
+ bool PrefersCommittedAllocation(const D3D12_RESOURCE_DESC_T& resourceDesc,
+ ALLOCATION_FLAGS strategy);
+
+ // Allocates and registers new committed resource with implicit heap, as dedicated allocation.
+ // Creates and returns Allocation object and optionally D3D12 resource.
+ HRESULT AllocateCommittedResource(
+ const CommittedAllocationParameters& committedAllocParams,
+ UINT64 resourceSize, bool withinBudget, void* pPrivateData,
+ const CREATE_RESOURCE_PARAMS& createParams,
+ Allocation** ppAllocation, REFIID riidResource, void** ppvResource);
+
+ // Allocates and registers new heap without any resources placed in it, as dedicated allocation.
+ // Creates and returns Allocation object.
+ HRESULT AllocateHeap(
+ const CommittedAllocationParameters& committedAllocParams,
+ const D3D12_RESOURCE_ALLOCATION_INFO& allocInfo, bool withinBudget,
+ void* pPrivateData, Allocation** ppAllocation);
+
+ template<typename D3D12_RESOURCE_DESC_T>
+ HRESULT CalcAllocationParams(const ALLOCATION_DESC& allocDesc, UINT64 allocSize,
+ const D3D12_RESOURCE_DESC_T* resDesc, // Optional
+ BlockVector*& outBlockVector, CommittedAllocationParameters& outCommittedAllocationParams, bool& outPreferCommitted);
+
+ // Returns UINT32_MAX if index cannot be calculated.
+ UINT CalcDefaultPoolIndex(const ALLOCATION_DESC& allocDesc, ResourceClass resourceClass) const;
+ void CalcDefaultPoolParams(D3D12_HEAP_TYPE& outHeapType, D3D12_HEAP_FLAGS& outHeapFlags, UINT index) const;
+
+ // Registers Pool object in m_Pools.
+ void RegisterPool(Pool* pool, D3D12_HEAP_TYPE heapType);
+ // Unregisters Pool object from m_Pools.
+ void UnregisterPool(Pool* pool, D3D12_HEAP_TYPE heapType);
+
+ HRESULT UpdateD3D12Budget();
+
+ D3D12_RESOURCE_ALLOCATION_INFO GetResourceAllocationInfoNative(const D3D12_RESOURCE_DESC& resourceDesc, UINT32 NumCastableFormats = 0,
+ DXGI_FORMAT* pCastableFormats = nullptr) const;
+#ifdef __ID3D12Device8_INTERFACE_DEFINED__
+ D3D12_RESOURCE_ALLOCATION_INFO GetResourceAllocationInfoNative(const D3D12_RESOURCE_DESC1& resourceDesc, UINT32 NumCastableFormats = 0,
+ DXGI_FORMAT* pCastableFormats = nullptr) const;
+#endif
+
+ template<typename D3D12_RESOURCE_DESC_T>
+ D3D12_RESOURCE_ALLOCATION_INFO GetResourceAllocationInfo(D3D12_RESOURCE_DESC_T& inOutResourceDesc, UINT32 NumCastableFormats = 0,
+ DXGI_FORMAT* pCastableFormats = nullptr) const;
+
+ bool NewAllocationWithinBudget(D3D12_HEAP_TYPE heapType, UINT64 size);
+
+ // Writes object { } with data of given budget.
+ static void WriteBudgetToJson(JsonWriter& json, const Budget& budget);
+};
+
+#ifndef _D3D12MA_ALLOCATOR_PIMPL_FUNCTIONS
+AllocatorPimpl::AllocatorPimpl(const ALLOCATION_CALLBACKS& allocationCallbacks, const ALLOCATOR_DESC& desc)
+ : m_UseMutex((desc.Flags & ALLOCATOR_FLAG_SINGLETHREADED) == 0),
+ m_AlwaysCommitted((desc.Flags & ALLOCATOR_FLAG_ALWAYS_COMMITTED) != 0),
+ m_MsaaAlwaysCommitted((desc.Flags & ALLOCATOR_FLAG_MSAA_TEXTURES_ALWAYS_COMMITTED) != 0),
+ m_PreferSmallBuffersCommitted((desc.Flags & ALLOCATOR_FLAG_DONT_PREFER_SMALL_BUFFERS_COMMITTED) == 0),
+ m_Device(desc.pDevice),
+ m_Adapter(desc.pAdapter),
+ m_PreferredBlockSize(desc.PreferredBlockSize != 0 ? desc.PreferredBlockSize : D3D12MA_DEFAULT_BLOCK_SIZE),
+ m_AllocationCallbacks(allocationCallbacks),
+ // Below this line don't use allocationCallbacks but m_AllocationCallbacks!!!
+ m_AllocationObjectAllocator(m_AllocationCallbacks, m_UseMutex)
+{
+ // desc.pAllocationCallbacks intentionally ignored here, preprocessed by CreateAllocator.
+ ZeroMemory(&m_D3D12Options, sizeof(m_D3D12Options));
+ ZeroMemory(&m_D3D12Architecture, sizeof(m_D3D12Architecture));
+
+ ZeroMemory(m_BlockVectors, sizeof(m_BlockVectors));
+
+ for (UINT i = 0; i < STANDARD_HEAP_TYPE_COUNT; ++i)
+ {
+ m_CommittedAllocations[i].Init(
+ m_UseMutex,
+ IndexToStandardHeapType(i),
+ NULL); // pool
+ }
+
+ m_Device->AddRef();
+ m_Adapter->AddRef();
+}
+
+HRESULT AllocatorPimpl::Init(const ALLOCATOR_DESC& desc)
+{
+#if D3D12MA_DXGI_1_4
+ desc.pAdapter->QueryInterface(D3D12MA_IID_PPV_ARGS(&m_Adapter3));
+#endif
+
+#ifdef __ID3D12Device1_INTERFACE_DEFINED__
+ m_Device->QueryInterface(D3D12MA_IID_PPV_ARGS(&m_Device1));
+#endif
+
+#ifdef __ID3D12Device4_INTERFACE_DEFINED__
+ m_Device->QueryInterface(D3D12MA_IID_PPV_ARGS(&m_Device4));
+#endif
+
+#ifdef __ID3D12Device8_INTERFACE_DEFINED__
+ m_Device->QueryInterface(D3D12MA_IID_PPV_ARGS(&m_Device8));
+
+ if((desc.Flags & ALLOCATOR_FLAG_DEFAULT_POOLS_NOT_ZEROED) != 0)
+ {
+ D3D12_FEATURE_DATA_D3D12_OPTIONS7 options7 = {};
+ if(SUCCEEDED(m_Device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS7, &options7, sizeof(options7))))
+ {
+ // DEFAULT_POOLS_NOT_ZEROED both supported and enabled by the user.
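+ // Illustrative usage (a sketch, not part of this file; the names below are from the
+ // public D3D12MA API): the caller opts in at allocator creation time, e.g.
+ //   D3D12MA::ALLOCATOR_DESC allocatorDesc = {};
+ //   allocatorDesc.pDevice = device;
+ //   allocatorDesc.pAdapter = adapter;
+ //   allocatorDesc.Flags = D3D12MA::ALLOCATOR_FLAG_DEFAULT_POOLS_NOT_ZEROED;
+ //   D3D12MA::CreateAllocator(&allocatorDesc, &allocator);
+ // Once OPTIONS7 support is confirmed here, default-pool heaps are later created with
+ // D3D12_HEAP_FLAG_CREATE_NOT_ZEROED, skipping the OS zero-fill of freshly created heaps.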
+ m_DefaultPoolsNotZeroed = true;
+ }
+ }
+#endif
+
+#ifdef __ID3D12Device10_INTERFACE_DEFINED__
+ m_Device->QueryInterface(D3D12MA_IID_PPV_ARGS(&m_Device10));
+#endif
+
+#ifdef __ID3D12Device12_INTERFACE_DEFINED__
+ m_Device->QueryInterface(D3D12MA_IID_PPV_ARGS(&m_Device12));
+#endif
+
+ HRESULT hr = m_Adapter->GetDesc(&m_AdapterDesc);
+ if (FAILED(hr))
+ {
+ return hr;
+ }
+
+ hr = m_Device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &m_D3D12Options, sizeof(m_D3D12Options));
+ if (FAILED(hr))
+ {
+ return hr;
+ }
+#ifdef D3D12MA_FORCE_RESOURCE_HEAP_TIER
+ m_D3D12Options.ResourceHeapTier = (D3D12MA_FORCE_RESOURCE_HEAP_TIER);
+#endif
+
+// You must define this macro, e.g. `#define D3D12MA_HAS_GPU_UPLOAD_HEAP`, to enable GPU Upload Heaps!
+// Unfortunately there is no way to programmatically check if the included d3d12.h defines D3D12_FEATURE_DATA_D3D12_OPTIONS16 or not.
+// Main interfaces have respective macros like __ID3D12Device4_INTERFACE_DEFINED__, but structures like this do not.
+#ifdef D3D12MA_HAS_GPU_UPLOAD_HEAP
+ {
+ D3D12_FEATURE_DATA_D3D12_OPTIONS16 options16 = {};
+ hr = m_Device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS16, &options16, sizeof(options16));
+ if (SUCCEEDED(hr))
+ {
+ m_GPUUploadHeapSupported = options16.GPUUploadHeapSupported;
+ }
+ }
+#endif
+
+ hr = m_Device->CheckFeatureSupport(D3D12_FEATURE_ARCHITECTURE, &m_D3D12Architecture, sizeof(m_D3D12Architecture));
+ if (FAILED(hr))
+ {
+ m_D3D12Architecture.UMA = FALSE;
+ m_D3D12Architecture.CacheCoherentUMA = FALSE;
+ }
+
+ D3D12_HEAP_PROPERTIES heapProps = {};
+ const UINT defaultPoolCount = GetDefaultPoolCount();
+ for (UINT i = 0; i < defaultPoolCount; ++i)
+ {
+ D3D12_HEAP_FLAGS heapFlags;
+ CalcDefaultPoolParams(heapProps.Type, heapFlags, i);
+
+#if D3D12MA_CREATE_NOT_ZEROED_AVAILABLE
+ if(m_DefaultPoolsNotZeroed)
+ {
+ heapFlags |= D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
+ }
+#endif
+
+ m_BlockVectors[i] = D3D12MA_NEW(GetAllocs(), BlockVector)(
+ this, // hAllocator
+ heapProps, // heapType
+ heapFlags, // heapFlags
+ m_PreferredBlockSize,
+ 0, // minBlockCount
+ SIZE_MAX, // maxBlockCount
+ false, // explicitBlockSize
+ D3D12MA_DEBUG_ALIGNMENT, // minAllocationAlignment
+ 0, // algorithm (0 = default)
+ m_MsaaAlwaysCommitted,
+ NULL, // pProtectedSession
+ D3D12_RESIDENCY_PRIORITY_NONE); // residencyPriority
+ // No need to call m_BlockVectors[i]->CreateMinBlocks here, because minBlockCount is 0.
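+ // For reference (a worked example, not executed here): on D3D12_RESOURCE_HEAP_TIER_1
+ // hardware this loop creates 3 pools per standard heap type, and CalcDefaultPoolParams
+ // decodes the index as (index / 3) -> heap type, (index % 3) -> allowed resource class:
+ //   index 0 -> DEFAULT,  buffers only        (DENY_RT_DS_TEXTURES | DENY_NON_RT_DS_TEXTURES)
+ //   index 1 -> DEFAULT,  non-RT/DS textures  (DENY_BUFFERS | DENY_RT_DS_TEXTURES)
+ //   index 2 -> DEFAULT,  RT/DS textures      (DENY_BUFFERS | DENY_NON_RT_DS_TEXTURES)
+ //   index 3 -> UPLOAD,   buffers only        ... and so on for READBACK (and GPU_UPLOAD).
+ // On D3D12_RESOURCE_HEAP_TIER_2 there is exactly one pool per heap type.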
+ } + +#if D3D12MA_DXGI_1_4 + UpdateD3D12Budget(); +#endif + + return S_OK; +} + +AllocatorPimpl::~AllocatorPimpl() +{ +#ifdef __ID3D12Device12_INTERFACE_DEFINED__ + SAFE_RELEASE(m_Device12); +#endif +#ifdef __ID3D12Device10_INTERFACE_DEFINED__ + SAFE_RELEASE(m_Device10); +#endif +#ifdef __ID3D12Device8_INTERFACE_DEFINED__ + SAFE_RELEASE(m_Device8); +#endif +#ifdef __ID3D12Device4_INTERFACE_DEFINED__ + SAFE_RELEASE(m_Device4); +#endif +#ifdef __ID3D12Device1_INTERFACE_DEFINED__ + SAFE_RELEASE(m_Device1); +#endif +#if D3D12MA_DXGI_1_4 + SAFE_RELEASE(m_Adapter3); +#endif + SAFE_RELEASE(m_Adapter); + SAFE_RELEASE(m_Device); + + for (UINT i = DEFAULT_POOL_MAX_COUNT; i--; ) + { + D3D12MA_DELETE(GetAllocs(), m_BlockVectors[i]); + } + + for (UINT i = HEAP_TYPE_COUNT; i--; ) + { + if (!m_Pools[i].IsEmpty()) + { + D3D12MA_ASSERT(0 && "Unfreed pools found!"); + } + } +} + +bool AllocatorPimpl::HeapFlagsFulfillResourceHeapTier(D3D12_HEAP_FLAGS flags) const +{ + if (SupportsResourceHeapTier2()) + { + return true; + } + else + { + const bool allowBuffers = (flags & D3D12_HEAP_FLAG_DENY_BUFFERS) == 0; + const bool allowRtDsTextures = (flags & D3D12_HEAP_FLAG_DENY_RT_DS_TEXTURES) == 0; + const bool allowNonRtDsTextures = (flags & D3D12_HEAP_FLAG_DENY_NON_RT_DS_TEXTURES) == 0; + const uint8_t allowedGroupCount = (allowBuffers ? 1 : 0) + (allowRtDsTextures ? 1 : 0) + (allowNonRtDsTextures ? 1 : 0); + return allowedGroupCount == 1; + } +} + +UINT AllocatorPimpl::StandardHeapTypeToMemorySegmentGroup(D3D12_HEAP_TYPE heapType) const +{ + D3D12MA_ASSERT(IsHeapTypeStandard(heapType)); + if (IsUMA()) + return DXGI_MEMORY_SEGMENT_GROUP_LOCAL_COPY; + return ( + heapType == D3D12_HEAP_TYPE_DEFAULT +#ifdef D3D12MA_HAS_GPU_UPLOAD_HEAP + || heapType == D3D12_HEAP_TYPE_GPU_UPLOAD +#endif + ) ? DXGI_MEMORY_SEGMENT_GROUP_LOCAL_COPY : DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL_COPY; +} + +UINT AllocatorPimpl::HeapPropertiesToMemorySegmentGroup(const D3D12_HEAP_PROPERTIES& heapProps) const +{ + if (IsUMA()) + return DXGI_MEMORY_SEGMENT_GROUP_LOCAL_COPY; + if (heapProps.MemoryPoolPreference == D3D12_MEMORY_POOL_UNKNOWN) + return StandardHeapTypeToMemorySegmentGroup(heapProps.Type); + return heapProps.MemoryPoolPreference == D3D12_MEMORY_POOL_L1 ? + DXGI_MEMORY_SEGMENT_GROUP_LOCAL_COPY : DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL_COPY; +} + +UINT64 AllocatorPimpl::GetMemoryCapacity(UINT memorySegmentGroup) const +{ + switch (memorySegmentGroup) + { + case DXGI_MEMORY_SEGMENT_GROUP_LOCAL_COPY: + return IsUMA() ? + m_AdapterDesc.DedicatedVideoMemory + m_AdapterDesc.SharedSystemMemory : m_AdapterDesc.DedicatedVideoMemory; + case DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL_COPY: + return IsUMA() ? 
0 : m_AdapterDesc.SharedSystemMemory; + default: + D3D12MA_ASSERT(0); + return UINT64_MAX; + } +} + +HRESULT AllocatorPimpl::CreatePlacedResourceWrap( + ID3D12Heap *pHeap, + UINT64 HeapOffset, + const CREATE_RESOURCE_PARAMS& createParams, + REFIID riidResource, + void** ppvResource) +{ +#ifdef __ID3D12Device10_INTERFACE_DEFINED__ + if (createParams.Variant == CREATE_RESOURCE_PARAMS::VARIANT_WITH_LAYOUT) + { + if (!m_Device10) + { + return E_NOINTERFACE; + } + return m_Device10->CreatePlacedResource2(pHeap, HeapOffset, + createParams.GetResourceDesc1(), createParams.GetInitialLayout(), + createParams.GetOptimizedClearValue(), createParams.GetNumCastableFormats(), + createParams.GetCastableFormats(), riidResource, ppvResource); + } else +#endif +#ifdef __ID3D12Device8_INTERFACE_DEFINED__ + if (createParams.Variant == CREATE_RESOURCE_PARAMS::VARIANT_WITH_STATE_AND_DESC1) + { + if (!m_Device8) + { + return E_NOINTERFACE; + } + return m_Device8->CreatePlacedResource1(pHeap, HeapOffset, + createParams.GetResourceDesc1(), createParams.GetInitialResourceState(), + createParams.GetOptimizedClearValue(), riidResource, ppvResource); + } else +#endif + if (createParams.Variant == CREATE_RESOURCE_PARAMS::VARIANT_WITH_STATE) + { + return m_Device->CreatePlacedResource(pHeap, HeapOffset, + createParams.GetResourceDesc(), createParams.GetInitialResourceState(), + createParams.GetOptimizedClearValue(), riidResource, ppvResource); + } + else + { + D3D12MA_ASSERT(0); + return E_INVALIDARG; + } +} + + +HRESULT AllocatorPimpl::CreateResource( + const ALLOCATION_DESC* pAllocDesc, + const CREATE_RESOURCE_PARAMS& createParams, + Allocation** ppAllocation, + REFIID riidResource, + void** ppvResource) +{ + D3D12MA_ASSERT(pAllocDesc && createParams.GetBaseResourceDesc() && ppAllocation); + + *ppAllocation = NULL; + if (ppvResource) + { + *ppvResource = NULL; + } + + CREATE_RESOURCE_PARAMS finalCreateParams = createParams; + D3D12_RESOURCE_DESC finalResourceDesc; +#ifdef __ID3D12Device8_INTERFACE_DEFINED__ + D3D12_RESOURCE_DESC1 finalResourceDesc1; +#endif + D3D12_RESOURCE_ALLOCATION_INFO resAllocInfo; + if (createParams.Variant == CREATE_RESOURCE_PARAMS::VARIANT_WITH_STATE) + { + finalResourceDesc = *createParams.GetResourceDesc(); + finalCreateParams.AccessResourceDesc() = &finalResourceDesc; + resAllocInfo = GetResourceAllocationInfo(finalResourceDesc); + } +#ifdef __ID3D12Device8_INTERFACE_DEFINED__ + else if (createParams.Variant == CREATE_RESOURCE_PARAMS::VARIANT_WITH_STATE_AND_DESC1) + { + if (!m_Device8) + { + return E_NOINTERFACE; + } + finalResourceDesc1 = *createParams.GetResourceDesc1(); + finalCreateParams.AccessResourceDesc1() = &finalResourceDesc1; + resAllocInfo = GetResourceAllocationInfo(finalResourceDesc1); + } +#endif +#ifdef __ID3D12Device10_INTERFACE_DEFINED__ + else if (createParams.Variant == CREATE_RESOURCE_PARAMS::VARIANT_WITH_LAYOUT) + { + if (!m_Device10) + { + return E_NOINTERFACE; + } + finalResourceDesc1 = *createParams.GetResourceDesc1(); + finalCreateParams.AccessResourceDesc1() = &finalResourceDesc1; + resAllocInfo = GetResourceAllocationInfo(finalResourceDesc1); + } +#endif + else + { + D3D12MA_ASSERT(0); + return E_INVALIDARG; + } + D3D12MA_ASSERT(IsPow2(resAllocInfo.Alignment)); + D3D12MA_ASSERT(resAllocInfo.SizeInBytes > 0); + + BlockVector* blockVector = NULL; + CommittedAllocationParameters committedAllocationParams = {}; + bool preferCommitted = false; + + HRESULT hr; +#ifdef __ID3D12Device8_INTERFACE_DEFINED__ + if (createParams.Variant >= 
CREATE_RESOURCE_PARAMS::VARIANT_WITH_STATE_AND_DESC1) + { + hr = CalcAllocationParams(*pAllocDesc, resAllocInfo.SizeInBytes, + createParams.GetResourceDesc1(), + blockVector, committedAllocationParams, preferCommitted); + } + else +#endif + { + hr = CalcAllocationParams(*pAllocDesc, resAllocInfo.SizeInBytes, + createParams.GetResourceDesc(), + blockVector, committedAllocationParams, preferCommitted); + } + if (FAILED(hr)) + return hr; + + const bool withinBudget = (pAllocDesc->Flags & ALLOCATION_FLAG_WITHIN_BUDGET) != 0; + hr = E_INVALIDARG; + if (committedAllocationParams.IsValid() && preferCommitted) + { + hr = AllocateCommittedResource(committedAllocationParams, + resAllocInfo.SizeInBytes, withinBudget, pAllocDesc->pPrivateData, + finalCreateParams, ppAllocation, riidResource, ppvResource); + if (SUCCEEDED(hr)) + return hr; + } + if (blockVector != NULL) + { + hr = blockVector->CreateResource(resAllocInfo.SizeInBytes, resAllocInfo.Alignment, + *pAllocDesc, finalCreateParams, + ppAllocation, riidResource, ppvResource); + if (SUCCEEDED(hr)) + return hr; + } + if (committedAllocationParams.IsValid() && !preferCommitted) + { + hr = AllocateCommittedResource(committedAllocationParams, + resAllocInfo.SizeInBytes, withinBudget, pAllocDesc->pPrivateData, + finalCreateParams, ppAllocation, riidResource, ppvResource); + if (SUCCEEDED(hr)) + return hr; + } + return hr; +} + +HRESULT AllocatorPimpl::AllocateMemory( + const ALLOCATION_DESC* pAllocDesc, + const D3D12_RESOURCE_ALLOCATION_INFO* pAllocInfo, + Allocation** ppAllocation) +{ + *ppAllocation = NULL; + + BlockVector* blockVector = NULL; + CommittedAllocationParameters committedAllocationParams = {}; + bool preferCommitted = false; + HRESULT hr = CalcAllocationParams(*pAllocDesc, pAllocInfo->SizeInBytes, + NULL, // pResDesc + blockVector, committedAllocationParams, preferCommitted); + if (FAILED(hr)) + return hr; + + const bool withinBudget = (pAllocDesc->Flags & ALLOCATION_FLAG_WITHIN_BUDGET) != 0; + hr = E_INVALIDARG; + if (committedAllocationParams.IsValid() && preferCommitted) + { + hr = AllocateHeap(committedAllocationParams, *pAllocInfo, withinBudget, pAllocDesc->pPrivateData, ppAllocation); + if (SUCCEEDED(hr)) + return hr; + } + if (blockVector != NULL) + { + hr = blockVector->Allocate(pAllocInfo->SizeInBytes, pAllocInfo->Alignment, + *pAllocDesc, 1, (Allocation**)ppAllocation); + if (SUCCEEDED(hr)) + return hr; + } + if (committedAllocationParams.IsValid() && !preferCommitted) + { + hr = AllocateHeap(committedAllocationParams, *pAllocInfo, withinBudget, pAllocDesc->pPrivateData, ppAllocation); + if (SUCCEEDED(hr)) + return hr; + } + return hr; +} + +HRESULT AllocatorPimpl::CreateAliasingResource( + Allocation* pAllocation, + UINT64 AllocationLocalOffset, + const CREATE_RESOURCE_PARAMS& createParams, + REFIID riidResource, + void** ppvResource) +{ + *ppvResource = NULL; + + CREATE_RESOURCE_PARAMS finalCreateParams = createParams; + D3D12_RESOURCE_DESC finalResourceDesc; +#ifdef __ID3D12Device8_INTERFACE_DEFINED__ + D3D12_RESOURCE_DESC1 finalResourceDesc1; +#endif + D3D12_RESOURCE_ALLOCATION_INFO resAllocInfo; + if (createParams.Variant == CREATE_RESOURCE_PARAMS::VARIANT_WITH_STATE) + { + finalResourceDesc = *createParams.GetResourceDesc(); + finalCreateParams.AccessResourceDesc() = &finalResourceDesc; + resAllocInfo = GetResourceAllocationInfo(finalResourceDesc); + } +#ifdef __ID3D12Device8_INTERFACE_DEFINED__ + else if (createParams.Variant == CREATE_RESOURCE_PARAMS::VARIANT_WITH_STATE_AND_DESC1) + { + if (!m_Device8) + { + return 
E_NOINTERFACE; + } + finalResourceDesc1 = *createParams.GetResourceDesc1(); + finalCreateParams.AccessResourceDesc1() = &finalResourceDesc1; + resAllocInfo = GetResourceAllocationInfo(finalResourceDesc1); + } +#endif +#ifdef __ID3D12Device10_INTERFACE_DEFINED__ + else if (createParams.Variant == CREATE_RESOURCE_PARAMS::VARIANT_WITH_LAYOUT) + { + if (!m_Device10) + { + return E_NOINTERFACE; + } + finalResourceDesc1 = *createParams.GetResourceDesc1(); + finalCreateParams.AccessResourceDesc1() = &finalResourceDesc1; + resAllocInfo = GetResourceAllocationInfo(finalResourceDesc1, createParams.GetNumCastableFormats(), createParams.GetCastableFormats()); + } +#endif + else + { + D3D12MA_ASSERT(0); + return E_INVALIDARG; + } + D3D12MA_ASSERT(IsPow2(resAllocInfo.Alignment)); + D3D12MA_ASSERT(resAllocInfo.SizeInBytes > 0); + + ID3D12Heap* const existingHeap = pAllocation->GetHeap(); + const UINT64 existingOffset = pAllocation->GetOffset(); + const UINT64 existingSize = pAllocation->GetSize(); + const UINT64 newOffset = existingOffset + AllocationLocalOffset; + + if (existingHeap == NULL || + AllocationLocalOffset + resAllocInfo.SizeInBytes > existingSize || + newOffset % resAllocInfo.Alignment != 0) + { + return E_INVALIDARG; + } + + return CreatePlacedResourceWrap(existingHeap, newOffset, finalCreateParams, riidResource, ppvResource); +} + +void AllocatorPimpl::FreeCommittedMemory(Allocation* allocation) +{ + D3D12MA_ASSERT(allocation && allocation->m_PackedData.GetType() == Allocation::TYPE_COMMITTED); + + CommittedAllocationList* const allocList = allocation->m_Committed.list; + allocList->Unregister(allocation); + + const UINT memSegmentGroup = allocList->GetMemorySegmentGroup(this); + const UINT64 allocSize = allocation->GetSize(); + m_Budget.RemoveAllocation(memSegmentGroup, allocSize); + m_Budget.RemoveBlock(memSegmentGroup, allocSize); +} + +void AllocatorPimpl::FreePlacedMemory(Allocation* allocation) +{ + D3D12MA_ASSERT(allocation && allocation->m_PackedData.GetType() == Allocation::TYPE_PLACED); + + NormalBlock* const block = allocation->m_Placed.block; + D3D12MA_ASSERT(block); + BlockVector* const blockVector = block->GetBlockVector(); + D3D12MA_ASSERT(blockVector); + m_Budget.RemoveAllocation(HeapPropertiesToMemorySegmentGroup(block->GetHeapProperties()), allocation->GetSize()); + blockVector->Free(allocation); +} + +void AllocatorPimpl::FreeHeapMemory(Allocation* allocation) +{ + D3D12MA_ASSERT(allocation && allocation->m_PackedData.GetType() == Allocation::TYPE_HEAP); + + CommittedAllocationList* const allocList = allocation->m_Committed.list; + allocList->Unregister(allocation); + SAFE_RELEASE(allocation->m_Heap.heap); + + const UINT memSegmentGroup = allocList->GetMemorySegmentGroup(this); + const UINT64 allocSize = allocation->GetSize(); + m_Budget.RemoveAllocation(memSegmentGroup, allocSize); + m_Budget.RemoveBlock(memSegmentGroup, allocSize); +} + +void AllocatorPimpl::SetResidencyPriority(ID3D12Pageable* obj, D3D12_RESIDENCY_PRIORITY priority) const +{ +#ifdef __ID3D12Device1_INTERFACE_DEFINED__ + if (priority != D3D12_RESIDENCY_PRIORITY_NONE && m_Device1) + { + // Intentionally ignoring the result. 
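+ // Note: ID3D12Device1::SetResidencyPriority takes parallel arrays of pageable objects
+ // and priorities; here it is invoked with a single-element array. A failed call only
+ // means the priority hint was not applied, which is safe to ignore.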
+ m_Device1->SetResidencyPriority(1, &obj, &priority); + } +#endif +} + +void AllocatorPimpl::CalculateStatistics(TotalStatistics& outStats, DetailedStatistics outCustomHeaps[2]) +{ + // Init stats + for (size_t i = 0; i < HEAP_TYPE_COUNT; i++) + ClearDetailedStatistics(outStats.HeapType[i]); + for (size_t i = 0; i < DXGI_MEMORY_SEGMENT_GROUP_COUNT; i++) + ClearDetailedStatistics(outStats.MemorySegmentGroup[i]); + ClearDetailedStatistics(outStats.Total); + if (outCustomHeaps) + { + ClearDetailedStatistics(outCustomHeaps[0]); + ClearDetailedStatistics(outCustomHeaps[1]); + } + + // Process default pools. 4 standard heap types only. Add them to outStats.HeapType[i]. + if (SupportsResourceHeapTier2()) + { + // DEFAULT, UPLOAD, READBACK, GPU_UPLOAD. + for (size_t heapTypeIndex = 0; heapTypeIndex < STANDARD_HEAP_TYPE_COUNT; ++heapTypeIndex) + { + BlockVector* const pBlockVector = m_BlockVectors[heapTypeIndex]; + D3D12MA_ASSERT(pBlockVector); + const size_t outputIndex = heapTypeIndex < 3 ? heapTypeIndex : 4; // GPU_UPLOAD 3 -> 4 + pBlockVector->AddDetailedStatistics(outStats.HeapType[outputIndex]); + } + } + else + { + // DEFAULT, UPLOAD, READBACK. + for (size_t heapTypeIndex = 0; heapTypeIndex < STANDARD_HEAP_TYPE_COUNT; ++heapTypeIndex) + { + for (size_t heapSubType = 0; heapSubType < 3; ++heapSubType) + { + BlockVector* const pBlockVector = m_BlockVectors[heapTypeIndex * 3 + heapSubType]; + D3D12MA_ASSERT(pBlockVector); + + const size_t outputIndex = heapTypeIndex < 3 ? heapTypeIndex : 4; // GPU_UPLOAD 3 -> 4 + pBlockVector->AddDetailedStatistics(outStats.HeapType[outputIndex]); + } + } + } + + // Sum them up to memory segment groups. + AddDetailedStatistics( + outStats.MemorySegmentGroup[StandardHeapTypeToMemorySegmentGroup(D3D12_HEAP_TYPE_DEFAULT)], + outStats.HeapType[0]); + AddDetailedStatistics( + outStats.MemorySegmentGroup[StandardHeapTypeToMemorySegmentGroup(D3D12_HEAP_TYPE_UPLOAD)], + outStats.HeapType[1]); + AddDetailedStatistics( + outStats.MemorySegmentGroup[StandardHeapTypeToMemorySegmentGroup(D3D12_HEAP_TYPE_READBACK)], + outStats.HeapType[2]); +#ifdef D3D12MA_HAS_GPU_UPLOAD_HEAP + AddDetailedStatistics( + outStats.MemorySegmentGroup[StandardHeapTypeToMemorySegmentGroup(D3D12_HEAP_TYPE_GPU_UPLOAD)], + outStats.HeapType[4]); +#endif + + // Process custom pools. + DetailedStatistics tmpStats; + for (size_t heapTypeIndex = 0; heapTypeIndex < HEAP_TYPE_COUNT; ++heapTypeIndex) + { + MutexLockRead lock(m_PoolsMutex[heapTypeIndex], m_UseMutex); + PoolList& poolList = m_Pools[heapTypeIndex]; + for (PoolPimpl* pool = poolList.Front(); pool != NULL; pool = poolList.GetNext(pool)) + { + const D3D12_HEAP_PROPERTIES& poolHeapProps = pool->GetDesc().HeapProperties; + ClearDetailedStatistics(tmpStats); + pool->AddDetailedStatistics(tmpStats); + AddDetailedStatistics( + outStats.HeapType[heapTypeIndex], tmpStats); + + UINT memorySegment = HeapPropertiesToMemorySegmentGroup(poolHeapProps); + AddDetailedStatistics( + outStats.MemorySegmentGroup[memorySegment], tmpStats); + + if (outCustomHeaps) + AddDetailedStatistics(outCustomHeaps[memorySegment], tmpStats); + } + } + + // Process committed allocations. standard heap types only. + for (UINT heapTypeIndex = 0; heapTypeIndex < STANDARD_HEAP_TYPE_COUNT; ++heapTypeIndex) + { + ClearDetailedStatistics(tmpStats); + m_CommittedAllocations[heapTypeIndex].AddDetailedStatistics(tmpStats); + const size_t outputIndex = heapTypeIndex < 3 ? 
heapTypeIndex : 4; // GPU_UPLOAD 3 -> 4 + AddDetailedStatistics( + outStats.HeapType[outputIndex], tmpStats); + AddDetailedStatistics( + outStats.MemorySegmentGroup[StandardHeapTypeToMemorySegmentGroup(IndexToStandardHeapType(heapTypeIndex))], tmpStats); + } + + // Sum up memory segment groups to totals. + AddDetailedStatistics(outStats.Total, outStats.MemorySegmentGroup[0]); + AddDetailedStatistics(outStats.Total, outStats.MemorySegmentGroup[1]); + + D3D12MA_ASSERT(outStats.Total.Stats.BlockCount == + outStats.MemorySegmentGroup[0].Stats.BlockCount + outStats.MemorySegmentGroup[1].Stats.BlockCount); + D3D12MA_ASSERT(outStats.Total.Stats.AllocationCount == + outStats.MemorySegmentGroup[0].Stats.AllocationCount + outStats.MemorySegmentGroup[1].Stats.AllocationCount); + D3D12MA_ASSERT(outStats.Total.Stats.BlockBytes == + outStats.MemorySegmentGroup[0].Stats.BlockBytes + outStats.MemorySegmentGroup[1].Stats.BlockBytes); + D3D12MA_ASSERT(outStats.Total.Stats.AllocationBytes == + outStats.MemorySegmentGroup[0].Stats.AllocationBytes + outStats.MemorySegmentGroup[1].Stats.AllocationBytes); + D3D12MA_ASSERT(outStats.Total.UnusedRangeCount == + outStats.MemorySegmentGroup[0].UnusedRangeCount + outStats.MemorySegmentGroup[1].UnusedRangeCount); + + D3D12MA_ASSERT(outStats.Total.Stats.BlockCount == + outStats.HeapType[0].Stats.BlockCount + outStats.HeapType[1].Stats.BlockCount + + outStats.HeapType[2].Stats.BlockCount + outStats.HeapType[3].Stats.BlockCount + + outStats.HeapType[4].Stats.BlockCount); + D3D12MA_ASSERT(outStats.Total.Stats.AllocationCount == + outStats.HeapType[0].Stats.AllocationCount + outStats.HeapType[1].Stats.AllocationCount + + outStats.HeapType[2].Stats.AllocationCount + outStats.HeapType[3].Stats.AllocationCount + + outStats.HeapType[4].Stats.AllocationCount); + D3D12MA_ASSERT(outStats.Total.Stats.BlockBytes == + outStats.HeapType[0].Stats.BlockBytes + outStats.HeapType[1].Stats.BlockBytes + + outStats.HeapType[2].Stats.BlockBytes + outStats.HeapType[3].Stats.BlockBytes + + outStats.HeapType[4].Stats.BlockBytes); + D3D12MA_ASSERT(outStats.Total.Stats.AllocationBytes == + outStats.HeapType[0].Stats.AllocationBytes + outStats.HeapType[1].Stats.AllocationBytes + + outStats.HeapType[2].Stats.AllocationBytes + outStats.HeapType[3].Stats.AllocationBytes + + outStats.HeapType[4].Stats.AllocationBytes); + D3D12MA_ASSERT(outStats.Total.UnusedRangeCount == + outStats.HeapType[0].UnusedRangeCount + outStats.HeapType[1].UnusedRangeCount + + outStats.HeapType[2].UnusedRangeCount + outStats.HeapType[3].UnusedRangeCount + + outStats.HeapType[4].UnusedRangeCount); +} + +void AllocatorPimpl::GetBudget(Budget* outLocalBudget, Budget* outNonLocalBudget) +{ + if (outLocalBudget) + m_Budget.GetStatistics(outLocalBudget->Stats, DXGI_MEMORY_SEGMENT_GROUP_LOCAL_COPY); + if (outNonLocalBudget) + m_Budget.GetStatistics(outNonLocalBudget->Stats, DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL_COPY); + +#if D3D12MA_DXGI_1_4 + if (m_Adapter3) + { + if (!m_Budget.ShouldUpdateBudget()) + { + m_Budget.GetBudget(m_UseMutex, + outLocalBudget ? &outLocalBudget->UsageBytes : NULL, + outLocalBudget ? &outLocalBudget->BudgetBytes : NULL, + outNonLocalBudget ? &outNonLocalBudget->UsageBytes : NULL, + outNonLocalBudget ? 
&outNonLocalBudget->BudgetBytes : NULL);
+ }
+ else
+ {
+ UpdateD3D12Budget();
+ GetBudget(outLocalBudget, outNonLocalBudget); // Recursion
+ }
+ }
+ else
+#endif
+ {
+ if (outLocalBudget)
+ {
+ outLocalBudget->UsageBytes = outLocalBudget->Stats.BlockBytes;
+ outLocalBudget->BudgetBytes = GetMemoryCapacity(DXGI_MEMORY_SEGMENT_GROUP_LOCAL_COPY) * 8 / 10; // 80% heuristics.
+ }
+ if (outNonLocalBudget)
+ {
+ outNonLocalBudget->UsageBytes = outNonLocalBudget->Stats.BlockBytes;
+ outNonLocalBudget->BudgetBytes = GetMemoryCapacity(DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL_COPY) * 8 / 10; // 80% heuristics.
+ }
+ }
+}
+
+void AllocatorPimpl::GetBudgetForHeapType(Budget& outBudget, D3D12_HEAP_TYPE heapType)
+{
+ switch (heapType)
+ {
+ case D3D12_HEAP_TYPE_DEFAULT:
+#ifdef D3D12MA_HAS_GPU_UPLOAD_HEAP
+ case D3D12_HEAP_TYPE_GPU_UPLOAD:
+#endif
+ GetBudget(&outBudget, NULL);
+ break;
+ case D3D12_HEAP_TYPE_UPLOAD:
+ case D3D12_HEAP_TYPE_READBACK:
+ GetBudget(NULL, &outBudget);
+ break;
+ default: D3D12MA_ASSERT(0);
+ }
+}
+
+void AllocatorPimpl::BuildStatsString(WCHAR** ppStatsString, BOOL detailedMap)
+{
+ StringBuilder sb(GetAllocs());
+ {
+ Budget localBudget = {}, nonLocalBudget = {};
+ GetBudget(&localBudget, &nonLocalBudget);
+
+ TotalStatistics stats;
+ DetailedStatistics customHeaps[2];
+ CalculateStatistics(stats, customHeaps);
+
+ JsonWriter json(GetAllocs(), sb);
+ json.BeginObject();
+ {
+ json.WriteString(L"General");
+ json.BeginObject();
+ {
+ json.WriteString(L"API");
+ json.WriteString(L"Direct3D 12");
+
+ json.WriteString(L"GPU");
+ json.WriteString(m_AdapterDesc.Description);
+
+ json.WriteString(L"DedicatedVideoMemory");
+ json.WriteNumber((UINT64)m_AdapterDesc.DedicatedVideoMemory);
+ json.WriteString(L"DedicatedSystemMemory");
+ json.WriteNumber((UINT64)m_AdapterDesc.DedicatedSystemMemory);
+ json.WriteString(L"SharedSystemMemory");
+ json.WriteNumber((UINT64)m_AdapterDesc.SharedSystemMemory);
+
+ json.WriteString(L"ResourceHeapTier");
+ json.WriteNumber(static_cast<UINT>(m_D3D12Options.ResourceHeapTier));
+
+ json.WriteString(L"ResourceBindingTier");
+ json.WriteNumber(static_cast<UINT>(m_D3D12Options.ResourceBindingTier));
+
+ json.WriteString(L"TiledResourcesTier");
+ json.WriteNumber(static_cast<UINT>(m_D3D12Options.TiledResourcesTier));
+
+ json.WriteString(L"TileBasedRenderer");
+ json.WriteBool(m_D3D12Architecture.TileBasedRenderer);
+
+ json.WriteString(L"UMA");
+ json.WriteBool(m_D3D12Architecture.UMA);
+ json.WriteString(L"CacheCoherentUMA");
+ json.WriteBool(m_D3D12Architecture.CacheCoherentUMA);
+
+ json.WriteString(L"GPUUploadHeapSupported");
+ json.WriteBool(m_GPUUploadHeapSupported != FALSE);
+ }
+ json.EndObject();
+ }
+ {
+ json.WriteString(L"Total");
+ json.AddDetailedStatisticsInfoObject(stats.Total);
+ }
+ {
+ json.WriteString(L"MemoryInfo");
+ json.BeginObject();
+ {
+ json.WriteString(L"L0");
+ json.BeginObject();
+ {
+ json.WriteString(L"Budget");
+ WriteBudgetToJson(json, IsUMA() ?
localBudget : nonLocalBudget); // When UMA device only L0 present as local + + json.WriteString(L"Stats"); + json.AddDetailedStatisticsInfoObject(stats.MemorySegmentGroup[!IsUMA()]); + + json.WriteString(L"MemoryPools"); + json.BeginObject(); + { + if (IsUMA()) + { + json.WriteString(L"DEFAULT"); + json.BeginObject(); + { + json.WriteString(L"Stats"); + json.AddDetailedStatisticsInfoObject(stats.HeapType[0]); + } + json.EndObject(); + + if(IsGPUUploadHeapSupported()) + { + json.WriteString(L"GPU_UPLOAD"); + json.BeginObject(); + { + json.WriteString(L"Stats"); + json.AddDetailedStatisticsInfoObject(stats.HeapType[4]); + } + json.EndObject(); + } + } + json.WriteString(L"UPLOAD"); + json.BeginObject(); + { + json.WriteString(L"Stats"); + json.AddDetailedStatisticsInfoObject(stats.HeapType[1]); + } + json.EndObject(); + + json.WriteString(L"READBACK"); + json.BeginObject(); + { + json.WriteString(L"Stats"); + json.AddDetailedStatisticsInfoObject(stats.HeapType[2]); + } + json.EndObject(); + + json.WriteString(L"CUSTOM"); + json.BeginObject(); + { + json.WriteString(L"Stats"); + json.AddDetailedStatisticsInfoObject(customHeaps[!IsUMA()]); + } + json.EndObject(); + } + json.EndObject(); + } + json.EndObject(); + if (!IsUMA()) + { + json.WriteString(L"L1"); + json.BeginObject(); + { + json.WriteString(L"Budget"); + WriteBudgetToJson(json, localBudget); + + json.WriteString(L"Stats"); + json.AddDetailedStatisticsInfoObject(stats.MemorySegmentGroup[0]); + + json.WriteString(L"MemoryPools"); + json.BeginObject(); + { + json.WriteString(L"DEFAULT"); + json.BeginObject(); + { + json.WriteString(L"Stats"); + json.AddDetailedStatisticsInfoObject(stats.HeapType[0]); + } + json.EndObject(); + + if(IsGPUUploadHeapSupported()) + { + json.WriteString(L"GPU_UPLOAD"); + json.BeginObject(); + { + json.WriteString(L"Stats"); + json.AddDetailedStatisticsInfoObject(stats.HeapType[4]); + } + json.EndObject(); + } + + json.WriteString(L"CUSTOM"); + json.BeginObject(); + { + json.WriteString(L"Stats"); + json.AddDetailedStatisticsInfoObject(customHeaps[0]); + } + json.EndObject(); + } + json.EndObject(); + } + json.EndObject(); + } + } + json.EndObject(); + } + + if (detailedMap) + { + const auto writeHeapInfo = [&](BlockVector* blockVector, CommittedAllocationList* committedAllocs, bool customHeap) + { + D3D12MA_ASSERT(blockVector); + + D3D12_HEAP_FLAGS flags = blockVector->GetHeapFlags(); + json.WriteString(L"Flags"); + json.BeginArray(true); + { + if (flags & D3D12_HEAP_FLAG_SHARED) + json.WriteString(L"HEAP_FLAG_SHARED"); + if (flags & D3D12_HEAP_FLAG_ALLOW_DISPLAY) + json.WriteString(L"HEAP_FLAG_ALLOW_DISPLAY"); + if (flags & D3D12_HEAP_FLAG_SHARED_CROSS_ADAPTER) + json.WriteString(L"HEAP_FLAG_CROSS_ADAPTER"); + if (flags & D3D12_HEAP_FLAG_HARDWARE_PROTECTED) + json.WriteString(L"HEAP_FLAG_HARDWARE_PROTECTED"); + if (flags & D3D12_HEAP_FLAG_ALLOW_WRITE_WATCH) + json.WriteString(L"HEAP_FLAG_ALLOW_WRITE_WATCH"); + if (flags & D3D12_HEAP_FLAG_ALLOW_SHADER_ATOMICS) + json.WriteString(L"HEAP_FLAG_ALLOW_SHADER_ATOMICS"); +#ifdef __ID3D12Device8_INTERFACE_DEFINED__ + if (flags & D3D12_HEAP_FLAG_CREATE_NOT_RESIDENT) + json.WriteString(L"HEAP_FLAG_CREATE_NOT_RESIDENT"); + if (flags & D3D12_HEAP_FLAG_CREATE_NOT_ZEROED) + json.WriteString(L"HEAP_FLAG_CREATE_NOT_ZEROED"); +#endif + + if (flags & D3D12_HEAP_FLAG_DENY_BUFFERS) + json.WriteString(L"HEAP_FLAG_DENY_BUFFERS"); + if (flags & D3D12_HEAP_FLAG_DENY_RT_DS_TEXTURES) + json.WriteString(L"HEAP_FLAG_DENY_RT_DS_TEXTURES"); + if (flags & 
D3D12_HEAP_FLAG_DENY_NON_RT_DS_TEXTURES) + json.WriteString(L"HEAP_FLAG_DENY_NON_RT_DS_TEXTURES"); + + flags &= ~(D3D12_HEAP_FLAG_SHARED + | D3D12_HEAP_FLAG_DENY_BUFFERS + | D3D12_HEAP_FLAG_ALLOW_DISPLAY + | D3D12_HEAP_FLAG_SHARED_CROSS_ADAPTER + | D3D12_HEAP_FLAG_DENY_RT_DS_TEXTURES + | D3D12_HEAP_FLAG_DENY_NON_RT_DS_TEXTURES + | D3D12_HEAP_FLAG_HARDWARE_PROTECTED + | D3D12_HEAP_FLAG_ALLOW_WRITE_WATCH + | D3D12_HEAP_FLAG_ALLOW_SHADER_ATOMICS); +#ifdef __ID3D12Device8_INTERFACE_DEFINED__ + flags &= ~(D3D12_HEAP_FLAG_CREATE_NOT_RESIDENT + | D3D12_HEAP_FLAG_CREATE_NOT_ZEROED); +#endif + if (flags != 0) + json.WriteNumber((UINT)flags); + + if (customHeap) + { + const D3D12_HEAP_PROPERTIES& properties = blockVector->GetHeapProperties(); + switch (properties.MemoryPoolPreference) + { + default: + D3D12MA_ASSERT(0); + case D3D12_MEMORY_POOL_UNKNOWN: + json.WriteString(L"MEMORY_POOL_UNKNOWN"); + break; + case D3D12_MEMORY_POOL_L0: + json.WriteString(L"MEMORY_POOL_L0"); + break; + case D3D12_MEMORY_POOL_L1: + json.WriteString(L"MEMORY_POOL_L1"); + break; + } + switch (properties.CPUPageProperty) + { + default: + D3D12MA_ASSERT(0); + case D3D12_CPU_PAGE_PROPERTY_UNKNOWN: + json.WriteString(L"CPU_PAGE_PROPERTY_UNKNOWN"); + break; + case D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE: + json.WriteString(L"CPU_PAGE_PROPERTY_NOT_AVAILABLE"); + break; + case D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE: + json.WriteString(L"CPU_PAGE_PROPERTY_WRITE_COMBINE"); + break; + case D3D12_CPU_PAGE_PROPERTY_WRITE_BACK: + json.WriteString(L"CPU_PAGE_PROPERTY_WRITE_BACK"); + break; + } + } + } + json.EndArray(); + + json.WriteString(L"PreferredBlockSize"); + json.WriteNumber(blockVector->GetPreferredBlockSize()); + + json.WriteString(L"Blocks"); + blockVector->WriteBlockInfoToJson(json); + + json.WriteString(L"DedicatedAllocations"); + json.BeginArray(); + if (committedAllocs) + committedAllocs->BuildStatsString(json); + json.EndArray(); + }; + + json.WriteString(L"DefaultPools"); + json.BeginObject(); + { + if (SupportsResourceHeapTier2()) + { + for (uint8_t heapType = 0; heapType < STANDARD_HEAP_TYPE_COUNT; ++heapType) + { + json.WriteString(StandardHeapTypeNames[heapType]); + json.BeginObject(); + writeHeapInfo(m_BlockVectors[heapType], m_CommittedAllocations + heapType, false); + json.EndObject(); + } + } + else + { + for (uint8_t heapType = 0; heapType < STANDARD_HEAP_TYPE_COUNT; ++heapType) + { + for (uint8_t heapSubType = 0; heapSubType < 3; ++heapSubType) + { + static const WCHAR* const heapSubTypeName[] = { + L" - Buffers", + L" - Textures", + L" - Textures RT/DS", + }; + json.BeginString(StandardHeapTypeNames[heapType]); + json.EndString(heapSubTypeName[heapSubType]); + + json.BeginObject(); + writeHeapInfo(m_BlockVectors[heapType * 3 + heapSubType], m_CommittedAllocations + heapType, false); + json.EndObject(); + } + } + } + } + json.EndObject(); + + json.WriteString(L"CustomPools"); + json.BeginObject(); + for (uint8_t heapTypeIndex = 0; heapTypeIndex < HEAP_TYPE_COUNT; ++heapTypeIndex) + { + MutexLockRead mutex(m_PoolsMutex[heapTypeIndex], m_UseMutex); + auto* item = m_Pools[heapTypeIndex].Front(); + if (item != NULL) + { + size_t index = 0; + json.WriteString(HeapTypeNames[heapTypeIndex]); + json.BeginArray(); + do + { + json.BeginObject(); + json.WriteString(L"Name"); + json.BeginString(); + json.ContinueString(index++); + if (item->GetName()) + { + json.ContinueString(L" - "); + json.ContinueString(item->GetName()); + } + json.EndString(); + + writeHeapInfo(item->GetBlockVector(), 
item->GetCommittedAllocationList(), heapTypeIndex == 3);
+ json.EndObject();
+ } while ((item = PoolList::GetNext(item)) != NULL);
+ json.EndArray();
+ }
+ }
+ json.EndObject();
+ }
+ json.EndObject();
+ }
+
+ const size_t length = sb.GetLength();
+ WCHAR* result = AllocateArray<WCHAR>(GetAllocs(), length + 2);
+ result[0] = 0xFEFF;
+ memcpy(result + 1, sb.GetData(), length * sizeof(WCHAR));
+ result[length + 1] = L'\0';
+ *ppStatsString = result;
+}
+
+void AllocatorPimpl::FreeStatsString(WCHAR* pStatsString)
+{
+ D3D12MA_ASSERT(pStatsString);
+ Free(GetAllocs(), pStatsString);
+}
+
+template<typename D3D12_RESOURCE_DESC_T>
+bool AllocatorPimpl::PrefersCommittedAllocation(const D3D12_RESOURCE_DESC_T& resourceDesc,
+ ALLOCATION_FLAGS strategy)
+{
+ // Prefer creating small buffers <= 32 KB as committed, because drivers pack them better,
+ // while placed buffers require 64 KB alignment.
+ if(resourceDesc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER &&
+ resourceDesc.Width <= D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT / 2 &&
+ strategy != ALLOCATION_FLAG_STRATEGY_MIN_TIME && // Creating as committed would be slower.
+ m_PreferSmallBuffersCommitted)
+ {
+ return true;
+ }
+
+ // Intentional. It may change in the future.
+ return false;
+}
+
+HRESULT AllocatorPimpl::AllocateCommittedResource(
+ const CommittedAllocationParameters& committedAllocParams,
+ UINT64 resourceSize, bool withinBudget, void* pPrivateData,
+ const CREATE_RESOURCE_PARAMS& createParams,
+ Allocation** ppAllocation, REFIID riidResource, void** ppvResource)
+{
+ D3D12MA_ASSERT(committedAllocParams.IsValid());
+
+ HRESULT hr;
+ ID3D12Resource* res = NULL;
+ // Allocate aliasing memory with explicit heap
+ if (committedAllocParams.m_CanAlias)
+ {
+ D3D12_RESOURCE_ALLOCATION_INFO heapAllocInfo = {};
+ heapAllocInfo.SizeInBytes = resourceSize;
+ heapAllocInfo.Alignment = HeapFlagsToAlignment(committedAllocParams.m_HeapFlags, m_MsaaAlwaysCommitted);
+ hr = AllocateHeap(committedAllocParams, heapAllocInfo, withinBudget, pPrivateData, ppAllocation);
+ if (SUCCEEDED(hr))
+ {
+ hr = CreatePlacedResourceWrap((*ppAllocation)->GetHeap(), 0,
+ createParams, D3D12MA_IID_PPV_ARGS(&res));
+ if (SUCCEEDED(hr))
+ {
+ if (ppvResource != NULL)
+ hr = res->QueryInterface(riidResource, ppvResource);
+ if (SUCCEEDED(hr))
+ {
+ (*ppAllocation)->SetResourcePointer(res, createParams.GetBaseResourceDesc());
+ return hr;
+ }
+ res->Release();
+ }
+ FreeHeapMemory(*ppAllocation);
+ }
+ return hr;
+ }
+
+ if (withinBudget &&
+ !NewAllocationWithinBudget(committedAllocParams.m_HeapProperties.Type, resourceSize))
+ {
+ return E_OUTOFMEMORY;
+ }
+
+ /* D3D12 ERROR:
+ * ID3D12Device::CreateCommittedResource:
+ * When creating a committed resource, D3D12_HEAP_FLAGS must not have either
+ * D3D12_HEAP_FLAG_DENY_NON_RT_DS_TEXTURES,
+ * D3D12_HEAP_FLAG_DENY_RT_DS_TEXTURES,
+ * nor D3D12_HEAP_FLAG_DENY_BUFFERS set.
+ * These flags will be set automatically to correspond with the committed resource type.
+ * + * [ STATE_CREATION ERROR #640: CREATERESOURCEANDHEAP_INVALIDHEAPMISCFLAGS] + */ + +#ifdef __ID3D12Device10_INTERFACE_DEFINED__ + if (createParams.Variant == CREATE_RESOURCE_PARAMS::VARIANT_WITH_LAYOUT) + { + if (!m_Device10) + { + return E_NOINTERFACE; + } + hr = m_Device10->CreateCommittedResource3( + &committedAllocParams.m_HeapProperties, + committedAllocParams.m_HeapFlags & ~RESOURCE_CLASS_HEAP_FLAGS, + createParams.GetResourceDesc1(), createParams.GetInitialLayout(), + createParams.GetOptimizedClearValue(), committedAllocParams.m_ProtectedSession, + createParams.GetNumCastableFormats(), createParams.GetCastableFormats(), + D3D12MA_IID_PPV_ARGS(&res)); + } else +#endif +#ifdef __ID3D12Device8_INTERFACE_DEFINED__ + if (createParams.Variant == CREATE_RESOURCE_PARAMS::VARIANT_WITH_STATE_AND_DESC1) + { + if (!m_Device8) + { + return E_NOINTERFACE; + } + hr = m_Device8->CreateCommittedResource2( + &committedAllocParams.m_HeapProperties, + committedAllocParams.m_HeapFlags & ~RESOURCE_CLASS_HEAP_FLAGS, + createParams.GetResourceDesc1(), createParams.GetInitialResourceState(), + createParams.GetOptimizedClearValue(), committedAllocParams.m_ProtectedSession, + D3D12MA_IID_PPV_ARGS(&res)); + } else +#endif + if (createParams.Variant == CREATE_RESOURCE_PARAMS::VARIANT_WITH_STATE) + { +#ifdef __ID3D12Device4_INTERFACE_DEFINED__ + if (m_Device4) + { + hr = m_Device4->CreateCommittedResource1( + &committedAllocParams.m_HeapProperties, + committedAllocParams.m_HeapFlags & ~RESOURCE_CLASS_HEAP_FLAGS, + createParams.GetResourceDesc(), createParams.GetInitialResourceState(), + createParams.GetOptimizedClearValue(), committedAllocParams.m_ProtectedSession, + D3D12MA_IID_PPV_ARGS(&res)); + } + else +#endif + { + if (committedAllocParams.m_ProtectedSession == NULL) + { + hr = m_Device->CreateCommittedResource( + &committedAllocParams.m_HeapProperties, + committedAllocParams.m_HeapFlags & ~RESOURCE_CLASS_HEAP_FLAGS, + createParams.GetResourceDesc(), createParams.GetInitialResourceState(), + createParams.GetOptimizedClearValue(), D3D12MA_IID_PPV_ARGS(&res)); + } + else + hr = E_NOINTERFACE; + } + } + else + { + D3D12MA_ASSERT(0); + return E_INVALIDARG; + } + + if (SUCCEEDED(hr)) + { + SetResidencyPriority(res, committedAllocParams.m_ResidencyPriority); + + if (ppvResource != NULL) + { + hr = res->QueryInterface(riidResource, ppvResource); + } + if (SUCCEEDED(hr)) + { + Allocation* alloc = m_AllocationObjectAllocator.Allocate( + this, resourceSize, createParams.GetBaseResourceDesc()->Alignment); + alloc->InitCommitted(committedAllocParams.m_List); + alloc->SetResourcePointer(res, createParams.GetBaseResourceDesc()); + alloc->SetPrivateData(pPrivateData); + + *ppAllocation = alloc; + + committedAllocParams.m_List->Register(alloc); + + const UINT memSegmentGroup = HeapPropertiesToMemorySegmentGroup(committedAllocParams.m_HeapProperties); + m_Budget.AddBlock(memSegmentGroup, resourceSize); + m_Budget.AddAllocation(memSegmentGroup, resourceSize); + } + else + { + res->Release(); + } + } + return hr; +} + +HRESULT AllocatorPimpl::AllocateHeap( + const CommittedAllocationParameters& committedAllocParams, + const D3D12_RESOURCE_ALLOCATION_INFO& allocInfo, bool withinBudget, + void* pPrivateData, Allocation** ppAllocation) +{ + D3D12MA_ASSERT(committedAllocParams.IsValid()); + + *ppAllocation = nullptr; + + if (withinBudget && + !NewAllocationWithinBudget(committedAllocParams.m_HeapProperties.Type, allocInfo.SizeInBytes)) + { + return E_OUTOFMEMORY; + } + + D3D12_HEAP_DESC heapDesc = {}; + heapDesc.SizeInBytes 
= allocInfo.SizeInBytes;
+ heapDesc.Properties = committedAllocParams.m_HeapProperties;
+ heapDesc.Alignment = allocInfo.Alignment;
+ heapDesc.Flags = committedAllocParams.m_HeapFlags;
+
+ HRESULT hr;
+ ID3D12Heap* heap = nullptr;
+#ifdef __ID3D12Device4_INTERFACE_DEFINED__
+ if (m_Device4)
+ hr = m_Device4->CreateHeap1(&heapDesc, committedAllocParams.m_ProtectedSession, D3D12MA_IID_PPV_ARGS(&heap));
+ else
+#endif
+ {
+ if (committedAllocParams.m_ProtectedSession == NULL)
+ hr = m_Device->CreateHeap(&heapDesc, D3D12MA_IID_PPV_ARGS(&heap));
+ else
+ hr = E_NOINTERFACE;
+ }
+
+ if (SUCCEEDED(hr))
+ {
+ SetResidencyPriority(heap, committedAllocParams.m_ResidencyPriority);
+ (*ppAllocation) = m_AllocationObjectAllocator.Allocate(this, allocInfo.SizeInBytes, allocInfo.Alignment);
+ (*ppAllocation)->InitHeap(committedAllocParams.m_List, heap);
+ (*ppAllocation)->SetPrivateData(pPrivateData);
+ committedAllocParams.m_List->Register(*ppAllocation);
+
+ const UINT memSegmentGroup = HeapPropertiesToMemorySegmentGroup(committedAllocParams.m_HeapProperties);
+ m_Budget.AddBlock(memSegmentGroup, allocInfo.SizeInBytes);
+ m_Budget.AddAllocation(memSegmentGroup, allocInfo.SizeInBytes);
+ }
+ return hr;
+}
+
+template<typename D3D12_RESOURCE_DESC_T>
+HRESULT AllocatorPimpl::CalcAllocationParams(const ALLOCATION_DESC& allocDesc, UINT64 allocSize,
+ const D3D12_RESOURCE_DESC_T* resDesc,
+ BlockVector*& outBlockVector, CommittedAllocationParameters& outCommittedAllocationParams, bool& outPreferCommitted)
+{
+ outBlockVector = NULL;
+ outCommittedAllocationParams = CommittedAllocationParameters();
+ outPreferCommitted = false;
+
+#ifdef D3D12MA_HAS_GPU_UPLOAD_HEAP
+ D3D12MA_ASSERT((allocDesc.HeapType != D3D12_HEAP_TYPE_GPU_UPLOAD || IsGPUUploadHeapSupported()) &&
+ "Trying to allocate from D3D12_HEAP_TYPE_GPU_UPLOAD while GPUUploadHeapSupported == FALSE or D3D12MA_HAS_GPU_UPLOAD_HEAP macro was not defined when compiling D3D12MA library.");
+#endif
+
+ bool msaaAlwaysCommitted;
+ if (allocDesc.CustomPool != NULL)
+ {
+ PoolPimpl* const pool = allocDesc.CustomPool->m_Pimpl;
+
+ msaaAlwaysCommitted = pool->GetBlockVector()->DeniesMsaaTextures();
+ outBlockVector = pool->GetBlockVector();
+
+ const auto& desc = pool->GetDesc();
+ outCommittedAllocationParams.m_ProtectedSession = desc.pProtectedSession;
+ outCommittedAllocationParams.m_HeapProperties = desc.HeapProperties;
+ outCommittedAllocationParams.m_HeapFlags = desc.HeapFlags;
+ outCommittedAllocationParams.m_List = pool->GetCommittedAllocationList();
+ outCommittedAllocationParams.m_ResidencyPriority = pool->GetDesc().ResidencyPriority;
+ }
+ else
+ {
+ if (!IsHeapTypeStandard(allocDesc.HeapType))
+ {
+ return E_INVALIDARG;
+ }
+ msaaAlwaysCommitted = m_MsaaAlwaysCommitted;
+
+ outCommittedAllocationParams.m_HeapProperties = StandardHeapTypeToHeapProperties(allocDesc.HeapType);
+ outCommittedAllocationParams.m_HeapFlags = allocDesc.ExtraHeapFlags;
+ outCommittedAllocationParams.m_List = &m_CommittedAllocations[StandardHeapTypeToIndex(allocDesc.HeapType)];
+ // outCommittedAllocationParams.m_ResidencyPriority intentionally left with default value.
+
+ const ResourceClass resourceClass = (resDesc != NULL) ?
+ ResourceDescToResourceClass(*resDesc) : HeapFlagsToResourceClass(allocDesc.ExtraHeapFlags);
+ const UINT defaultPoolIndex = CalcDefaultPoolIndex(allocDesc, resourceClass);
+ if (defaultPoolIndex != UINT32_MAX)
+ {
+ outBlockVector = m_BlockVectors[defaultPoolIndex];
+ const UINT64 preferredBlockSize = outBlockVector->GetPreferredBlockSize();
+ if (allocSize > preferredBlockSize)
+ {
+ outBlockVector = NULL;
+ }
+ else if (allocSize > preferredBlockSize / 2)
+ {
+ // Heuristics: Allocate committed memory if requested size is greater than half of preferred block size.
+ outPreferCommitted = true;
+ }
+ }
+
+ const D3D12_HEAP_FLAGS extraHeapFlags = allocDesc.ExtraHeapFlags & ~RESOURCE_CLASS_HEAP_FLAGS;
+ if (outBlockVector != NULL && extraHeapFlags != 0)
+ {
+ outBlockVector = NULL;
+ }
+ }
+
+ if ((allocDesc.Flags & ALLOCATION_FLAG_COMMITTED) != 0 ||
+ m_AlwaysCommitted)
+ {
+ outBlockVector = NULL;
+ }
+ if ((allocDesc.Flags & ALLOCATION_FLAG_NEVER_ALLOCATE) != 0)
+ {
+ outCommittedAllocationParams.m_List = NULL;
+ }
+ outCommittedAllocationParams.m_CanAlias = allocDesc.Flags & ALLOCATION_FLAG_CAN_ALIAS;
+
+ if (resDesc != NULL)
+ {
+ if (resDesc->SampleDesc.Count > 1 && msaaAlwaysCommitted)
+ outBlockVector = NULL;
+ if (!outPreferCommitted && PrefersCommittedAllocation(*resDesc, allocDesc.Flags & ALLOCATION_FLAG_STRATEGY_MASK))
+ outPreferCommitted = true;
+ }
+
+ return (outBlockVector != NULL || outCommittedAllocationParams.m_List != NULL) ? S_OK : E_INVALIDARG;
+}
+
+UINT AllocatorPimpl::CalcDefaultPoolIndex(const ALLOCATION_DESC& allocDesc, ResourceClass resourceClass) const
+{
+ D3D12_HEAP_FLAGS extraHeapFlags = allocDesc.ExtraHeapFlags & ~RESOURCE_CLASS_HEAP_FLAGS;
+
+#if D3D12MA_CREATE_NOT_ZEROED_AVAILABLE
+ // If allocator was created with ALLOCATOR_FLAG_DEFAULT_POOLS_NOT_ZEROED, also ignore
+ // D3D12_HEAP_FLAG_CREATE_NOT_ZEROED.
+ if(m_DefaultPoolsNotZeroed) + { + extraHeapFlags &= ~D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; + } +#endif + + if (extraHeapFlags != 0) + { + return UINT32_MAX; + } + + UINT poolIndex = UINT_MAX; + switch (allocDesc.HeapType) + { + case D3D12_HEAP_TYPE_DEFAULT: poolIndex = 0; break; + case D3D12_HEAP_TYPE_UPLOAD: poolIndex = 1; break; + case D3D12_HEAP_TYPE_READBACK: poolIndex = 2; break; +#ifdef D3D12MA_HAS_GPU_UPLOAD_HEAP + case D3D12_HEAP_TYPE_GPU_UPLOAD: poolIndex = 3; break; +#endif + default: D3D12MA_ASSERT(0); + } + + if (SupportsResourceHeapTier2()) + return poolIndex; + else + { + switch (resourceClass) + { + case ResourceClass::Buffer: + return poolIndex * 3; + case ResourceClass::Non_RT_DS_Texture: + return poolIndex * 3 + 1; + case ResourceClass::RT_DS_Texture: + return poolIndex * 3 + 2; + default: + return UINT32_MAX; + } + } +} + +void AllocatorPimpl::CalcDefaultPoolParams(D3D12_HEAP_TYPE& outHeapType, D3D12_HEAP_FLAGS& outHeapFlags, UINT index) const +{ + outHeapType = D3D12_HEAP_TYPE_DEFAULT; + outHeapFlags = D3D12_HEAP_FLAG_NONE; + + if (!SupportsResourceHeapTier2()) + { + switch (index % 3) + { + case 0: + outHeapFlags = D3D12_HEAP_FLAG_DENY_RT_DS_TEXTURES | D3D12_HEAP_FLAG_DENY_NON_RT_DS_TEXTURES; + break; + case 1: + outHeapFlags = D3D12_HEAP_FLAG_DENY_BUFFERS | D3D12_HEAP_FLAG_DENY_RT_DS_TEXTURES; + break; + case 2: + outHeapFlags = D3D12_HEAP_FLAG_DENY_BUFFERS | D3D12_HEAP_FLAG_DENY_NON_RT_DS_TEXTURES; + break; + } + + index /= 3; + } + + switch (index) + { + case 0: + outHeapType = D3D12_HEAP_TYPE_DEFAULT; + break; + case 1: + outHeapType = D3D12_HEAP_TYPE_UPLOAD; + break; + case 2: + outHeapType = D3D12_HEAP_TYPE_READBACK; + break; +#ifdef D3D12MA_HAS_GPU_UPLOAD_HEAP + case 3: + outHeapType = D3D12_HEAP_TYPE_GPU_UPLOAD; + break; +#endif + default: + D3D12MA_ASSERT(0); + } +} + +void AllocatorPimpl::RegisterPool(Pool* pool, D3D12_HEAP_TYPE heapType) +{ + const UINT heapTypeIndex = (UINT)heapType - 1; + + MutexLockWrite lock(m_PoolsMutex[heapTypeIndex], m_UseMutex); + m_Pools[heapTypeIndex].PushBack(pool->m_Pimpl); +} + +void AllocatorPimpl::UnregisterPool(Pool* pool, D3D12_HEAP_TYPE heapType) +{ + const UINT heapTypeIndex = (UINT)heapType - 1; + + MutexLockWrite lock(m_PoolsMutex[heapTypeIndex], m_UseMutex); + m_Pools[heapTypeIndex].Remove(pool->m_Pimpl); +} + +HRESULT AllocatorPimpl::UpdateD3D12Budget() +{ +#if D3D12MA_DXGI_1_4 + if (m_Adapter3) + return m_Budget.UpdateBudget(m_Adapter3, m_UseMutex); + else + return E_NOINTERFACE; +#else + return S_OK; +#endif +} + +D3D12_RESOURCE_ALLOCATION_INFO AllocatorPimpl::GetResourceAllocationInfoNative(const D3D12_RESOURCE_DESC& resourceDesc, UINT32 NumCastableFormats, + DXGI_FORMAT* pCastableFormats) const +{ + // This is how new D3D12 headers define GetResourceAllocationInfo function - + // different signature depending on these macros. 
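+// (Clarification, an assumption based on the COM ABI rather than anything stated in this file:
+// with MSVC, and with the official headers used on non-Windows builds, the method returns
+// D3D12_RESOURCE_ALLOCATION_INFO by value; other Windows toolchains such as MinGW expose the
+// raw COM calling convention, where a struct return larger than 8 bytes travels through a
+// hidden pointer parameter preceding the regular arguments - hence the two call shapes below.)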
+#if defined(_MSC_VER) || !defined(_WIN32)
+ return m_Device->GetResourceAllocationInfo(0, 1, &resourceDesc);
+#else
+ D3D12_RESOURCE_ALLOCATION_INFO retVal;
+ return *m_Device->GetResourceAllocationInfo(&retVal, 0, 1, &resourceDesc);
+#endif
+}
+
+#ifdef __ID3D12Device8_INTERFACE_DEFINED__
+D3D12_RESOURCE_ALLOCATION_INFO AllocatorPimpl::GetResourceAllocationInfoNative(const D3D12_RESOURCE_DESC1& resourceDesc, UINT32 NumCastableFormats,
+ DXGI_FORMAT* pCastableFormats) const
+{
+ D3D12MA_ASSERT(m_Device8 != NULL);
+ D3D12_RESOURCE_ALLOCATION_INFO1 info1Unused;
+
+ if (NumCastableFormats != 0 && pCastableFormats)
+ {
+#ifdef __ID3D12Device12_INTERFACE_DEFINED__
+ D3D12MA_ASSERT(m_Device12 != NULL);
+
+ // This is how new D3D12 headers define GetResourceAllocationInfo function -
+ // different signature depending on these macros.
+ #if defined(_MSC_VER) || !defined(_WIN32)
+ return m_Device12->GetResourceAllocationInfo3(0, 1, &resourceDesc, &NumCastableFormats, &pCastableFormats, &info1Unused);
+ #else
+ D3D12_RESOURCE_ALLOCATION_INFO retVal;
+ return *m_Device12->GetResourceAllocationInfo3(&retVal, 0, 1, &resourceDesc, &NumCastableFormats, &pCastableFormats, &info1Unused);
+ #endif
+
+#else
+ D3D12MA_ASSERT(false && "GetResourceAllocationInfo with castable formats cannot be called without ID3D12Device12 support");
+#endif
+ }
+
+
+ // This is how new D3D12 headers define GetResourceAllocationInfo function -
+ // different signature depending on these macros.
+#if defined(_MSC_VER) || !defined(_WIN32)
+ return m_Device8->GetResourceAllocationInfo2(0, 1, &resourceDesc, &info1Unused);
+#else
+ D3D12_RESOURCE_ALLOCATION_INFO retVal;
+ return *m_Device8->GetResourceAllocationInfo2(&retVal, 0, 1, &resourceDesc, &info1Unused);
+#endif
+}
+#endif // #ifdef __ID3D12Device8_INTERFACE_DEFINED__
+
+template<typename D3D12_RESOURCE_DESC_T>
+D3D12_RESOURCE_ALLOCATION_INFO AllocatorPimpl::GetResourceAllocationInfo(D3D12_RESOURCE_DESC_T& inOutResourceDesc, UINT32 NumCastableFormats,
+ DXGI_FORMAT* pCastableFormats) const
+{
+#ifdef __ID3D12Device1_INTERFACE_DEFINED__
+ /* Optional optimization: Microsoft documentation says:
+ https://docs.microsoft.com/en-us/windows/win32/api/d3d12/nf-d3d12-id3d12device-getresourceallocationinfo
+
+ Your application can forgo using GetResourceAllocationInfo for buffer resources
+ (D3D12_RESOURCE_DIMENSION_BUFFER). Buffers have the same size on all adapters,
+ which is merely the smallest multiple of 64KB that's greater or equal to
+ D3D12_RESOURCE_DESC::Width.
+ */
+ if (inOutResourceDesc.Alignment == 0 &&
+ inOutResourceDesc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER)
+ {
+ return {
+ AlignUp<UINT64>(inOutResourceDesc.Width, D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT), // SizeInBytes
+ D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT }; // Alignment
+ }
+#endif // #ifdef __ID3D12Device1_INTERFACE_DEFINED__
+
+#if D3D12MA_USE_SMALL_RESOURCE_PLACEMENT_ALIGNMENT
+ if (inOutResourceDesc.Alignment == 0 &&
+ inOutResourceDesc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE2D &&
+ (inOutResourceDesc.Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) == 0
+#if D3D12MA_USE_SMALL_RESOURCE_PLACEMENT_ALIGNMENT == 1
+ && CanUseSmallAlignment(inOutResourceDesc)
+#endif
+ )
+ {
+ /*
+ The algorithm here is based on Microsoft sample: "Small Resources Sample"
+ https://github.com/microsoft/DirectX-Graphics-Samples/tree/master/Samples/Desktop/D3D12SmallResources
+ */
+ const UINT64 smallAlignmentToTry = inOutResourceDesc.SampleDesc.Count > 1 ?
+ D3D12_SMALL_MSAA_RESOURCE_PLACEMENT_ALIGNMENT :
+ D3D12_SMALL_RESOURCE_PLACEMENT_ALIGNMENT;
+ inOutResourceDesc.Alignment = smallAlignmentToTry;
+ const D3D12_RESOURCE_ALLOCATION_INFO smallAllocInfo = GetResourceAllocationInfoNative(inOutResourceDesc);
+ // Check if alignment requested has been granted.
+ if (smallAllocInfo.Alignment == smallAlignmentToTry)
+ {
+ return smallAllocInfo;
+ }
+ inOutResourceDesc.Alignment = 0; // Restore original
+ }
+#endif // #if D3D12MA_USE_SMALL_RESOURCE_PLACEMENT_ALIGNMENT
+
+ return GetResourceAllocationInfoNative(inOutResourceDesc, NumCastableFormats, pCastableFormats);
+}
+
+bool AllocatorPimpl::NewAllocationWithinBudget(D3D12_HEAP_TYPE heapType, UINT64 size)
+{
+ Budget budget = {};
+ GetBudgetForHeapType(budget, heapType);
+ return budget.UsageBytes + size <= budget.BudgetBytes;
+}
+
+void AllocatorPimpl::WriteBudgetToJson(JsonWriter& json, const Budget& budget)
+{
+ json.BeginObject();
+ {
+ json.WriteString(L"BudgetBytes");
+ json.WriteNumber(budget.BudgetBytes);
+ json.WriteString(L"UsageBytes");
+ json.WriteNumber(budget.UsageBytes);
+ }
+ json.EndObject();
+}
+
+#endif // _D3D12MA_ALLOCATOR_PIMPL_FUNCTIONS
+#endif // _D3D12MA_ALLOCATOR_PIMPL
+
+#ifndef _D3D12MA_VIRTUAL_BLOCK_PIMPL
+class VirtualBlockPimpl
+{
+public:
+ const ALLOCATION_CALLBACKS m_AllocationCallbacks;
+ const UINT64 m_Size;
+ BlockMetadata* m_Metadata;
+
+ VirtualBlockPimpl(const ALLOCATION_CALLBACKS& allocationCallbacks, const VIRTUAL_BLOCK_DESC& desc);
+ ~VirtualBlockPimpl();
+};
+
+#ifndef _D3D12MA_VIRTUAL_BLOCK_PIMPL_FUNCTIONS
+VirtualBlockPimpl::VirtualBlockPimpl(const ALLOCATION_CALLBACKS& allocationCallbacks, const VIRTUAL_BLOCK_DESC& desc)
+ : m_AllocationCallbacks(allocationCallbacks), m_Size(desc.Size)
+{
+ switch (desc.Flags & VIRTUAL_BLOCK_FLAG_ALGORITHM_MASK)
+ {
+ case VIRTUAL_BLOCK_FLAG_ALGORITHM_LINEAR:
+ m_Metadata = D3D12MA_NEW(allocationCallbacks, BlockMetadata_Linear)(&m_AllocationCallbacks, true);
+ break;
+ default:
+ D3D12MA_ASSERT(0);
+ case 0:
+ m_Metadata = D3D12MA_NEW(allocationCallbacks, BlockMetadata_TLSF)(&m_AllocationCallbacks, true);
+ break;
+ }
+ m_Metadata->Init(m_Size);
+}
+
+VirtualBlockPimpl::~VirtualBlockPimpl()
+{
+ D3D12MA_DELETE(m_AllocationCallbacks, m_Metadata);
+}
+#endif // _D3D12MA_VIRTUAL_BLOCK_PIMPL_FUNCTIONS
+#endif // _D3D12MA_VIRTUAL_BLOCK_PIMPL
+
+
+#ifndef _D3D12MA_MEMORY_BLOCK_FUNCTIONS
+MemoryBlock::MemoryBlock(
+ AllocatorPimpl* allocator,
+ const D3D12_HEAP_PROPERTIES& heapProps,
+ D3D12_HEAP_FLAGS heapFlags,
+ UINT64 size,
+ UINT id)
+ : m_Allocator(allocator),
+ m_HeapProps(heapProps),
+ m_HeapFlags(heapFlags),
+ m_Size(size),
+ m_Id(id) {}
+
+MemoryBlock::~MemoryBlock()
+{
+ if (m_Heap)
+ {
+ m_Heap->Release();
+ m_Allocator->m_Budget.RemoveBlock(
+ m_Allocator->HeapPropertiesToMemorySegmentGroup(m_HeapProps), m_Size);
+ }
+}
+
+HRESULT MemoryBlock::Init(ID3D12ProtectedResourceSession* pProtectedSession, bool denyMsaaTextures)
+{
+ D3D12MA_ASSERT(m_Heap == NULL && m_Size > 0);
+
+ D3D12_HEAP_DESC heapDesc = {};
+ heapDesc.SizeInBytes = m_Size;
+ heapDesc.Properties = m_HeapProps;
+ heapDesc.Alignment = HeapFlagsToAlignment(m_HeapFlags, denyMsaaTextures);
+ heapDesc.Flags = m_HeapFlags;
+
+ HRESULT hr;
+#ifdef __ID3D12Device4_INTERFACE_DEFINED__
+ ID3D12Device4* const device4 = m_Allocator->GetDevice4();
+ if (device4)
+ hr = m_Allocator->GetDevice4()->CreateHeap1(&heapDesc, pProtectedSession, D3D12MA_IID_PPV_ARGS(&m_Heap));
+ else
+#endif
+ {
+ if (pProtectedSession == NULL)
+ hr =
m_Allocator->GetDevice()->CreateHeap(&heapDesc, D3D12MA_IID_PPV_ARGS(&m_Heap)); + else + hr = E_NOINTERFACE; + } + + if (SUCCEEDED(hr)) + { + m_Allocator->m_Budget.AddBlock( + m_Allocator->HeapPropertiesToMemorySegmentGroup(m_HeapProps), m_Size); + } + return hr; +} +#endif // _D3D12MA_MEMORY_BLOCK_FUNCTIONS + +#ifndef _D3D12MA_NORMAL_BLOCK_FUNCTIONS +NormalBlock::NormalBlock( + AllocatorPimpl* allocator, + BlockVector* blockVector, + const D3D12_HEAP_PROPERTIES& heapProps, + D3D12_HEAP_FLAGS heapFlags, + UINT64 size, + UINT id) + : MemoryBlock(allocator, heapProps, heapFlags, size, id), + m_pMetadata(NULL), + m_BlockVector(blockVector) {} + +NormalBlock::~NormalBlock() +{ + if (m_pMetadata != NULL) + { + // Define macro D3D12MA_DEBUG_LOG to receive the list of the unfreed allocations. + if (!m_pMetadata->IsEmpty()) + m_pMetadata->DebugLogAllAllocations(); + + // THIS IS THE MOST IMPORTANT ASSERT IN THE ENTIRE LIBRARY! + // Hitting it means you have some memory leak - unreleased Allocation objects. + D3D12MA_ASSERT(m_pMetadata->IsEmpty() && "Some allocations were not freed before destruction of this memory block!"); + + D3D12MA_DELETE(m_Allocator->GetAllocs(), m_pMetadata); + } +} + +HRESULT NormalBlock::Init(UINT32 algorithm, ID3D12ProtectedResourceSession* pProtectedSession, bool denyMsaaTextures) +{ + HRESULT hr = MemoryBlock::Init(pProtectedSession, denyMsaaTextures); + if (FAILED(hr)) + { + return hr; + } + + switch (algorithm) + { + case POOL_FLAG_ALGORITHM_LINEAR: + m_pMetadata = D3D12MA_NEW(m_Allocator->GetAllocs(), BlockMetadata_Linear)(&m_Allocator->GetAllocs(), false); + break; + default: + D3D12MA_ASSERT(0); + case 0: + m_pMetadata = D3D12MA_NEW(m_Allocator->GetAllocs(), BlockMetadata_TLSF)(&m_Allocator->GetAllocs(), false); + break; + } + m_pMetadata->Init(m_Size); + + return hr; +} + +bool NormalBlock::Validate() const +{ + D3D12MA_VALIDATE(GetHeap() && + m_pMetadata && + m_pMetadata->GetSize() != 0 && + m_pMetadata->GetSize() == GetSize()); + return m_pMetadata->Validate(); +} +#endif // _D3D12MA_NORMAL_BLOCK_FUNCTIONS + +#ifndef _D3D12MA_COMMITTED_ALLOCATION_LIST_FUNCTIONS +void CommittedAllocationList::Init(bool useMutex, D3D12_HEAP_TYPE heapType, PoolPimpl* pool) +{ + m_UseMutex = useMutex; + m_HeapType = heapType; + m_Pool = pool; +} + +CommittedAllocationList::~CommittedAllocationList() +{ + if (!m_AllocationList.IsEmpty()) + { + D3D12MA_ASSERT(0 && "Unfreed committed allocations found!"); + } +} + +UINT CommittedAllocationList::GetMemorySegmentGroup(AllocatorPimpl* allocator) const +{ + if (m_Pool) + return allocator->HeapPropertiesToMemorySegmentGroup(m_Pool->GetDesc().HeapProperties); + else + return allocator->StandardHeapTypeToMemorySegmentGroup(m_HeapType); +} + +void CommittedAllocationList::AddStatistics(Statistics& inoutStats) +{ + MutexLockRead lock(m_Mutex, m_UseMutex); + + for (Allocation* alloc = m_AllocationList.Front(); + alloc != NULL; alloc = m_AllocationList.GetNext(alloc)) + { + const UINT64 size = alloc->GetSize(); + inoutStats.BlockCount++; + inoutStats.AllocationCount++; + inoutStats.BlockBytes += size; + inoutStats.AllocationBytes += size; + } +} + +void CommittedAllocationList::AddDetailedStatistics(DetailedStatistics& inoutStats) +{ + MutexLockRead lock(m_Mutex, m_UseMutex); + + for (Allocation* alloc = m_AllocationList.Front(); + alloc != NULL; alloc = m_AllocationList.GetNext(alloc)) + { + const UINT64 size = alloc->GetSize(); + inoutStats.Stats.BlockCount++; + inoutStats.Stats.BlockBytes += size; + AddDetailedStatisticsAllocation(inoutStats, 
size); + } +} + +void CommittedAllocationList::BuildStatsString(JsonWriter& json) +{ + MutexLockRead lock(m_Mutex, m_UseMutex); + + for (Allocation* alloc = m_AllocationList.Front(); + alloc != NULL; alloc = m_AllocationList.GetNext(alloc)) + { + json.BeginObject(true); + json.AddAllocationToObject(*alloc); + json.EndObject(); + } +} + +void CommittedAllocationList::Register(Allocation* alloc) +{ + MutexLockWrite lock(m_Mutex, m_UseMutex); + m_AllocationList.PushBack(alloc); +} + +void CommittedAllocationList::Unregister(Allocation* alloc) +{ + MutexLockWrite lock(m_Mutex, m_UseMutex); + m_AllocationList.Remove(alloc); +} +#endif // _D3D12MA_COMMITTED_ALLOCATION_LIST_FUNCTIONS + +#ifndef _D3D12MA_BLOCK_VECTOR_FUNCTIONS +BlockVector::BlockVector( + AllocatorPimpl* hAllocator, + const D3D12_HEAP_PROPERTIES& heapProps, + D3D12_HEAP_FLAGS heapFlags, + UINT64 preferredBlockSize, + size_t minBlockCount, + size_t maxBlockCount, + bool explicitBlockSize, + UINT64 minAllocationAlignment, + UINT32 algorithm, + bool denyMsaaTextures, + ID3D12ProtectedResourceSession* pProtectedSession, + D3D12_RESIDENCY_PRIORITY residencyPriority) + : m_hAllocator(hAllocator), + m_HeapProps(heapProps), + m_HeapFlags(heapFlags), + m_PreferredBlockSize(preferredBlockSize), + m_MinBlockCount(minBlockCount), + m_MaxBlockCount(maxBlockCount), + m_ExplicitBlockSize(explicitBlockSize), + m_MinAllocationAlignment(minAllocationAlignment), + m_Algorithm(algorithm), + m_DenyMsaaTextures(denyMsaaTextures), + m_ProtectedSession(pProtectedSession), + m_ResidencyPriority(residencyPriority), + m_HasEmptyBlock(false), + m_Blocks(hAllocator->GetAllocs()), + m_NextBlockId(0) {} + +BlockVector::~BlockVector() +{ + for (size_t i = m_Blocks.size(); i--; ) + { + D3D12MA_DELETE(m_hAllocator->GetAllocs(), m_Blocks[i]); + } +} + +HRESULT BlockVector::CreateMinBlocks() +{ + for (size_t i = 0; i < m_MinBlockCount; ++i) + { + HRESULT hr = CreateBlock(m_PreferredBlockSize, NULL); + if (FAILED(hr)) + { + return hr; + } + } + return S_OK; +} + +bool BlockVector::IsEmpty() +{ + MutexLockRead lock(m_Mutex, m_hAllocator->UseMutex()); + return m_Blocks.empty(); +} + +HRESULT BlockVector::Allocate( + UINT64 size, + UINT64 alignment, + const ALLOCATION_DESC& allocDesc, + size_t allocationCount, + Allocation** pAllocations) +{ + size_t allocIndex; + HRESULT hr = S_OK; + + { + MutexLockWrite lock(m_Mutex, m_hAllocator->UseMutex()); + for (allocIndex = 0; allocIndex < allocationCount; ++allocIndex) + { + hr = AllocatePage( + size, + alignment, + allocDesc, + pAllocations + allocIndex); + if (FAILED(hr)) + { + break; + } + } + } + + if (FAILED(hr)) + { + // Free all already created allocations. + while (allocIndex--) + { + Free(pAllocations[allocIndex]); + } + ZeroMemory(pAllocations, sizeof(Allocation*) * allocationCount); + } + + return hr; +} + +void BlockVector::Free(Allocation* hAllocation) +{ + NormalBlock* pBlockToDelete = NULL; + + bool budgetExceeded = false; + if (IsHeapTypeStandard(m_HeapProps.Type)) + { + Budget budget = {}; + m_hAllocator->GetBudgetForHeapType(budget, m_HeapProps.Type); + budgetExceeded = budget.UsageBytes >= budget.BudgetBytes; + } + + // Scope for lock. + { + MutexLockWrite lock(m_Mutex, m_hAllocator->UseMutex()); + + NormalBlock* pBlock = hAllocation->m_Placed.block; + + pBlock->m_pMetadata->Free(hAllocation->GetAllocHandle()); + D3D12MA_HEAVY_ASSERT(pBlock->Validate()); + + const size_t blockCount = m_Blocks.size(); + // pBlock became empty after this deallocation. 
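+        // A sketch of the policy applied below: the vector caches at most one empty block.
+        // If this free produced a second empty block (or the heap-type budget is already
+        // exceeded), that block is deleted, provided blockCount stays above m_MinBlockCount;
+        // otherwise it is kept and m_HasEmptyBlock is raised.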
+ if (pBlock->m_pMetadata->IsEmpty()) + { + // Already has empty Allocation. We don't want to have two, so delete this one. + if ((m_HasEmptyBlock || budgetExceeded) && + blockCount > m_MinBlockCount) + { + pBlockToDelete = pBlock; + Remove(pBlock); + } + // We now have first empty block. + else + { + m_HasEmptyBlock = true; + } + } + // pBlock didn't become empty, but we have another empty block - find and free that one. + // (This is optional, heuristics.) + else if (m_HasEmptyBlock && blockCount > m_MinBlockCount) + { + NormalBlock* pLastBlock = m_Blocks.back(); + if (pLastBlock->m_pMetadata->IsEmpty()) + { + pBlockToDelete = pLastBlock; + m_Blocks.pop_back(); + m_HasEmptyBlock = false; + } + } + + IncrementallySortBlocks(); + } + + // Destruction of a free Allocation. Deferred until this point, outside of mutex + // lock, for performance reason. + if (pBlockToDelete != NULL) + { + D3D12MA_DELETE(m_hAllocator->GetAllocs(), pBlockToDelete); + } +} + +HRESULT BlockVector::CreateResource( + UINT64 size, + UINT64 alignment, + const ALLOCATION_DESC& allocDesc, + const CREATE_RESOURCE_PARAMS& createParams, + Allocation** ppAllocation, + REFIID riidResource, + void** ppvResource) +{ + HRESULT hr = Allocate(size, alignment, allocDesc, 1, ppAllocation); + if (SUCCEEDED(hr)) + { + ID3D12Resource* res = NULL; + hr = m_hAllocator->CreatePlacedResourceWrap( + (*ppAllocation)->m_Placed.block->GetHeap(), + (*ppAllocation)->GetOffset(), + createParams, + D3D12MA_IID_PPV_ARGS(&res)); + if (SUCCEEDED(hr)) + { + if (ppvResource != NULL) + { + hr = res->QueryInterface(riidResource, ppvResource); + } + if (SUCCEEDED(hr)) + { + (*ppAllocation)->SetResourcePointer(res, createParams.GetBaseResourceDesc()); + } + else + { + res->Release(); + SAFE_RELEASE(*ppAllocation); + } + } + else + { + SAFE_RELEASE(*ppAllocation); + } + } + return hr; +} + +void BlockVector::AddStatistics(Statistics& inoutStats) +{ + MutexLockRead lock(m_Mutex, m_hAllocator->UseMutex()); + + for (size_t i = 0; i < m_Blocks.size(); ++i) + { + const NormalBlock* const pBlock = m_Blocks[i]; + D3D12MA_ASSERT(pBlock); + D3D12MA_HEAVY_ASSERT(pBlock->Validate()); + pBlock->m_pMetadata->AddStatistics(inoutStats); + } +} + +void BlockVector::AddDetailedStatistics(DetailedStatistics& inoutStats) +{ + MutexLockRead lock(m_Mutex, m_hAllocator->UseMutex()); + + for (size_t i = 0; i < m_Blocks.size(); ++i) + { + const NormalBlock* const pBlock = m_Blocks[i]; + D3D12MA_ASSERT(pBlock); + D3D12MA_HEAVY_ASSERT(pBlock->Validate()); + pBlock->m_pMetadata->AddDetailedStatistics(inoutStats); + } +} + +void BlockVector::WriteBlockInfoToJson(JsonWriter& json) +{ + MutexLockRead lock(m_Mutex, m_hAllocator->UseMutex()); + + json.BeginObject(); + + for (size_t i = 0, count = m_Blocks.size(); i < count; ++i) + { + const NormalBlock* const pBlock = m_Blocks[i]; + D3D12MA_ASSERT(pBlock); + D3D12MA_HEAVY_ASSERT(pBlock->Validate()); + json.BeginString(); + json.ContinueString(pBlock->GetId()); + json.EndString(); + + json.BeginObject(); + pBlock->m_pMetadata->WriteAllocationInfoToJson(json); + json.EndObject(); + } + + json.EndObject(); +} + +UINT64 BlockVector::CalcSumBlockSize() const +{ + UINT64 result = 0; + for (size_t i = m_Blocks.size(); i--; ) + { + result += m_Blocks[i]->m_pMetadata->GetSize(); + } + return result; +} + +UINT64 BlockVector::CalcMaxBlockSize() const +{ + UINT64 result = 0; + for (size_t i = m_Blocks.size(); i--; ) + { + result = D3D12MA_MAX(result, m_Blocks[i]->m_pMetadata->GetSize()); + if (result >= m_PreferredBlockSize) + { + break; + } + } + 
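+    // The early break above is safe: blocks are never created larger than
+    // m_PreferredBlockSize, so once the running maximum reaches it, nothing larger can exist.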
return result; +} + +void BlockVector::Remove(NormalBlock* pBlock) +{ + for (size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) + { + if (m_Blocks[blockIndex] == pBlock) + { + m_Blocks.remove(blockIndex); + return; + } + } + D3D12MA_ASSERT(0); +} + +void BlockVector::IncrementallySortBlocks() +{ + if (!m_IncrementalSort) + return; + // Bubble sort only until first swap. + for (size_t i = 1; i < m_Blocks.size(); ++i) + { + if (m_Blocks[i - 1]->m_pMetadata->GetSumFreeSize() > m_Blocks[i]->m_pMetadata->GetSumFreeSize()) + { + D3D12MA_SWAP(m_Blocks[i - 1], m_Blocks[i]); + return; + } + } +} + +void BlockVector::SortByFreeSize() +{ + D3D12MA_SORT(m_Blocks.begin(), m_Blocks.end(), + [](auto* b1, auto* b2) + { + return b1->m_pMetadata->GetSumFreeSize() < b2->m_pMetadata->GetSumFreeSize(); + }); +} + +HRESULT BlockVector::AllocatePage( + UINT64 size, + UINT64 alignment, + const ALLOCATION_DESC& allocDesc, + Allocation** pAllocation) +{ + // Early reject: requested allocation size is larger that maximum block size for this block vector. + if (size + D3D12MA_DEBUG_MARGIN > m_PreferredBlockSize) + { + return E_OUTOFMEMORY; + } + + UINT64 freeMemory = UINT64_MAX; + if (IsHeapTypeStandard(m_HeapProps.Type)) + { + Budget budget = {}; + m_hAllocator->GetBudgetForHeapType(budget, m_HeapProps.Type); + freeMemory = (budget.UsageBytes < budget.BudgetBytes) ? (budget.BudgetBytes - budget.UsageBytes) : 0; + } + + const bool canCreateNewBlock = + ((allocDesc.Flags & ALLOCATION_FLAG_NEVER_ALLOCATE) == 0) && + (m_Blocks.size() < m_MaxBlockCount) && + // Even if we don't have to stay within budget with this allocation, when the + // budget would be exceeded, we don't want to allocate new blocks, but always + // create resources as committed. + freeMemory >= size; + + // 1. Search existing allocations + { + // Forward order in m_Blocks - prefer blocks with smallest amount of free space. + for (size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) + { + NormalBlock* const pCurrBlock = m_Blocks[blockIndex]; + D3D12MA_ASSERT(pCurrBlock); + HRESULT hr = AllocateFromBlock( + pCurrBlock, + size, + alignment, + allocDesc.Flags, + allocDesc.pPrivateData, + allocDesc.Flags & ALLOCATION_FLAG_STRATEGY_MASK, + pAllocation); + if (SUCCEEDED(hr)) + { + return hr; + } + } + } + + // 2. Try to create new block. + if (canCreateNewBlock) + { + // Calculate optimal size for new block. + UINT64 newBlockSize = m_PreferredBlockSize; + UINT newBlockSizeShift = 0; + + if (!m_ExplicitBlockSize) + { + // Allocate 1/8, 1/4, 1/2 as first blocks. + const UINT64 maxExistingBlockSize = CalcMaxBlockSize(); + for (UINT i = 0; i < NEW_BLOCK_SIZE_SHIFT_MAX; ++i) + { + const UINT64 smallerNewBlockSize = newBlockSize / 2; + if (smallerNewBlockSize > maxExistingBlockSize && smallerNewBlockSize >= size * 2) + { + newBlockSize = smallerNewBlockSize; + ++newBlockSizeShift; + } + else + { + break; + } + } + } + + size_t newBlockIndex = 0; + HRESULT hr = newBlockSize <= freeMemory ? + CreateBlock(newBlockSize, &newBlockIndex) : E_OUTOFMEMORY; + // Allocation of this size failed? Try 1/2, 1/4, 1/8 of m_PreferredBlockSize. + if (!m_ExplicitBlockSize) + { + while (FAILED(hr) && newBlockSizeShift < NEW_BLOCK_SIZE_SHIFT_MAX) + { + const UINT64 smallerNewBlockSize = newBlockSize / 2; + if (smallerNewBlockSize >= size) + { + newBlockSize = smallerNewBlockSize; + ++newBlockSizeShift; + hr = newBlockSize <= freeMemory ? 
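+                // Illustrative walk-through of the sizing heuristic (assuming the default
+                // 64 MiB preferred block size and an empty pool): the loop above picks 8 MiB
+                // for the first block (64 -> 32 -> 16 -> 8). If a full-size heap was chosen
+                // but cannot be created, this retry loop halves the size (32, 16, 8 MiB)
+                // while it still fits the requested allocation.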
+ CreateBlock(newBlockSize, &newBlockIndex) : E_OUTOFMEMORY; + } + else + { + break; + } + } + } + + if (SUCCEEDED(hr)) + { + NormalBlock* const pBlock = m_Blocks[newBlockIndex]; + D3D12MA_ASSERT(pBlock->m_pMetadata->GetSize() >= size); + + hr = AllocateFromBlock( + pBlock, + size, + alignment, + allocDesc.Flags, + allocDesc.pPrivateData, + allocDesc.Flags & ALLOCATION_FLAG_STRATEGY_MASK, + pAllocation); + if (SUCCEEDED(hr)) + { + return hr; + } + else + { + // Allocation from new block failed, possibly due to D3D12MA_DEBUG_MARGIN or alignment. + return E_OUTOFMEMORY; + } + } + } + + return E_OUTOFMEMORY; +} + +HRESULT BlockVector::AllocateFromBlock( + NormalBlock* pBlock, + UINT64 size, + UINT64 alignment, + ALLOCATION_FLAGS allocFlags, + void* pPrivateData, + UINT32 strategy, + Allocation** pAllocation) +{ + alignment = D3D12MA_MAX(alignment, m_MinAllocationAlignment); + + AllocationRequest currRequest = {}; + if (pBlock->m_pMetadata->CreateAllocationRequest( + size, + alignment, + allocFlags & ALLOCATION_FLAG_UPPER_ADDRESS, + strategy, + &currRequest)) + { + return CommitAllocationRequest(currRequest, pBlock, size, alignment, pPrivateData, pAllocation); + } + return E_OUTOFMEMORY; +} + +HRESULT BlockVector::CommitAllocationRequest( + AllocationRequest& allocRequest, + NormalBlock* pBlock, + UINT64 size, + UINT64 alignment, + void* pPrivateData, + Allocation** pAllocation) +{ + // We no longer have an empty Allocation. + if (pBlock->m_pMetadata->IsEmpty()) + m_HasEmptyBlock = false; + + *pAllocation = m_hAllocator->GetAllocationObjectAllocator().Allocate(m_hAllocator, size, alignment); + pBlock->m_pMetadata->Alloc(allocRequest, size, *pAllocation); + + (*pAllocation)->InitPlaced(allocRequest.allocHandle, pBlock); + (*pAllocation)->SetPrivateData(pPrivateData); + + D3D12MA_HEAVY_ASSERT(pBlock->Validate()); + m_hAllocator->m_Budget.AddAllocation(m_hAllocator->HeapPropertiesToMemorySegmentGroup(m_HeapProps), size); + + return S_OK; +} + +HRESULT BlockVector::CreateBlock( + UINT64 blockSize, + size_t* pNewBlockIndex) +{ + NormalBlock* const pBlock = D3D12MA_NEW(m_hAllocator->GetAllocs(), NormalBlock)( + m_hAllocator, + this, + m_HeapProps, + m_HeapFlags, + blockSize, + m_NextBlockId++); + HRESULT hr = pBlock->Init(m_Algorithm, m_ProtectedSession, m_DenyMsaaTextures); + if (FAILED(hr)) + { + D3D12MA_DELETE(m_hAllocator->GetAllocs(), pBlock); + return hr; + } + + m_hAllocator->SetResidencyPriority(pBlock->GetHeap(), m_ResidencyPriority); + + m_Blocks.push_back(pBlock); + if (pNewBlockIndex != NULL) + { + *pNewBlockIndex = m_Blocks.size() - 1; + } + + return hr; +} +#endif // _D3D12MA_BLOCK_VECTOR_FUNCTIONS + +#ifndef _D3D12MA_DEFRAGMENTATION_CONTEXT_PIMPL_FUNCTIONS +DefragmentationContextPimpl::DefragmentationContextPimpl( + AllocatorPimpl* hAllocator, + const DEFRAGMENTATION_DESC& desc, + BlockVector* poolVector) + : m_MaxPassBytes(desc.MaxBytesPerPass == 0 ? UINT64_MAX : desc.MaxBytesPerPass), + m_MaxPassAllocations(desc.MaxAllocationsPerPass == 0 ? 
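+        // Convention used here: 0 in the public DEFRAGMENTATION_DESC means "no limit";
+        // it is normalized to UINT64_MAX / UINT32_MAX so later comparisons need no special case.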
UINT32_MAX : desc.MaxAllocationsPerPass),
+    m_Moves(hAllocator->GetAllocs())
+{
+    m_Algorithm = desc.Flags & DEFRAGMENTATION_FLAG_ALGORITHM_MASK;
+
+    if (poolVector != NULL)
+    {
+        m_BlockVectorCount = 1;
+        m_PoolBlockVector = poolVector;
+        m_pBlockVectors = &m_PoolBlockVector;
+        m_PoolBlockVector->SetIncrementalSort(false);
+        m_PoolBlockVector->SortByFreeSize();
+    }
+    else
+    {
+        m_BlockVectorCount = hAllocator->GetDefaultPoolCount();
+        m_PoolBlockVector = NULL;
+        m_pBlockVectors = hAllocator->GetDefaultPools();
+        for (UINT32 i = 0; i < m_BlockVectorCount; ++i)
+        {
+            BlockVector* vector = m_pBlockVectors[i];
+            if (vector != NULL)
+            {
+                vector->SetIncrementalSort(false);
+                vector->SortByFreeSize();
+            }
+        }
+    }
+
+    switch (m_Algorithm)
+    {
+    case 0: // Default algorithm
+        m_Algorithm = DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED;
+        // Intentional fallthrough to BALANCED.
+    case DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED:
+    {
+        m_AlgorithmState = D3D12MA_NEW_ARRAY(hAllocator->GetAllocs(), StateBalanced, m_BlockVectorCount);
+        break;
+    }
+    }
+}
+
+DefragmentationContextPimpl::~DefragmentationContextPimpl()
+{
+    if (m_PoolBlockVector != NULL)
+        m_PoolBlockVector->SetIncrementalSort(true);
+    else
+    {
+        for (UINT32 i = 0; i < m_BlockVectorCount; ++i)
+        {
+            BlockVector* vector = m_pBlockVectors[i];
+            if (vector != NULL)
+                vector->SetIncrementalSort(true);
+        }
+    }
+
+    if (m_AlgorithmState)
+    {
+        switch (m_Algorithm)
+        {
+        case DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED:
+            D3D12MA_DELETE_ARRAY(m_Moves.GetAllocs(), reinterpret_cast<StateBalanced*>(m_AlgorithmState), m_BlockVectorCount);
+            break;
+        default:
+            D3D12MA_ASSERT(0);
+        }
+    }
+}
+
+HRESULT DefragmentationContextPimpl::DefragmentPassBegin(DEFRAGMENTATION_PASS_MOVE_INFO& moveInfo)
+{
+    if (m_PoolBlockVector != NULL)
+    {
+        MutexLockWrite lock(m_PoolBlockVector->GetMutex(), m_PoolBlockVector->m_hAllocator->UseMutex());
+
+        if (m_PoolBlockVector->GetBlockCount() > 1)
+            ComputeDefragmentation(*m_PoolBlockVector, 0);
+        else if (m_PoolBlockVector->GetBlockCount() == 1)
+            ReallocWithinBlock(*m_PoolBlockVector, m_PoolBlockVector->GetBlock(0));
+
+        // Setup index into block vector
+        for (size_t i = 0; i < m_Moves.size(); ++i)
+            m_Moves[i].pDstTmpAllocation->SetPrivateData(0);
+    }
+    else
+    {
+        for (UINT32 i = 0; i < m_BlockVectorCount; ++i)
+        {
+            if (m_pBlockVectors[i] != NULL)
+            {
+                MutexLockWrite lock(m_pBlockVectors[i]->GetMutex(), m_pBlockVectors[i]->m_hAllocator->UseMutex());
+
+                bool end = false;
+                size_t movesOffset = m_Moves.size();
+                if (m_pBlockVectors[i]->GetBlockCount() > 1)
+                {
+                    end = ComputeDefragmentation(*m_pBlockVectors[i], i);
+                }
+                else if (m_pBlockVectors[i]->GetBlockCount() == 1)
+                {
+                    end = ReallocWithinBlock(*m_pBlockVectors[i], m_pBlockVectors[i]->GetBlock(0));
+                }
+
+                // Setup index into block vector
+                for (; movesOffset < m_Moves.size(); ++movesOffset)
+                    m_Moves[movesOffset].pDstTmpAllocation->SetPrivateData(reinterpret_cast<void*>(static_cast<uintptr_t>(i)));
+
+                if (end)
+                    break;
+            }
+        }
+    }
+
+    moveInfo.MoveCount = static_cast<UINT32>(m_Moves.size());
+    if (moveInfo.MoveCount > 0)
+    {
+        moveInfo.pMoves = m_Moves.data();
+        return S_FALSE;
+    }
+
+    moveInfo.pMoves = NULL;
+    return S_OK;
+}
+
+HRESULT DefragmentationContextPimpl::DefragmentPassEnd(DEFRAGMENTATION_PASS_MOVE_INFO& moveInfo)
+{
+    D3D12MA_ASSERT(moveInfo.MoveCount > 0 ?
moveInfo.pMoves != NULL : true);
+
+    HRESULT result = S_OK;
+    Vector<FragmentedBlock> immovableBlocks(m_Moves.GetAllocs());
+
+    for (uint32_t i = 0; i < moveInfo.MoveCount; ++i)
+    {
+        DEFRAGMENTATION_MOVE& move = moveInfo.pMoves[i];
+        size_t prevCount = 0, currentCount = 0;
+        UINT64 freedBlockSize = 0;
+
+        UINT32 vectorIndex;
+        BlockVector* vector;
+        if (m_PoolBlockVector != NULL)
+        {
+            vectorIndex = 0;
+            vector = m_PoolBlockVector;
+        }
+        else
+        {
+            vectorIndex = static_cast<UINT32>(reinterpret_cast<uintptr_t>(move.pDstTmpAllocation->GetPrivateData()));
+            vector = m_pBlockVectors[vectorIndex];
+            D3D12MA_ASSERT(vector != NULL);
+        }
+
+        switch (move.Operation)
+        {
+        case DEFRAGMENTATION_MOVE_OPERATION_COPY:
+        {
+            move.pSrcAllocation->SwapBlockAllocation(move.pDstTmpAllocation);
+
+            // Scope for locks; Free has its own lock
+            {
+                MutexLockRead lock(vector->GetMutex(), vector->m_hAllocator->UseMutex());
+                prevCount = vector->GetBlockCount();
+                freedBlockSize = move.pDstTmpAllocation->GetBlock()->m_pMetadata->GetSize();
+            }
+            move.pDstTmpAllocation->Release();
+            {
+                MutexLockRead lock(vector->GetMutex(), vector->m_hAllocator->UseMutex());
+                currentCount = vector->GetBlockCount();
+            }
+
+            result = S_FALSE;
+            break;
+        }
+        case DEFRAGMENTATION_MOVE_OPERATION_IGNORE:
+        {
+            m_PassStats.BytesMoved -= move.pSrcAllocation->GetSize();
+            --m_PassStats.AllocationsMoved;
+            move.pDstTmpAllocation->Release();
+
+            NormalBlock* newBlock = move.pSrcAllocation->GetBlock();
+            bool notPresent = true;
+            for (const FragmentedBlock& block : immovableBlocks)
+            {
+                if (block.block == newBlock)
+                {
+                    notPresent = false;
+                    break;
+                }
+            }
+            if (notPresent)
+                immovableBlocks.push_back({ vectorIndex, newBlock });
+            break;
+        }
+        case DEFRAGMENTATION_MOVE_OPERATION_DESTROY:
+        {
+            m_PassStats.BytesMoved -= move.pSrcAllocation->GetSize();
+            --m_PassStats.AllocationsMoved;
+            // Scope for locks; Free has its own lock
+            {
+                MutexLockRead lock(vector->GetMutex(), vector->m_hAllocator->UseMutex());
+                prevCount = vector->GetBlockCount();
+                freedBlockSize = move.pSrcAllocation->GetBlock()->m_pMetadata->GetSize();
+            }
+            move.pSrcAllocation->Release();
+            {
+                MutexLockRead lock(vector->GetMutex(), vector->m_hAllocator->UseMutex());
+                currentCount = vector->GetBlockCount();
+            }
+            freedBlockSize *= prevCount - currentCount;
+
+            UINT64 dstBlockSize;
+            {
+                MutexLockRead lock(vector->GetMutex(), vector->m_hAllocator->UseMutex());
+                dstBlockSize = move.pDstTmpAllocation->GetBlock()->m_pMetadata->GetSize();
+            }
+            move.pDstTmpAllocation->Release();
+            {
+                MutexLockRead lock(vector->GetMutex(), vector->m_hAllocator->UseMutex());
+                freedBlockSize += dstBlockSize * (currentCount - vector->GetBlockCount());
+                currentCount = vector->GetBlockCount();
+            }
+
+            result = S_FALSE;
+            break;
+        }
+        default:
+            D3D12MA_ASSERT(0);
+        }
+
+        if (prevCount > currentCount)
+        {
+            size_t freedBlocks = prevCount - currentCount;
+            m_PassStats.HeapsFreed += static_cast<UINT32>(freedBlocks);
+            m_PassStats.BytesFreed += freedBlockSize;
+        }
+    }
+    moveInfo.MoveCount = 0;
+    moveInfo.pMoves = NULL;
+    m_Moves.clear();
+
+    // Update stats
+    m_GlobalStats.AllocationsMoved += m_PassStats.AllocationsMoved;
+    m_GlobalStats.BytesFreed += m_PassStats.BytesFreed;
+    m_GlobalStats.BytesMoved += m_PassStats.BytesMoved;
+    m_GlobalStats.HeapsFreed += m_PassStats.HeapsFreed;
+    m_PassStats = { 0 };
+
+    // Move blocks with immovable allocations according to algorithm
+    if (immovableBlocks.size() > 0)
+    {
+        // Move to the beginning
+        for (const FragmentedBlock& block : immovableBlocks)
+        {
+            BlockVector* vector =
m_pBlockVectors[block.data]; + MutexLockWrite lock(vector->GetMutex(), vector->m_hAllocator->UseMutex()); + + for (size_t i = m_ImmovableBlockCount; i < vector->GetBlockCount(); ++i) + { + if (vector->GetBlock(i) == block.block) + { + D3D12MA_SWAP(vector->m_Blocks[i], vector->m_Blocks[m_ImmovableBlockCount++]); + break; + } + } + } + } + return result; +} + +bool DefragmentationContextPimpl::ComputeDefragmentation(BlockVector& vector, size_t index) +{ + switch (m_Algorithm) + { + case DEFRAGMENTATION_FLAG_ALGORITHM_FAST: + return ComputeDefragmentation_Fast(vector); + default: + D3D12MA_ASSERT(0); + case DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED: + return ComputeDefragmentation_Balanced(vector, index, true); + case DEFRAGMENTATION_FLAG_ALGORITHM_FULL: + return ComputeDefragmentation_Full(vector); + } +} + +DefragmentationContextPimpl::MoveAllocationData DefragmentationContextPimpl::GetMoveData( + AllocHandle handle, BlockMetadata* metadata) +{ + MoveAllocationData moveData; + moveData.move.pSrcAllocation = (Allocation*)metadata->GetAllocationPrivateData(handle); + moveData.size = moveData.move.pSrcAllocation->GetSize(); + moveData.alignment = moveData.move.pSrcAllocation->GetAlignment(); + moveData.flags = ALLOCATION_FLAG_NONE; + + return moveData; +} + +DefragmentationContextPimpl::CounterStatus DefragmentationContextPimpl::CheckCounters(UINT64 bytes) +{ + // Ignore allocation if will exceed max size for copy + if (m_PassStats.BytesMoved + bytes > m_MaxPassBytes) + { + if (++m_IgnoredAllocs < MAX_ALLOCS_TO_IGNORE) + return CounterStatus::Ignore; + else + return CounterStatus::End; + } + return CounterStatus::Pass; +} + +bool DefragmentationContextPimpl::IncrementCounters(UINT64 bytes) +{ + m_PassStats.BytesMoved += bytes; + // Early return when max found + if (++m_PassStats.AllocationsMoved >= m_MaxPassAllocations || m_PassStats.BytesMoved >= m_MaxPassBytes) + { + D3D12MA_ASSERT((m_PassStats.AllocationsMoved == m_MaxPassAllocations || + m_PassStats.BytesMoved == m_MaxPassBytes) && "Exceeded maximal pass threshold!"); + return true; + } + return false; +} + +bool DefragmentationContextPimpl::ReallocWithinBlock(BlockVector& vector, NormalBlock* block) +{ + BlockMetadata* metadata = block->m_pMetadata; + + for (AllocHandle handle = metadata->GetAllocationListBegin(); + handle != (AllocHandle)0; + handle = metadata->GetNextAllocation(handle)) + { + MoveAllocationData moveData = GetMoveData(handle, metadata); + // Ignore newly created allocations by defragmentation algorithm + if (moveData.move.pSrcAllocation->GetPrivateData() == this) + continue; + switch (CheckCounters(moveData.move.pSrcAllocation->GetSize())) + { + case CounterStatus::Ignore: + continue; + case CounterStatus::End: + return true; + default: + D3D12MA_ASSERT(0); + case CounterStatus::Pass: + break; + } + + UINT64 offset = moveData.move.pSrcAllocation->GetOffset(); + if (offset != 0 && metadata->GetSumFreeSize() >= moveData.size) + { + AllocationRequest request = {}; + if (metadata->CreateAllocationRequest( + moveData.size, + moveData.alignment, + false, + ALLOCATION_FLAG_STRATEGY_MIN_OFFSET, + &request)) + { + if (metadata->GetAllocationOffset(request.allocHandle) < offset) + { + if (SUCCEEDED(vector.CommitAllocationRequest( + request, + block, + moveData.size, + moveData.alignment, + this, + &moveData.move.pDstTmpAllocation))) + { + m_Moves.push_back(moveData.move); + if (IncrementCounters(moveData.size)) + return true; + } + } + } + } + } + return false; +} + +bool DefragmentationContextPimpl::AllocInOtherBlock(size_t start, 
size_t end, MoveAllocationData& data, BlockVector& vector)
+{
+    for (; start < end; ++start)
+    {
+        NormalBlock* dstBlock = vector.GetBlock(start);
+        if (dstBlock->m_pMetadata->GetSumFreeSize() >= data.size)
+        {
+            if (SUCCEEDED(vector.AllocateFromBlock(dstBlock,
+                data.size,
+                data.alignment,
+                data.flags,
+                this,
+                0,
+                &data.move.pDstTmpAllocation)))
+            {
+                m_Moves.push_back(data.move);
+                if (IncrementCounters(data.size))
+                    return true;
+                break;
+            }
+        }
+    }
+    return false;
+}
+
+bool DefragmentationContextPimpl::ComputeDefragmentation_Fast(BlockVector& vector)
+{
+    // Move only between blocks
+
+    // Go through allocations in last blocks and try to fit them inside first ones
+    for (size_t i = vector.GetBlockCount() - 1; i > m_ImmovableBlockCount; --i)
+    {
+        BlockMetadata* metadata = vector.GetBlock(i)->m_pMetadata;
+
+        for (AllocHandle handle = metadata->GetAllocationListBegin();
+            handle != (AllocHandle)0;
+            handle = metadata->GetNextAllocation(handle))
+        {
+            MoveAllocationData moveData = GetMoveData(handle, metadata);
+            // Ignore newly created allocations by defragmentation algorithm
+            if (moveData.move.pSrcAllocation->GetPrivateData() == this)
+                continue;
+            switch (CheckCounters(moveData.move.pSrcAllocation->GetSize()))
+            {
+            case CounterStatus::Ignore:
+                continue;
+            case CounterStatus::End:
+                return true;
+            default:
+                D3D12MA_ASSERT(0);
+            case CounterStatus::Pass:
+                break;
+            }
+
+            // Check all previous blocks for free space
+            if (AllocInOtherBlock(0, i, moveData, vector))
+                return true;
+        }
+    }
+    return false;
+}
+
+bool DefragmentationContextPimpl::ComputeDefragmentation_Balanced(BlockVector& vector, size_t index, bool update)
+{
+    // Go over every allocation and try to fit it in previous blocks at lowest offsets,
+    // if not possible: realloc within single block to minimize offset (exclude offset == 0),
+    // but only if there are noticeable gaps between them (some heuristic, e.g.
+    // average size of allocation in block)
+    D3D12MA_ASSERT(m_AlgorithmState != NULL);
+
+    StateBalanced& vectorState = reinterpret_cast<StateBalanced*>(m_AlgorithmState)[index];
+    if (update && vectorState.avgAllocSize == UINT64_MAX)
+        UpdateVectorStatistics(vector, vectorState);
+
+    const size_t startMoveCount = m_Moves.size();
+    UINT64 minimalFreeRegion = vectorState.avgFreeSize / 2;
+    for (size_t i = vector.GetBlockCount() - 1; i > m_ImmovableBlockCount; --i)
+    {
+        NormalBlock* block = vector.GetBlock(i);
+        BlockMetadata* metadata = block->m_pMetadata;
+        UINT64 prevFreeRegionSize = 0;
+
+        for (AllocHandle handle = metadata->GetAllocationListBegin();
+            handle != (AllocHandle)0;
+            handle = metadata->GetNextAllocation(handle))
+        {
+            MoveAllocationData moveData = GetMoveData(handle, metadata);
+            // Ignore newly created allocations by defragmentation algorithm
+            if (moveData.move.pSrcAllocation->GetPrivateData() == this)
+                continue;
+            switch (CheckCounters(moveData.move.pSrcAllocation->GetSize()))
+            {
+            case CounterStatus::Ignore:
+                continue;
+            case CounterStatus::End:
+                return true;
+            default:
+                D3D12MA_ASSERT(0);
+            case CounterStatus::Pass:
+                break;
+            }
+
+            // Check all previous blocks for free space
+            const size_t prevMoveCount = m_Moves.size();
+            if (AllocInOtherBlock(0, i, moveData, vector))
+                return true;
+
+            UINT64 nextFreeRegionSize = metadata->GetNextFreeRegionSize(handle);
+            // If no room found then realloc within block for lower offset
+            UINT64 offset = moveData.move.pSrcAllocation->GetOffset();
+            if (prevMoveCount == m_Moves.size() && offset != 0 && metadata->GetSumFreeSize() >= moveData.size)
+            {
+                // Check if realloc will make sense
+                if (prevFreeRegionSize >= minimalFreeRegion ||
+                    nextFreeRegionSize >= minimalFreeRegion ||
+                    moveData.size <= vectorState.avgFreeSize ||
+                    moveData.size <= vectorState.avgAllocSize)
+                {
+                    AllocationRequest request = {};
+                    if (metadata->CreateAllocationRequest(
+                        moveData.size,
+                        moveData.alignment,
+                        false,
+                        ALLOCATION_FLAG_STRATEGY_MIN_OFFSET,
+                        &request))
+                    {
+                        if (metadata->GetAllocationOffset(request.allocHandle) < offset)
+                        {
+                            if (SUCCEEDED(vector.CommitAllocationRequest(
+                                request,
+                                block,
+                                moveData.size,
+                                moveData.alignment,
+                                this,
+                                &moveData.move.pDstTmpAllocation)))
+                            {
+                                m_Moves.push_back(moveData.move);
+                                if (IncrementCounters(moveData.size))
+                                    return true;
+                            }
+                        }
+                    }
+                }
+            }
+            prevFreeRegionSize = nextFreeRegionSize;
+        }
+    }
+
+    // No moves performed, update statistics to current vector state
+    if (startMoveCount == m_Moves.size() && !update)
+    {
+        vectorState.avgAllocSize = UINT64_MAX;
+        return ComputeDefragmentation_Balanced(vector, index, false);
+    }
+    return false;
+}
+
+bool DefragmentationContextPimpl::ComputeDefragmentation_Full(BlockVector& vector)
+{
+    // Go over every allocation and try to fit it in previous blocks at lowest offsets,
+    // if not possible: realloc within single block to minimize offset (exclude offset == 0)
+
+    for (size_t i = vector.GetBlockCount() - 1; i > m_ImmovableBlockCount; --i)
+    {
+        NormalBlock* block = vector.GetBlock(i);
+        BlockMetadata* metadata = block->m_pMetadata;
+
+        for (AllocHandle handle = metadata->GetAllocationListBegin();
+            handle != (AllocHandle)0;
+            handle = metadata->GetNextAllocation(handle))
+        {
+            MoveAllocationData moveData = GetMoveData(handle, metadata);
+            // Ignore newly created allocations by defragmentation algorithm
+            if (moveData.move.pSrcAllocation->GetPrivateData() == this)
+                continue;
+            switch (CheckCounters(moveData.move.pSrcAllocation->GetSize()))
+            {
+            case CounterStatus::Ignore:
+
continue; + case CounterStatus::End: + return true; + default: + D3D12MA_ASSERT(0); + case CounterStatus::Pass: + break; + } + + // Check all previous blocks for free space + const size_t prevMoveCount = m_Moves.size(); + if (AllocInOtherBlock(0, i, moveData, vector)) + return true; + + // If no room found then realloc within block for lower offset + UINT64 offset = moveData.move.pSrcAllocation->GetOffset(); + if (prevMoveCount == m_Moves.size() && offset != 0 && metadata->GetSumFreeSize() >= moveData.size) + { + AllocationRequest request = {}; + if (metadata->CreateAllocationRequest( + moveData.size, + moveData.alignment, + false, + ALLOCATION_FLAG_STRATEGY_MIN_OFFSET, + &request)) + { + if (metadata->GetAllocationOffset(request.allocHandle) < offset) + { + if (SUCCEEDED(vector.CommitAllocationRequest( + request, + block, + moveData.size, + moveData.alignment, + this, + &moveData.move.pDstTmpAllocation))) + { + m_Moves.push_back(moveData.move); + if (IncrementCounters(moveData.size)) + return true; + } + } + } + } + } + } + return false; +} + +void DefragmentationContextPimpl::UpdateVectorStatistics(BlockVector& vector, StateBalanced& state) +{ + size_t allocCount = 0; + size_t freeCount = 0; + state.avgFreeSize = 0; + state.avgAllocSize = 0; + + for (size_t i = 0; i < vector.GetBlockCount(); ++i) + { + BlockMetadata* metadata = vector.GetBlock(i)->m_pMetadata; + + allocCount += metadata->GetAllocationCount(); + freeCount += metadata->GetFreeRegionsCount(); + state.avgFreeSize += metadata->GetSumFreeSize(); + state.avgAllocSize += metadata->GetSize(); + } + + state.avgAllocSize = (state.avgAllocSize - state.avgFreeSize) / allocCount; + state.avgFreeSize /= freeCount; +} +#endif // _D3D12MA_DEFRAGMENTATION_CONTEXT_PIMPL_FUNCTIONS + +#ifndef _D3D12MA_POOL_PIMPL_FUNCTIONS +PoolPimpl::PoolPimpl(AllocatorPimpl* allocator, const POOL_DESC& desc) + : m_Allocator(allocator), + m_Desc(desc), + m_BlockVector(NULL), + m_Name(NULL) +{ + const bool explicitBlockSize = desc.BlockSize != 0; + const UINT64 preferredBlockSize = explicitBlockSize ? desc.BlockSize : D3D12MA_DEFAULT_BLOCK_SIZE; + UINT maxBlockCount = desc.MaxBlockCount != 0 ? 
desc.MaxBlockCount : UINT_MAX; + +#ifndef __ID3D12Device4_INTERFACE_DEFINED__ + D3D12MA_ASSERT(m_Desc.pProtectedSession == NULL); +#endif + + m_BlockVector = D3D12MA_NEW(allocator->GetAllocs(), BlockVector)( + allocator, desc.HeapProperties, desc.HeapFlags, + preferredBlockSize, + desc.MinBlockCount, maxBlockCount, + explicitBlockSize, + D3D12MA_MAX(desc.MinAllocationAlignment, (UINT64)D3D12MA_DEBUG_ALIGNMENT), + (desc.Flags & POOL_FLAG_ALGORITHM_MASK) != 0, + (desc.Flags & POOL_FLAG_MSAA_TEXTURES_ALWAYS_COMMITTED) != 0, + desc.pProtectedSession, + desc.ResidencyPriority); +} + +PoolPimpl::~PoolPimpl() +{ + D3D12MA_ASSERT(m_PrevPool == NULL && m_NextPool == NULL); + FreeName(); + D3D12MA_DELETE(m_Allocator->GetAllocs(), m_BlockVector); +} + +HRESULT PoolPimpl::Init() +{ + m_CommittedAllocations.Init(m_Allocator->UseMutex(), m_Desc.HeapProperties.Type, this); + return m_BlockVector->CreateMinBlocks(); +} + +void PoolPimpl::GetStatistics(Statistics& outStats) +{ + ClearStatistics(outStats); + m_BlockVector->AddStatistics(outStats); + m_CommittedAllocations.AddStatistics(outStats); +} + +void PoolPimpl::CalculateStatistics(DetailedStatistics& outStats) +{ + ClearDetailedStatistics(outStats); + AddDetailedStatistics(outStats); +} + +void PoolPimpl::AddDetailedStatistics(DetailedStatistics& inoutStats) +{ + m_BlockVector->AddDetailedStatistics(inoutStats); + m_CommittedAllocations.AddDetailedStatistics(inoutStats); +} + +void PoolPimpl::SetName(LPCWSTR Name) +{ + FreeName(); + + if (Name) + { + const size_t nameCharCount = wcslen(Name) + 1; + m_Name = D3D12MA_NEW_ARRAY(m_Allocator->GetAllocs(), WCHAR, nameCharCount); + memcpy(m_Name, Name, nameCharCount * sizeof(WCHAR)); + } +} + +void PoolPimpl::FreeName() +{ + if (m_Name) + { + const size_t nameCharCount = wcslen(m_Name) + 1; + D3D12MA_DELETE_ARRAY(m_Allocator->GetAllocs(), m_Name, nameCharCount); + m_Name = NULL; + } +} +#endif // _D3D12MA_POOL_PIMPL_FUNCTIONS + + +#ifndef _D3D12MA_PUBLIC_INTERFACE +HRESULT CreateAllocator(const ALLOCATOR_DESC* pDesc, Allocator** ppAllocator) +{ + if (!pDesc || !ppAllocator || !pDesc->pDevice || !pDesc->pAdapter || + !(pDesc->PreferredBlockSize == 0 || (pDesc->PreferredBlockSize >= 16 && pDesc->PreferredBlockSize < 0x10000000000ull))) + { + D3D12MA_ASSERT(0 && "Invalid arguments passed to CreateAllocator."); + return E_INVALIDARG; + } + + D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK + + ALLOCATION_CALLBACKS allocationCallbacks; + SetupAllocationCallbacks(allocationCallbacks, pDesc->pAllocationCallbacks); + + *ppAllocator = D3D12MA_NEW(allocationCallbacks, Allocator)(allocationCallbacks, *pDesc); + HRESULT hr = (*ppAllocator)->m_Pimpl->Init(*pDesc); + if (FAILED(hr)) + { + D3D12MA_DELETE(allocationCallbacks, *ppAllocator); + *ppAllocator = NULL; + } + return hr; +} + +HRESULT CreateVirtualBlock(const VIRTUAL_BLOCK_DESC* pDesc, VirtualBlock** ppVirtualBlock) +{ + if (!pDesc || !ppVirtualBlock) + { + D3D12MA_ASSERT(0 && "Invalid arguments passed to CreateVirtualBlock."); + return E_INVALIDARG; + } + + D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK + + ALLOCATION_CALLBACKS allocationCallbacks; + SetupAllocationCallbacks(allocationCallbacks, pDesc->pAllocationCallbacks); + + *ppVirtualBlock = D3D12MA_NEW(allocationCallbacks, VirtualBlock)(allocationCallbacks, *pDesc); + return S_OK; +} + +#ifndef _D3D12MA_IUNKNOWN_IMPL_FUNCTIONS +HRESULT STDMETHODCALLTYPE IUnknownImpl::QueryInterface(REFIID riid, void** ppvObject) +{ + if (ppvObject == NULL) + return E_POINTER; + if (riid == IID_IUnknown) + { + ++m_RefCount; + *ppvObject = this; + return 
S_OK; + } + *ppvObject = NULL; + return E_NOINTERFACE; +} + +ULONG STDMETHODCALLTYPE IUnknownImpl::AddRef() +{ + return ++m_RefCount; +} + +ULONG STDMETHODCALLTYPE IUnknownImpl::Release() +{ + D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK + + const uint32_t newRefCount = --m_RefCount; + if (newRefCount == 0) + ReleaseThis(); + return newRefCount; +} +#endif // _D3D12MA_IUNKNOWN_IMPL_FUNCTIONS + +#ifndef _D3D12MA_ALLOCATION_FUNCTIONS +void Allocation::PackedData::SetType(Type type) +{ + const UINT u = (UINT)type; + D3D12MA_ASSERT(u < (1u << 2)); + m_Type = u; +} + +void Allocation::PackedData::SetResourceDimension(D3D12_RESOURCE_DIMENSION resourceDimension) +{ + const UINT u = (UINT)resourceDimension; + D3D12MA_ASSERT(u < (1u << 3)); + m_ResourceDimension = u; +} + +void Allocation::PackedData::SetResourceFlags(D3D12_RESOURCE_FLAGS resourceFlags) +{ + const UINT u = (UINT)resourceFlags; + D3D12MA_ASSERT(u < (1u << 24)); + m_ResourceFlags = u; +} + +void Allocation::PackedData::SetTextureLayout(D3D12_TEXTURE_LAYOUT textureLayout) +{ + const UINT u = (UINT)textureLayout; + D3D12MA_ASSERT(u < (1u << 9)); + m_TextureLayout = u; +} + +UINT64 Allocation::GetOffset() const +{ + switch (m_PackedData.GetType()) + { + case TYPE_COMMITTED: + case TYPE_HEAP: + return 0; + case TYPE_PLACED: + return m_Placed.block->m_pMetadata->GetAllocationOffset(m_Placed.allocHandle); + default: + D3D12MA_ASSERT(0); + return 0; + } +} + +void Allocation::SetResource(ID3D12Resource* pResource) +{ + if (pResource != m_Resource) + { + if (m_Resource) + m_Resource->Release(); + m_Resource = pResource; + if (m_Resource) + m_Resource->AddRef(); + } +} + +ID3D12Heap* Allocation::GetHeap() const +{ + switch (m_PackedData.GetType()) + { + case TYPE_COMMITTED: + return NULL; + case TYPE_PLACED: + return m_Placed.block->GetHeap(); + case TYPE_HEAP: + return m_Heap.heap; + default: + D3D12MA_ASSERT(0); + return 0; + } +} + +void Allocation::SetName(LPCWSTR Name) +{ + FreeName(); + + if (Name) + { + const size_t nameCharCount = wcslen(Name) + 1; + m_Name = D3D12MA_NEW_ARRAY(m_Allocator->GetAllocs(), WCHAR, nameCharCount); + memcpy(m_Name, Name, nameCharCount * sizeof(WCHAR)); + } +} + +void Allocation::ReleaseThis() +{ + if (this == NULL) + { + return; + } + + SAFE_RELEASE(m_Resource); + + switch (m_PackedData.GetType()) + { + case TYPE_COMMITTED: + m_Allocator->FreeCommittedMemory(this); + break; + case TYPE_PLACED: + m_Allocator->FreePlacedMemory(this); + break; + case TYPE_HEAP: + m_Allocator->FreeHeapMemory(this); + break; + } + + FreeName(); + + m_Allocator->GetAllocationObjectAllocator().Free(this); +} + +Allocation::Allocation(AllocatorPimpl* allocator, UINT64 size, UINT64 alignment) + : m_Allocator{ allocator }, + m_Size{ size }, + m_Alignment{ alignment }, + m_Resource{ NULL }, + m_pPrivateData{ NULL }, + m_Name{ NULL } +{ + D3D12MA_ASSERT(allocator); + + m_PackedData.SetType(TYPE_COUNT); + m_PackedData.SetResourceDimension(D3D12_RESOURCE_DIMENSION_UNKNOWN); + m_PackedData.SetResourceFlags(D3D12_RESOURCE_FLAG_NONE); + m_PackedData.SetTextureLayout(D3D12_TEXTURE_LAYOUT_UNKNOWN); +} + +void Allocation::InitCommitted(CommittedAllocationList* list) +{ + m_PackedData.SetType(TYPE_COMMITTED); + m_Committed.list = list; + m_Committed.prev = NULL; + m_Committed.next = NULL; +} + +void Allocation::InitPlaced(AllocHandle allocHandle, NormalBlock* block) +{ + m_PackedData.SetType(TYPE_PLACED); + m_Placed.allocHandle = allocHandle; + m_Placed.block = block; +} + +void Allocation::InitHeap(CommittedAllocationList* list, ID3D12Heap* heap) +{ + 
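+    // m_Heap is declared so that its first members mirror m_Committed (list/prev/next),
+    // letting heap allocations live on the same CommittedAllocationList; that is why
+    // m_Committed.prev/next are written below even though the type is TYPE_HEAP.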
m_PackedData.SetType(TYPE_HEAP);
+    m_Heap.list = list;
+    m_Committed.prev = NULL;
+    m_Committed.next = NULL;
+    m_Heap.heap = heap;
+}
+
+void Allocation::SwapBlockAllocation(Allocation* allocation)
+{
+    D3D12MA_ASSERT(allocation != NULL);
+    D3D12MA_ASSERT(m_PackedData.GetType() == TYPE_PLACED);
+    D3D12MA_ASSERT(allocation->m_PackedData.GetType() == TYPE_PLACED);
+
+    D3D12MA_SWAP(m_Resource, allocation->m_Resource);
+    m_Placed.block->m_pMetadata->SetAllocationPrivateData(m_Placed.allocHandle, allocation);
+    D3D12MA_SWAP(m_Placed, allocation->m_Placed);
+    m_Placed.block->m_pMetadata->SetAllocationPrivateData(m_Placed.allocHandle, this);
+}
+
+AllocHandle Allocation::GetAllocHandle() const
+{
+    switch (m_PackedData.GetType())
+    {
+    case TYPE_COMMITTED:
+    case TYPE_HEAP:
+        return (AllocHandle)0;
+    case TYPE_PLACED:
+        return m_Placed.allocHandle;
+    default:
+        D3D12MA_ASSERT(0);
+        return (AllocHandle)0;
+    }
+}
+
+NormalBlock* Allocation::GetBlock()
+{
+    switch (m_PackedData.GetType())
+    {
+    case TYPE_COMMITTED:
+    case TYPE_HEAP:
+        return NULL;
+    case TYPE_PLACED:
+        return m_Placed.block;
+    default:
+        D3D12MA_ASSERT(0);
+        return NULL;
+    }
+}
+
+template<typename D3D12_RESOURCE_DESC_T>
+void Allocation::SetResourcePointer(ID3D12Resource* resource, const D3D12_RESOURCE_DESC_T* pResourceDesc)
+{
+    D3D12MA_ASSERT(m_Resource == NULL && pResourceDesc);
+    m_Resource = resource;
+    m_PackedData.SetResourceDimension(pResourceDesc->Dimension);
+    m_PackedData.SetResourceFlags(pResourceDesc->Flags);
+    m_PackedData.SetTextureLayout(pResourceDesc->Layout);
+}
+
+void Allocation::FreeName()
+{
+    if (m_Name)
+    {
+        const size_t nameCharCount = wcslen(m_Name) + 1;
+        D3D12MA_DELETE_ARRAY(m_Allocator->GetAllocs(), m_Name, nameCharCount);
+        m_Name = NULL;
+    }
+}
+#endif // _D3D12MA_ALLOCATION_FUNCTIONS
+
+#ifndef _D3D12MA_DEFRAGMENTATION_CONTEXT_FUNCTIONS
+HRESULT DefragmentationContext::BeginPass(DEFRAGMENTATION_PASS_MOVE_INFO* pPassInfo)
+{
+    D3D12MA_ASSERT(pPassInfo);
+    return m_Pimpl->DefragmentPassBegin(*pPassInfo);
+}
+
+HRESULT DefragmentationContext::EndPass(DEFRAGMENTATION_PASS_MOVE_INFO* pPassInfo)
+{
+    D3D12MA_ASSERT(pPassInfo);
+    return m_Pimpl->DefragmentPassEnd(*pPassInfo);
+}
+
+void DefragmentationContext::GetStats(DEFRAGMENTATION_STATS* pStats)
+{
+    D3D12MA_ASSERT(pStats);
+    m_Pimpl->GetStats(*pStats);
+}
+
+void DefragmentationContext::ReleaseThis()
+{
+    if (this == NULL)
+    {
+        return;
+    }
+
+    D3D12MA_DELETE(m_Pimpl->GetAllocs(), this);
+}
+
+DefragmentationContext::DefragmentationContext(AllocatorPimpl* allocator,
+    const DEFRAGMENTATION_DESC& desc,
+    BlockVector* poolVector)
+    : m_Pimpl(D3D12MA_NEW(allocator->GetAllocs(), DefragmentationContextPimpl)(allocator, desc, poolVector)) {}
+
+DefragmentationContext::~DefragmentationContext()
+{
+    D3D12MA_DELETE(m_Pimpl->GetAllocs(), m_Pimpl);
+}
+#endif // _D3D12MA_DEFRAGMENTATION_CONTEXT_FUNCTIONS
+
+#ifndef _D3D12MA_POOL_FUNCTIONS
+POOL_DESC Pool::GetDesc() const
+{
+    return m_Pimpl->GetDesc();
+}
+
+void Pool::GetStatistics(Statistics* pStats)
+{
+    D3D12MA_ASSERT(pStats);
+    D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK
+    m_Pimpl->GetStatistics(*pStats);
+}
+
+void Pool::CalculateStatistics(DetailedStatistics* pStats)
+{
+    D3D12MA_ASSERT(pStats);
+    D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK
+    m_Pimpl->CalculateStatistics(*pStats);
+}
+
+void Pool::SetName(LPCWSTR Name)
+{
+    D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK
+    m_Pimpl->SetName(Name);
+}
+
+LPCWSTR Pool::GetName() const
+{
+    return m_Pimpl->GetName();
+}
+
+HRESULT Pool::BeginDefragmentation(const DEFRAGMENTATION_DESC* pDesc, DefragmentationContext** ppContext)
+{ + D3D12MA_ASSERT(pDesc && ppContext); + + // Check for support + if (m_Pimpl->GetBlockVector()->GetAlgorithm() & POOL_FLAG_ALGORITHM_LINEAR) + return E_NOINTERFACE; + + AllocatorPimpl* allocator = m_Pimpl->GetAllocator(); + *ppContext = D3D12MA_NEW(allocator->GetAllocs(), DefragmentationContext)(allocator, *pDesc, m_Pimpl->GetBlockVector()); + return S_OK; +} + +void Pool::ReleaseThis() +{ + if (this == NULL) + { + return; + } + + D3D12MA_DELETE(m_Pimpl->GetAllocator()->GetAllocs(), this); +} + +Pool::Pool(Allocator* allocator, const POOL_DESC& desc) + : m_Pimpl(D3D12MA_NEW(allocator->m_Pimpl->GetAllocs(), PoolPimpl)(allocator->m_Pimpl, desc)) {} + +Pool::~Pool() +{ + m_Pimpl->GetAllocator()->UnregisterPool(this, m_Pimpl->GetDesc().HeapProperties.Type); + + D3D12MA_DELETE(m_Pimpl->GetAllocator()->GetAllocs(), m_Pimpl); +} +#endif // _D3D12MA_POOL_FUNCTIONS + +#ifndef _D3D12MA_ALLOCATOR_FUNCTIONS +const D3D12_FEATURE_DATA_D3D12_OPTIONS& Allocator::GetD3D12Options() const +{ + return m_Pimpl->GetD3D12Options(); +} + +BOOL Allocator::IsUMA() const +{ + return m_Pimpl->IsUMA(); +} + +BOOL Allocator::IsCacheCoherentUMA() const +{ + return m_Pimpl->IsCacheCoherentUMA(); +} + +BOOL Allocator::IsGPUUploadHeapSupported() const +{ + return m_Pimpl->IsGPUUploadHeapSupported(); +} + +UINT64 Allocator::GetMemoryCapacity(UINT memorySegmentGroup) const +{ + return m_Pimpl->GetMemoryCapacity(memorySegmentGroup); +} + +HRESULT Allocator::CreateResource( + const ALLOCATION_DESC* pAllocDesc, + const D3D12_RESOURCE_DESC* pResourceDesc, + D3D12_RESOURCE_STATES InitialResourceState, + const D3D12_CLEAR_VALUE* pOptimizedClearValue, + Allocation** ppAllocation, + REFIID riidResource, + void** ppvResource) +{ + if (!pAllocDesc || !pResourceDesc || !ppAllocation) + { + D3D12MA_ASSERT(0 && "Invalid arguments passed to Allocator::CreateResource."); + return E_INVALIDARG; + } + D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK + return m_Pimpl->CreateResource( + pAllocDesc, + CREATE_RESOURCE_PARAMS(pResourceDesc, InitialResourceState, pOptimizedClearValue), + ppAllocation, + riidResource, + ppvResource); +} + +#ifdef __ID3D12Device8_INTERFACE_DEFINED__ +HRESULT Allocator::CreateResource2( + const ALLOCATION_DESC* pAllocDesc, + const D3D12_RESOURCE_DESC1* pResourceDesc, + D3D12_RESOURCE_STATES InitialResourceState, + const D3D12_CLEAR_VALUE* pOptimizedClearValue, + Allocation** ppAllocation, + REFIID riidResource, + void** ppvResource) +{ + if (!pAllocDesc || !pResourceDesc || !ppAllocation) + { + D3D12MA_ASSERT(0 && "Invalid arguments passed to Allocator::CreateResource2."); + return E_INVALIDARG; + } + D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK + return m_Pimpl->CreateResource( + pAllocDesc, + CREATE_RESOURCE_PARAMS(pResourceDesc, InitialResourceState, pOptimizedClearValue), + ppAllocation, + riidResource, + ppvResource); +} +#endif // #ifdef __ID3D12Device8_INTERFACE_DEFINED__ + +#ifdef __ID3D12Device10_INTERFACE_DEFINED__ +HRESULT Allocator::CreateResource3( + const ALLOCATION_DESC* pAllocDesc, + const D3D12_RESOURCE_DESC1* pResourceDesc, + D3D12_BARRIER_LAYOUT InitialLayout, + const D3D12_CLEAR_VALUE* pOptimizedClearValue, + UINT32 NumCastableFormats, + DXGI_FORMAT* pCastableFormats, + Allocation** ppAllocation, + REFIID riidResource, + void** ppvResource) +{ + if (!pAllocDesc || !pResourceDesc || !ppAllocation) + { + D3D12MA_ASSERT(0 && "Invalid arguments passed to Allocator::CreateResource3."); + return E_INVALIDARG; + } + D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK + return m_Pimpl->CreateResource( + pAllocDesc, + 
CREATE_RESOURCE_PARAMS(pResourceDesc, InitialLayout, pOptimizedClearValue, NumCastableFormats, pCastableFormats), + ppAllocation, + riidResource, + ppvResource); +} +#endif // #ifdef __ID3D12Device10_INTERFACE_DEFINED__ + +HRESULT Allocator::AllocateMemory( + const ALLOCATION_DESC* pAllocDesc, + const D3D12_RESOURCE_ALLOCATION_INFO* pAllocInfo, + Allocation** ppAllocation) +{ + if (!ValidateAllocateMemoryParameters(pAllocDesc, pAllocInfo, ppAllocation)) + { + D3D12MA_ASSERT(0 && "Invalid arguments passed to Allocator::AllocateMemory."); + return E_INVALIDARG; + } + D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK + return m_Pimpl->AllocateMemory(pAllocDesc, pAllocInfo, ppAllocation); +} + +HRESULT Allocator::CreateAliasingResource( + Allocation* pAllocation, + UINT64 AllocationLocalOffset, + const D3D12_RESOURCE_DESC* pResourceDesc, + D3D12_RESOURCE_STATES InitialResourceState, + const D3D12_CLEAR_VALUE* pOptimizedClearValue, + REFIID riidResource, + void** ppvResource) +{ + if (!pAllocation || !pResourceDesc || !ppvResource) + { + D3D12MA_ASSERT(0 && "Invalid arguments passed to Allocator::CreateAliasingResource."); + return E_INVALIDARG; + } + D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK + return m_Pimpl->CreateAliasingResource( + pAllocation, + AllocationLocalOffset, + CREATE_RESOURCE_PARAMS(pResourceDesc, InitialResourceState, pOptimizedClearValue), + riidResource, + ppvResource); +} + +#ifdef __ID3D12Device8_INTERFACE_DEFINED__ +HRESULT Allocator::CreateAliasingResource1( + Allocation* pAllocation, + UINT64 AllocationLocalOffset, + const D3D12_RESOURCE_DESC1* pResourceDesc, + D3D12_RESOURCE_STATES InitialResourceState, + const D3D12_CLEAR_VALUE* pOptimizedClearValue, + REFIID riidResource, + void** ppvResource) +{ + if (!pAllocation || !pResourceDesc || !ppvResource) + { + D3D12MA_ASSERT(0 && "Invalid arguments passed to Allocator::CreateAliasingResource."); + return E_INVALIDARG; + } + D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK + return m_Pimpl->CreateAliasingResource( + pAllocation, + AllocationLocalOffset, + CREATE_RESOURCE_PARAMS(pResourceDesc, InitialResourceState, pOptimizedClearValue), + riidResource, + ppvResource); +} +#endif // #ifdef __ID3D12Device8_INTERFACE_DEFINED__ + +#ifdef __ID3D12Device10_INTERFACE_DEFINED__ +HRESULT Allocator::CreateAliasingResource2( + Allocation* pAllocation, + UINT64 AllocationLocalOffset, + const D3D12_RESOURCE_DESC1* pResourceDesc, + D3D12_BARRIER_LAYOUT InitialLayout, + const D3D12_CLEAR_VALUE* pOptimizedClearValue, + UINT32 NumCastableFormats, + DXGI_FORMAT* pCastableFormats, + REFIID riidResource, + void** ppvResource) +{ + if (!pAllocation || !pResourceDesc || !ppvResource) + { + D3D12MA_ASSERT(0 && "Invalid arguments passed to Allocator::CreateAliasingResource."); + return E_INVALIDARG; + } + D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK + return m_Pimpl->CreateAliasingResource( + pAllocation, + AllocationLocalOffset, + CREATE_RESOURCE_PARAMS(pResourceDesc, InitialLayout, pOptimizedClearValue, NumCastableFormats, pCastableFormats), + riidResource, + ppvResource); +} +#endif // #ifdef __ID3D12Device10_INTERFACE_DEFINED__ + +HRESULT Allocator::CreatePool( + const POOL_DESC* pPoolDesc, + Pool** ppPool) +{ + if (!pPoolDesc || !ppPool || + (pPoolDesc->MaxBlockCount > 0 && pPoolDesc->MaxBlockCount < pPoolDesc->MinBlockCount) || + (pPoolDesc->MinAllocationAlignment > 0 && !IsPow2(pPoolDesc->MinAllocationAlignment))) + { + D3D12MA_ASSERT(0 && "Invalid arguments passed to Allocator::CreatePool."); + return E_INVALIDARG; + } + if (!m_Pimpl->HeapFlagsFulfillResourceHeapTier(pPoolDesc->HeapFlags)) + { + 
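+        // Context for this check: on D3D12_RESOURCE_HEAP_TIER_1 hardware a heap cannot mix
+        // resource categories, so pool HeapFlags must allow exactly one of: buffers,
+        // non-RT/DS textures, or RT/DS textures (D3D12_HEAP_FLAG_ALLOW_ONLY_*).
+        // Tier 2 heaps may leave all three categories allowed.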
D3D12MA_ASSERT(0 && "Invalid pPoolDesc->HeapFlags passed to Allocator::CreatePool. Did you forget to handle ResourceHeapTier=1?"); + return E_INVALIDARG; + } + D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK + * ppPool = D3D12MA_NEW(m_Pimpl->GetAllocs(), Pool)(this, *pPoolDesc); + HRESULT hr = (*ppPool)->m_Pimpl->Init(); + if (SUCCEEDED(hr)) + { + m_Pimpl->RegisterPool(*ppPool, pPoolDesc->HeapProperties.Type); + } + else + { + D3D12MA_DELETE(m_Pimpl->GetAllocs(), *ppPool); + *ppPool = NULL; + } + return hr; +} + +void Allocator::GetBudget(Budget* pLocalBudget, Budget* pNonLocalBudget) +{ + if (pLocalBudget == NULL && pNonLocalBudget == NULL) + { + return; + } + D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK + m_Pimpl->GetBudget(pLocalBudget, pNonLocalBudget); +} + +void Allocator::CalculateStatistics(TotalStatistics* pStats) +{ + D3D12MA_ASSERT(pStats); + D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK + m_Pimpl->CalculateStatistics(*pStats); +} + +void Allocator::BuildStatsString(WCHAR** ppStatsString, BOOL DetailedMap) const +{ + D3D12MA_ASSERT(ppStatsString); + D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK + m_Pimpl->BuildStatsString(ppStatsString, DetailedMap); +} + +void Allocator::FreeStatsString(WCHAR* pStatsString) const +{ + if (pStatsString != NULL) + { + D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK + m_Pimpl->FreeStatsString(pStatsString); + } +} + +void Allocator::BeginDefragmentation(const DEFRAGMENTATION_DESC* pDesc, DefragmentationContext** ppContext) +{ + D3D12MA_ASSERT(pDesc && ppContext); + + *ppContext = D3D12MA_NEW(m_Pimpl->GetAllocs(), DefragmentationContext)(m_Pimpl, *pDesc, NULL); +} + +void Allocator::ReleaseThis() +{ + // Copy is needed because otherwise we would call destructor and invalidate the structure with callbacks before using it to free memory. + const ALLOCATION_CALLBACKS allocationCallbacksCopy = m_Pimpl->GetAllocs(); + D3D12MA_DELETE(allocationCallbacksCopy, this); +} + +Allocator::Allocator(const ALLOCATION_CALLBACKS& allocationCallbacks, const ALLOCATOR_DESC& desc) + : m_Pimpl(D3D12MA_NEW(allocationCallbacks, AllocatorPimpl)(allocationCallbacks, desc)) {} + +Allocator::~Allocator() +{ + D3D12MA_DELETE(m_Pimpl->GetAllocs(), m_Pimpl); +} +#endif // _D3D12MA_ALLOCATOR_FUNCTIONS + +#ifndef _D3D12MA_VIRTUAL_BLOCK_FUNCTIONS +BOOL VirtualBlock::IsEmpty() const +{ + D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK + return m_Pimpl->m_Metadata->IsEmpty() ? TRUE : FALSE; +} + +void VirtualBlock::GetAllocationInfo(VirtualAllocation allocation, VIRTUAL_ALLOCATION_INFO* pInfo) const +{ + D3D12MA_ASSERT(allocation.AllocHandle != (AllocHandle)0 && pInfo); + + D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK + m_Pimpl->m_Metadata->GetAllocationInfo(allocation.AllocHandle, *pInfo); +} + +HRESULT VirtualBlock::Allocate(const VIRTUAL_ALLOCATION_DESC* pDesc, VirtualAllocation* pAllocation, UINT64* pOffset) +{ + if (!pDesc || !pAllocation || pDesc->Size == 0 || !IsPow2(pDesc->Alignment)) + { + D3D12MA_ASSERT(0 && "Invalid arguments passed to VirtualBlock::Allocate."); + return E_INVALIDARG; + } + + D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK + + const UINT64 alignment = pDesc->Alignment != 0 ? 
pDesc->Alignment : 1;
+    AllocationRequest allocRequest = {};
+    if (m_Pimpl->m_Metadata->CreateAllocationRequest(
+        pDesc->Size,
+        alignment,
+        pDesc->Flags & VIRTUAL_ALLOCATION_FLAG_UPPER_ADDRESS,
+        pDesc->Flags & VIRTUAL_ALLOCATION_FLAG_STRATEGY_MASK,
+        &allocRequest))
+    {
+        m_Pimpl->m_Metadata->Alloc(allocRequest, pDesc->Size, pDesc->pPrivateData);
+        D3D12MA_HEAVY_ASSERT(m_Pimpl->m_Metadata->Validate());
+        pAllocation->AllocHandle = allocRequest.allocHandle;
+
+        if (pOffset)
+            *pOffset = m_Pimpl->m_Metadata->GetAllocationOffset(allocRequest.allocHandle);
+        return S_OK;
+    }
+
+    pAllocation->AllocHandle = (AllocHandle)0;
+    if (pOffset)
+        *pOffset = UINT64_MAX;
+
+    return E_OUTOFMEMORY;
+}
+
+void VirtualBlock::FreeAllocation(VirtualAllocation allocation)
+{
+    if (allocation.AllocHandle == (AllocHandle)0)
+        return;
+
+    D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK
+
+    m_Pimpl->m_Metadata->Free(allocation.AllocHandle);
+    D3D12MA_HEAVY_ASSERT(m_Pimpl->m_Metadata->Validate());
+}
+
+void VirtualBlock::Clear()
+{
+    D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK
+
+    m_Pimpl->m_Metadata->Clear();
+    D3D12MA_HEAVY_ASSERT(m_Pimpl->m_Metadata->Validate());
+}
+
+void VirtualBlock::SetAllocationPrivateData(VirtualAllocation allocation, void* pPrivateData)
+{
+    D3D12MA_ASSERT(allocation.AllocHandle != (AllocHandle)0);
+
+    D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK
+    m_Pimpl->m_Metadata->SetAllocationPrivateData(allocation.AllocHandle, pPrivateData);
+}
+
+void VirtualBlock::GetStatistics(Statistics* pStats) const
+{
+    D3D12MA_ASSERT(pStats);
+    D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK
+    D3D12MA_HEAVY_ASSERT(m_Pimpl->m_Metadata->Validate());
+    ClearStatistics(*pStats);
+    m_Pimpl->m_Metadata->AddStatistics(*pStats);
+}
+
+void VirtualBlock::CalculateStatistics(DetailedStatistics* pStats) const
+{
+    D3D12MA_ASSERT(pStats);
+    D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK
+    D3D12MA_HEAVY_ASSERT(m_Pimpl->m_Metadata->Validate());
+    ClearDetailedStatistics(*pStats);
+    m_Pimpl->m_Metadata->AddDetailedStatistics(*pStats);
+}
+
+void VirtualBlock::BuildStatsString(WCHAR** ppStatsString) const
+{
+    D3D12MA_ASSERT(ppStatsString);
+
+    D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK
+
+    StringBuilder sb(m_Pimpl->m_AllocationCallbacks);
+    {
+        JsonWriter json(m_Pimpl->m_AllocationCallbacks, sb);
+        D3D12MA_HEAVY_ASSERT(m_Pimpl->m_Metadata->Validate());
+        json.BeginObject();
+        m_Pimpl->m_Metadata->WriteAllocationInfoToJson(json);
+        json.EndObject();
+    } // Scope for JsonWriter
+
+    const size_t length = sb.GetLength();
+    WCHAR* result = AllocateArray<WCHAR>(m_Pimpl->m_AllocationCallbacks, length + 1);
+    memcpy(result, sb.GetData(), length * sizeof(WCHAR));
+    result[length] = L'\0';
+    *ppStatsString = result;
+}
+
+void VirtualBlock::FreeStatsString(WCHAR* pStatsString) const
+{
+    if (pStatsString != NULL)
+    {
+        D3D12MA_DEBUG_GLOBAL_MUTEX_LOCK
+        D3D12MA::Free(m_Pimpl->m_AllocationCallbacks, pStatsString);
+    }
+}
+
+void VirtualBlock::ReleaseThis()
+{
+    // Copy is needed because otherwise we would call destructor and invalidate the structure with callbacks before using it to free memory.
+    const ALLOCATION_CALLBACKS allocationCallbacksCopy = m_Pimpl->m_AllocationCallbacks;
+    D3D12MA_DELETE(allocationCallbacksCopy, this);
+}
+
+VirtualBlock::VirtualBlock(const ALLOCATION_CALLBACKS& allocationCallbacks, const VIRTUAL_BLOCK_DESC& desc)
+    : m_Pimpl(D3D12MA_NEW(allocationCallbacks, VirtualBlockPimpl)(allocationCallbacks, desc)) {}
+
+VirtualBlock::~VirtualBlock()
+{
+    // THIS IS AN IMPORTANT ASSERT!
+    // Hitting it means you have some memory leak - unreleased allocations in this virtual block.
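+    // A minimal correct-usage sketch (hypothetical handles, for illustration only):
+    //   VirtualAllocation alloc;
+    //   block->Allocate(&desc, &alloc, NULL);
+    //   ...
+    //   block->FreeAllocation(alloc);  // or block->Clear()
+    //   block->Release();              // the destructor below then sees empty metadata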
+    D3D12MA_ASSERT(m_Pimpl->m_Metadata->IsEmpty() && "Some allocations were not freed before destruction of this virtual block!");
+
+    D3D12MA_DELETE(m_Pimpl->m_AllocationCallbacks, m_Pimpl);
+}
+#endif // _D3D12MA_VIRTUAL_BLOCK_FUNCTIONS
+#endif // _D3D12MA_PUBLIC_INTERFACE
+} // namespace D3D12MA
+
+#if defined(__GNUC__)
+# pragma GCC diagnostic pop
+#elif defined(__clang__)
+# pragma clang diagnostic pop
+#else
+# pragma warning(pop)
+#endif
diff --git a/External/memalloc/D3D12MemAlloc.h b/External/memalloc/D3D12MemAlloc.h
new file mode 100644
index 00000000..e07b7d01
--- /dev/null
+++ b/External/memalloc/D3D12MemAlloc.h
@@ -0,0 +1,2636 @@
+//
+// Copyright (c) 2019-2022 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+
+#pragma once
+
+/** \mainpage D3D12 Memory Allocator
+
+Version 2.1.0-development (2024-07-05)
+
+Copyright (c) 2019-2024 Advanced Micro Devices, Inc. All rights reserved. \n
+License: MIT
+
+Documentation of all members: D3D12MemAlloc.h
+
+\section main_table_of_contents Table of contents
+
+- \subpage quick_start
+    - [Project setup](@ref quick_start_project_setup)
+    - [Creating resources](@ref quick_start_creating_resources)
+    - [Resource reference counting](@ref quick_start_resource_reference_counting)
+    - [Mapping memory](@ref quick_start_mapping_memory)
+- \subpage custom_pools
+- \subpage defragmentation
+- \subpage statistics
+- \subpage resource_aliasing
+- \subpage linear_algorithm
+- \subpage virtual_allocator
+- \subpage configuration
+    - [Custom CPU memory allocator](@ref custom_memory_allocator)
+    - [Debug margins](@ref debug_margins)
+- \subpage general_considerations
+    - [Thread safety](@ref general_considerations_thread_safety)
+    - [Versioning and compatibility](@ref general_considerations_versioning_and_compatibility)
+    - [Features not supported](@ref general_considerations_features_not_supported)
+
+\section main_see_also See also
+
+- [Product page on GPUOpen](https://gpuopen.com/gaming-product/d3d12-memory-allocator/)
+- [Source repository on GitHub](https://github.com/GPUOpen-LibrariesAndSDKs/D3D12MemoryAllocator)
+*/
+
+// If you are using this library on a platform other than Windows PC, or want to use a different
+// version of DXGI, you should include D3D12-compatible headers before this library on your own
+// and define D3D12MA_D3D12_HEADERS_ALREADY_INCLUDED.
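+// For example, one possible setup (just an illustration, not the only valid one):
+//
+//     #include <d3d12.h> // or your platform's D3D12-compatible header
+//     #define D3D12MA_D3D12_HEADERS_ALREADY_INCLUDED
+//     #include "D3D12MemAlloc.h"
+//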
+// Alternatively, if you are targeting the open sourced DirectX headers, defining D3D12MA_USING_DIRECTX_HEADERS
+// will include them rather than the ones provided by the Windows SDK.
+#ifndef D3D12MA_D3D12_HEADERS_ALREADY_INCLUDED
+    #if defined(D3D12MA_USING_DIRECTX_HEADERS)
+        #include <directx/d3d12.h>
+        #include <dxguids/dxguids.h>
+    #else
+        #include <d3d12.h>
+    #endif
+
+    #include <dxgi1_4.h>
+#endif
+
+// Define this macro to 0 to disable usage of DXGI 1.4 (needed for IDXGIAdapter3 and query for memory budget).
+#ifndef D3D12MA_DXGI_1_4
+    #ifdef __IDXGIAdapter3_INTERFACE_DEFINED__
+        #define D3D12MA_DXGI_1_4 1
+    #else
+        #define D3D12MA_DXGI_1_4 0
+    #endif
+#endif
+
+/*
+When defined to value other than 0, the library will try to use
+D3D12_SMALL_RESOURCE_PLACEMENT_ALIGNMENT or D3D12_SMALL_MSAA_RESOURCE_PLACEMENT_ALIGNMENT
+for created textures when possible, which can save memory because some small textures
+may get their alignment 4K and their size a multiple of 4K instead of 64K.
+
+#define D3D12MA_USE_SMALL_RESOURCE_PLACEMENT_ALIGNMENT 0
+    Disables small texture alignment.
+#define D3D12MA_USE_SMALL_RESOURCE_PLACEMENT_ALIGNMENT 1
+    Enables conservative algorithm that will use small alignment only for some textures
+    that are surely known to support it.
+#define D3D12MA_USE_SMALL_RESOURCE_PLACEMENT_ALIGNMENT 2
+    Enables query for small alignment to D3D12 (based on Microsoft sample) which will
+    enable small alignment for more textures, but will also generate D3D Debug Layer
+    error #721 on call to ID3D12Device::GetResourceAllocationInfo, which you should just
+    ignore.
+*/
+#ifndef D3D12MA_USE_SMALL_RESOURCE_PLACEMENT_ALIGNMENT
+    #define D3D12MA_USE_SMALL_RESOURCE_PLACEMENT_ALIGNMENT 1
+#endif
+
+/// \cond INTERNAL
+
+#define D3D12MA_CLASS_NO_COPY(className) \
+    private: \
+        className(const className&) = delete; \
+        className(className&&) = delete; \
+        className& operator=(const className&) = delete; \
+        className& operator=(className&&) = delete;
+
+// To be used with MAKE_HRESULT to define custom error codes.
+#define FACILITY_D3D12MA 3542
+
+/*
+If providing your own implementation, you need to implement a subset of std::atomic.
+*/
+#if !defined(D3D12MA_ATOMIC_UINT32) || !defined(D3D12MA_ATOMIC_UINT64)
+    #include <atomic>
+#endif
+
+#ifndef D3D12MA_ATOMIC_UINT32
+    #define D3D12MA_ATOMIC_UINT32 std::atomic<UINT32>
+#endif
+
+#ifndef D3D12MA_ATOMIC_UINT64
+    #define D3D12MA_ATOMIC_UINT64 std::atomic<UINT64>
+#endif
+
+#ifdef D3D12MA_EXPORTS
+    #define D3D12MA_API __declspec(dllexport)
+#elif defined(D3D12MA_IMPORTS)
+    #define D3D12MA_API __declspec(dllimport)
+#else
+    #define D3D12MA_API
+#endif
+
+// Forward declaration if ID3D12ProtectedResourceSession is not defined inside the headers (older SDK, pre ID3D12Device4)
+struct ID3D12ProtectedResourceSession;
+
+// Define this enum even if SDK doesn't provide it, to simplify the API.
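+// (The numeric values below match the D3D12_RESIDENCY_PRIORITY definitions in the official d3d12.h.)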
+#ifndef __ID3D12Device1_INTERFACE_DEFINED__
+typedef enum D3D12_RESIDENCY_PRIORITY
+{
+    D3D12_RESIDENCY_PRIORITY_MINIMUM = 0x28000000,
+    D3D12_RESIDENCY_PRIORITY_LOW = 0x50000000,
+    D3D12_RESIDENCY_PRIORITY_NORMAL = 0x78000000,
+    D3D12_RESIDENCY_PRIORITY_HIGH = 0xa0010000,
+    D3D12_RESIDENCY_PRIORITY_MAXIMUM = 0xc8000000
+} D3D12_RESIDENCY_PRIORITY;
+#endif
+
+namespace D3D12MA
+{
+class D3D12MA_API IUnknownImpl : public IUnknown
+{
+public:
+    virtual ~IUnknownImpl() = default;
+    virtual HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, void** ppvObject);
+    virtual ULONG STDMETHODCALLTYPE AddRef();
+    virtual ULONG STDMETHODCALLTYPE Release();
+protected:
+    virtual void ReleaseThis() { delete this; }
+private:
+    D3D12MA_ATOMIC_UINT32 m_RefCount = {1};
+};
+} // namespace D3D12MA
+
+/// \endcond
+
+namespace D3D12MA
+{
+
+/// \cond INTERNAL
+class DefragmentationContextPimpl;
+class AllocatorPimpl;
+class PoolPimpl;
+class NormalBlock;
+class BlockVector;
+class CommittedAllocationList;
+class JsonWriter;
+class VirtualBlockPimpl;
+/// \endcond
+
+class Pool;
+class Allocator;
+struct Statistics;
+struct DetailedStatistics;
+struct TotalStatistics;
+
+/// \brief Unique identifier of single allocation done inside the memory heap.
+typedef UINT64 AllocHandle;
+
+/// Pointer to custom callback function that allocates CPU memory.
+using ALLOCATE_FUNC_PTR = void* (*)(size_t Size, size_t Alignment, void* pPrivateData);
+/**
+\brief Pointer to custom callback function that deallocates CPU memory.
+
+`pMemory = null` should be accepted and ignored.
+*/
+using FREE_FUNC_PTR = void (*)(void* pMemory, void* pPrivateData);
+
+/// Custom callbacks to CPU memory allocation functions.
+struct ALLOCATION_CALLBACKS
+{
+    /// %Allocation function.
+    ALLOCATE_FUNC_PTR pAllocate;
+    /// Deallocation function.
+    FREE_FUNC_PTR pFree;
+    /// Custom data that will be passed to allocation and deallocation functions as `pPrivateData` parameter.
+    void* pPrivateData;
+};
+
+
+/// \brief Bit flags to be used with ALLOCATION_DESC::Flags.
+enum ALLOCATION_FLAGS
+{
+    /// Zero
+    ALLOCATION_FLAG_NONE = 0,
+
+    /**
+    Set this flag if the allocation should have its own dedicated memory allocation (committed resource with implicit heap).
+
+    Use it for special, big resources, like fullscreen textures used as render targets.
+
+    - When used with functions like D3D12MA::Allocator::CreateResource, it will use `ID3D12Device::CreateCommittedResource`,
+      so the created allocation will contain a resource (D3D12MA::Allocation::GetResource() `!= NULL`) but will not have
+      a heap (D3D12MA::Allocation::GetHeap() `== NULL`), as the heap is implicit.
+    - When used with raw memory allocation like D3D12MA::Allocator::AllocateMemory, it will use `ID3D12Device::CreateHeap`,
+      so the created allocation will contain a heap (D3D12MA::Allocation::GetHeap() `!= NULL`) and its offset will always be 0.
+    */
+    ALLOCATION_FLAG_COMMITTED = 0x1,
+
+    /**
+    Set this flag to only try to allocate from existing memory heaps and never create new such heap.
+
+    If new allocation cannot be placed in any of the existing heaps, allocation
+    fails with `E_OUTOFMEMORY` error.
+
+    You should not use D3D12MA::ALLOCATION_FLAG_COMMITTED and
+    D3D12MA::ALLOCATION_FLAG_NEVER_ALLOCATE at the same time. It makes no sense.
+    */
+    ALLOCATION_FLAG_NEVER_ALLOCATE = 0x2,
+
+    /** Create allocation only if additional memory required for it, if any, won't exceed
+    memory budget. Otherwise return `E_OUTOFMEMORY`.
+    */
+    ALLOCATION_FLAG_WITHIN_BUDGET = 0x4,
+
+    /** Allocation will be created from upper stack in a double stack pool.
+
+    This flag is only allowed for custom pools created with #POOL_FLAG_ALGORITHM_LINEAR flag.
+    */
+    ALLOCATION_FLAG_UPPER_ADDRESS = 0x8,
+
+    /** Set this flag if the allocated memory will have aliasing resources.
+
+    Use this when calling D3D12MA::Allocator::CreateResource() and similar to
+    guarantee creation of explicit heap for desired allocation and prevent it from using `CreateCommittedResource`,
+    so that new allocation object will always have `allocation->GetHeap() != NULL`.
+    */
+    ALLOCATION_FLAG_CAN_ALIAS = 0x10,
+
+    /** %Allocation strategy that chooses smallest possible free range for the allocation
+    to minimize memory usage and fragmentation, possibly at the expense of allocation time.
+    */
+    ALLOCATION_FLAG_STRATEGY_MIN_MEMORY = 0x00010000,
+
+    /** %Allocation strategy that chooses first suitable free range for the allocation -
+    not necessarily in terms of the smallest offset but the one that is easiest and fastest to find
+    to minimize allocation time, possibly at the expense of allocation quality.
+    */
+    ALLOCATION_FLAG_STRATEGY_MIN_TIME = 0x00020000,
+
+    /** %Allocation strategy that chooses always the lowest offset in available space.
+    This is not the most efficient strategy but achieves highly packed data.
+    Used internally by defragmentation, not recommended in typical usage.
+    */
+    ALLOCATION_FLAG_STRATEGY_MIN_OFFSET = 0x00040000,
+
+    /// Alias to #ALLOCATION_FLAG_STRATEGY_MIN_MEMORY.
+    ALLOCATION_FLAG_STRATEGY_BEST_FIT = ALLOCATION_FLAG_STRATEGY_MIN_MEMORY,
+    /// Alias to #ALLOCATION_FLAG_STRATEGY_MIN_TIME.
+    ALLOCATION_FLAG_STRATEGY_FIRST_FIT = ALLOCATION_FLAG_STRATEGY_MIN_TIME,
+
+    /// A bit mask to extract only `STRATEGY` bits from entire set of flags.
+    ALLOCATION_FLAG_STRATEGY_MASK =
+        ALLOCATION_FLAG_STRATEGY_MIN_MEMORY |
+        ALLOCATION_FLAG_STRATEGY_MIN_TIME |
+        ALLOCATION_FLAG_STRATEGY_MIN_OFFSET,
+};
+
+/// \brief Parameters of created D3D12MA::Allocation object. To be used with Allocator::CreateResource.
+struct ALLOCATION_DESC
+{
+    /// Flags.
+    ALLOCATION_FLAGS Flags;
+    /** \brief The type of memory heap where the new allocation should be placed.
+
+    It must be one of: `D3D12_HEAP_TYPE_DEFAULT`, `D3D12_HEAP_TYPE_UPLOAD`, `D3D12_HEAP_TYPE_READBACK`.
+
+    When D3D12MA::ALLOCATION_DESC::CustomPool != NULL this member is ignored.
+    */
+    D3D12_HEAP_TYPE HeapType;
+    /** \brief Additional heap flags to be used when allocating memory.
+
+    In most cases it can be 0.
+
+    - If you use D3D12MA::Allocator::CreateResource(), you don't need to care.
+      Necessary flag `D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS`, `D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES`,
+      or `D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES` is added automatically.
+    - If you use D3D12MA::Allocator::AllocateMemory(), you should specify one of those `ALLOW_ONLY` flags.
+      Except when you validate that D3D12MA::Allocator::GetD3D12Options()`.ResourceHeapTier == D3D12_RESOURCE_HEAP_TIER_1` -
+      then you can leave it 0.
+    - You can specify additional flags if needed. Then the memory will always be allocated as
+      separate block using `ID3D12Device::CreateCommittedResource` or `CreateHeap`, not as part of an existing larger block.
+
+    When D3D12MA::ALLOCATION_DESC::CustomPool != NULL this member is ignored.
+    */
+    D3D12_HEAP_FLAGS ExtraHeapFlags;
+    /** \brief Custom pool to place the new resource in. Optional.
+
+    When not NULL, the resource will be created inside specified custom pool.
+    */
+    Pool* CustomPool;
+    /// Custom general-purpose pointer that will be stored in D3D12MA::Allocation.
+    void* pPrivateData;
+};
+
+/** \brief Calculated statistics of memory usage e.g. in a specific memory heap type,
+memory segment group, custom pool, or total.
+
+These are fast to calculate.
+See functions: D3D12MA::Allocator::GetBudget(), D3D12MA::Pool::GetStatistics().
+*/
+struct Statistics
+{
+    /** \brief Number of D3D12 memory blocks allocated - `ID3D12Heap` objects and committed resources.
+    */
+    UINT BlockCount;
+    /** \brief Number of D3D12MA::Allocation objects allocated.
+
+    Committed allocations have their own blocks, so each one adds 1 to `AllocationCount` as well as `BlockCount`.
+    */
+    UINT AllocationCount;
+    /** \brief Number of bytes allocated in memory blocks.
+    */
+    UINT64 BlockBytes;
+    /** \brief Total number of bytes occupied by all D3D12MA::Allocation objects.
+
+    Always less than or equal to `BlockBytes`.
+    Difference `(BlockBytes - AllocationBytes)` is the amount of memory allocated from D3D12
+    but unused by any D3D12MA::Allocation.
+    */
+    UINT64 AllocationBytes;
+};
+
+/** \brief More detailed statistics than D3D12MA::Statistics.
+
+These are slower to calculate. Use for debugging purposes.
+See functions: D3D12MA::Allocator::CalculateStatistics(), D3D12MA::Pool::CalculateStatistics().
+
+Averages are not provided because they can be easily calculated as:
+
+\code
+UINT64 AllocationSizeAvg = DetailedStats.Stats.AllocationBytes / DetailedStats.Stats.AllocationCount;
+UINT64 UnusedBytes = DetailedStats.Stats.BlockBytes - DetailedStats.Stats.AllocationBytes;
+UINT64 UnusedRangeSizeAvg = UnusedBytes / DetailedStats.UnusedRangeCount;
+\endcode
+*/
+struct DetailedStatistics
+{
+    /// Basic statistics.
+    Statistics Stats;
+    /// Number of free ranges of memory between allocations.
+    UINT UnusedRangeCount;
+    /// Smallest allocation size. `UINT64_MAX` if there are 0 allocations.
+    UINT64 AllocationSizeMin;
+    /// Largest allocation size. 0 if there are 0 allocations.
+    UINT64 AllocationSizeMax;
+    /// Smallest empty range size. `UINT64_MAX` if there are 0 empty ranges.
+    UINT64 UnusedRangeSizeMin;
+    /// Largest empty range size. 0 if there are 0 empty ranges.
+    UINT64 UnusedRangeSizeMax;
+};
+
+/** \brief General statistics from current state of the allocator -
+total memory usage across all memory heaps and segments.
+
+These are slower to calculate. Use for debugging purposes.
+See function D3D12MA::Allocator::CalculateStatistics().
+*/
+struct TotalStatistics
+{
+    /** \brief One element for each type of heap located at the following indices:
+
+    - 0 = `D3D12_HEAP_TYPE_DEFAULT`
+    - 1 = `D3D12_HEAP_TYPE_UPLOAD`
+    - 2 = `D3D12_HEAP_TYPE_READBACK`
+    - 3 = `D3D12_HEAP_TYPE_CUSTOM`
+    - 4 = `D3D12_HEAP_TYPE_GPU_UPLOAD`
+    */
+    DetailedStatistics HeapType[5];
+    /** \brief One element for each memory segment group located at the following indices:
+
+    - 0 = `DXGI_MEMORY_SEGMENT_GROUP_LOCAL`
+    - 1 = `DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL`
+
+    Meaning of these segment groups is:
+
+    - When `IsUMA() == FALSE` (discrete graphics card):
+      - `DXGI_MEMORY_SEGMENT_GROUP_LOCAL` (index 0) represents GPU memory
+        (resources allocated in `D3D12_HEAP_TYPE_DEFAULT`, `D3D12_HEAP_TYPE_GPU_UPLOAD` or `D3D12_MEMORY_POOL_L1`).
+      - `DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL` (index 1) represents system memory
+        (resources allocated in `D3D12_HEAP_TYPE_UPLOAD`, `D3D12_HEAP_TYPE_READBACK`, or `D3D12_MEMORY_POOL_L0`).
+    - When `IsUMA() == TRUE` (integrated graphics chip):
+      - `DXGI_MEMORY_SEGMENT_GROUP_LOCAL` (index 0) represents memory shared for all the resources.
+      - `DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL` (index 1) is unused and always 0.
+    */
+    DetailedStatistics MemorySegmentGroup[2];
+    /// Total statistics from all memory allocated from D3D12.
+    DetailedStatistics Total;
+};
+
+/** \brief %Statistics of current memory usage and available budget for a specific memory segment group.
+
+These are fast to calculate. See function D3D12MA::Allocator::GetBudget().
+*/
+struct Budget
+{
+    /** \brief %Statistics fetched from the library.
+    */
+    Statistics Stats;
+    /** \brief Estimated current memory usage of the program.
+
+    Fetched from system using `IDXGIAdapter3::QueryVideoMemoryInfo` if possible.
+
+    It might be different than `BlockBytes` (usually higher) due to additional implicit objects
+    also occupying the memory, like swapchain, pipeline state objects, descriptor heaps, command lists, or
+    heaps and resources allocated outside of this library, if any.
+    */
+    UINT64 UsageBytes;
+    /** \brief Estimated amount of memory available to the program.
+
+    Fetched from system using `IDXGIAdapter3::QueryVideoMemoryInfo` if possible.
+
+    It might be different (most probably smaller) than memory capacity returned
+    by D3D12MA::Allocator::GetMemoryCapacity() due to factors
+    external to the program, decided by the operating system.
+    Difference `BudgetBytes - UsageBytes` is the amount of additional memory that can probably
+    be allocated without problems. Exceeding the budget may result in various problems.
+    */
+    UINT64 BudgetBytes;
+};
+
+
+/// \brief Represents single memory allocation done inside VirtualBlock.
+struct D3D12MA_API VirtualAllocation
+{
+    /// \brief Unique identifier of current allocation. 0 means null/invalid.
+    AllocHandle AllocHandle;
+};
+
+/** \brief Represents single memory allocation.
+
+It may be either implicit memory heap dedicated to a single resource or a
+specific region of a bigger heap plus unique offset.
+
+To create such object, fill structure D3D12MA::ALLOCATION_DESC and call function
+Allocator::CreateResource.
+
+The object remembers size and some other information.
+To retrieve this information, use methods of this class.
+
+The object also remembers `ID3D12Resource` and "owns" a reference to it,
+so it calls `%Release()` on the resource when destroyed.
+*/
+class D3D12MA_API Allocation : public IUnknownImpl
+{
+public:
+    /** \brief Returns offset in bytes from the start of memory heap.
+
+    You usually don't need to use this offset. If you create a buffer or a texture together with the allocation using function
+    D3D12MA::Allocator::CreateResource, functions that operate on that resource refer to the beginning of the resource,
+    not entire memory heap.
+
+    If the Allocation represents committed resource with implicit heap, returns 0.
+    */
+    UINT64 GetOffset() const;
+
+    /// Returns alignment that resource was created with.
+    UINT64 GetAlignment() const { return m_Alignment; }
+
+    /** \brief Returns size in bytes of the allocation.
+
+    - If you created a buffer or a texture together with the allocation using function D3D12MA::Allocator::CreateResource,
+      this is the size of the resource returned by `ID3D12Device::GetResourceAllocationInfo`.
+    - For allocations made out of bigger memory blocks, this also is the size of the memory region assigned exclusively to this allocation.
+    - For resources created as committed, this value may not be accurate.
+      DirectX implementation may optimize memory usage internally
+      so that you may even observe regions of `ID3D12Resource::GetGPUVirtualAddress()` + Allocation::GetSize() to overlap in memory and still work correctly.
+    */
+    UINT64 GetSize() const { return m_Size; }
+
+    /** \brief Returns D3D12 resource associated with this object.
+
+    Calling this method doesn't increment resource's reference counter.
+    */
+    ID3D12Resource* GetResource() const { return m_Resource; }
+
+    /// Releases the resource currently pointed by the allocation (if any), sets it to new one, incrementing its reference counter (if not null).
+    void SetResource(ID3D12Resource* pResource);
+
+    /** \brief Returns memory heap that the resource is created in.
+
+    If the Allocation represents committed resource with implicit heap, returns NULL.
+    */
+    ID3D12Heap* GetHeap() const;
+
+    /// Changes custom pointer for an allocation to a new value.
+    void SetPrivateData(void* pPrivateData) { m_pPrivateData = pPrivateData; }
+
+    /// Get custom pointer associated with the allocation.
+    void* GetPrivateData() const { return m_pPrivateData; }
+
+    /** \brief Associates a name with the allocation object. This name is for use in debug diagnostics and tools.
+
+    Internal copy of the string is made, so the memory pointed by the argument can be
+    changed or freed immediately after this call.
+
+    `Name` can be null.
+    */
+    void SetName(LPCWSTR Name);
+
+    /** \brief Returns the name associated with the allocation object.
+
+    Returned string points to an internal copy.
+
+    If no name was associated with the allocation, returns null.
+    */
+    LPCWSTR GetName() const { return m_Name; }
+
+protected:
+    void ReleaseThis() override;
+
+private:
+    friend class AllocatorPimpl;
+    friend class BlockVector;
+    friend class CommittedAllocationList;
+    friend class JsonWriter;
+    friend class BlockMetadata_Linear;
+    friend class DefragmentationContextPimpl;
+    friend struct CommittedAllocationListItemTraits;
+    template<typename T> friend void D3D12MA_DELETE(const ALLOCATION_CALLBACKS&, T*);
+    template<typename T> friend class PoolAllocator;
+
+    enum Type
+    {
+        TYPE_COMMITTED,
+        TYPE_PLACED,
+        TYPE_HEAP,
+        TYPE_COUNT
+    };
+
+    AllocatorPimpl* m_Allocator;
+    UINT64 m_Size;
+    UINT64 m_Alignment;
+    ID3D12Resource* m_Resource;
+    void* m_pPrivateData;
+    wchar_t* m_Name;
+
+    union
+    {
+        struct
+        {
+            CommittedAllocationList* list;
+            Allocation* prev;
+            Allocation* next;
+        } m_Committed;
+
+        struct
+        {
+            AllocHandle allocHandle;
+            NormalBlock* block;
+        } m_Placed;
+
+        struct
+        {
+            // Beginning must be compatible with m_Committed.
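+            // (Same three leading members as m_Committed, so code that only touches
+            // the list links can treat m_Heap as m_Committed.)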
+            CommittedAllocationList* list;
+            Allocation* prev;
+            Allocation* next;
+            ID3D12Heap* heap;
+        } m_Heap;
+    };
+
+    struct PackedData
+    {
+    public:
+        PackedData() :
+            m_Type(0), m_ResourceDimension(0), m_ResourceFlags(0), m_TextureLayout(0) { }
+
+        Type GetType() const { return (Type)m_Type; }
+        D3D12_RESOURCE_DIMENSION GetResourceDimension() const { return (D3D12_RESOURCE_DIMENSION)m_ResourceDimension; }
+        D3D12_RESOURCE_FLAGS GetResourceFlags() const { return (D3D12_RESOURCE_FLAGS)m_ResourceFlags; }
+        D3D12_TEXTURE_LAYOUT GetTextureLayout() const { return (D3D12_TEXTURE_LAYOUT)m_TextureLayout; }
+
+        void SetType(Type type);
+        void SetResourceDimension(D3D12_RESOURCE_DIMENSION resourceDimension);
+        void SetResourceFlags(D3D12_RESOURCE_FLAGS resourceFlags);
+        void SetTextureLayout(D3D12_TEXTURE_LAYOUT textureLayout);
+
+    private:
+        UINT m_Type : 2;              // enum Type
+        UINT m_ResourceDimension : 3; // enum D3D12_RESOURCE_DIMENSION
+        UINT m_ResourceFlags : 24;    // flags D3D12_RESOURCE_FLAGS
+        UINT m_TextureLayout : 9;     // enum D3D12_TEXTURE_LAYOUT
+    } m_PackedData;
+
+    Allocation(AllocatorPimpl* allocator, UINT64 size, UINT64 alignment);
+    // Nothing here, everything already done in Release.
+    virtual ~Allocation() = default;
+
+    void InitCommitted(CommittedAllocationList* list);
+    void InitPlaced(AllocHandle allocHandle, NormalBlock* block);
+    void InitHeap(CommittedAllocationList* list, ID3D12Heap* heap);
+    void SwapBlockAllocation(Allocation* allocation);
+    // If the Allocation represents committed resource with implicit heap, returns UINT64_MAX.
+    AllocHandle GetAllocHandle() const;
+    NormalBlock* GetBlock();
+    template<typename D3D12_RESOURCE_DESC_T>
+    void SetResourcePointer(ID3D12Resource* resource, const D3D12_RESOURCE_DESC_T* pResourceDesc);
+    void FreeName();
+
+    D3D12MA_CLASS_NO_COPY(Allocation)
+};
+
+
+/// Flags to be passed as DEFRAGMENTATION_DESC::Flags.
+enum DEFRAGMENTATION_FLAGS
+{
+    /** Use simple but fast algorithm for defragmentation.
+    May not achieve best results but will require least time to compute and least allocations to copy.
+    */
+    DEFRAGMENTATION_FLAG_ALGORITHM_FAST = 0x1,
+    /** Default defragmentation algorithm, applied also when no `ALGORITHM` flag is specified.
+    Offers a balance between defragmentation quality and the amount of allocations and bytes that need to be moved.
+    */
+    DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED = 0x2,
+    /** Perform full defragmentation of memory.
+    Can result in notably more time to compute and allocations to copy, but will achieve best memory packing.
+    */
+    DEFRAGMENTATION_FLAG_ALGORITHM_FULL = 0x4,
+
+    /// A bit mask to extract only `ALGORITHM` bits from entire set of flags.
+    DEFRAGMENTATION_FLAG_ALGORITHM_MASK =
+        DEFRAGMENTATION_FLAG_ALGORITHM_FAST |
+        DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED |
+        DEFRAGMENTATION_FLAG_ALGORITHM_FULL
+};
+
+/** \brief Parameters for defragmentation.
+
+To be used with functions Allocator::BeginDefragmentation() and Pool::BeginDefragmentation().
+*/
+struct DEFRAGMENTATION_DESC
+{
+    /// Flags.
+    DEFRAGMENTATION_FLAGS Flags;
+    /** \brief Maximum number of bytes that can be copied during single pass, while moving allocations to different places.
+
+    0 means no limit.
+    */
+    UINT64 MaxBytesPerPass;
+    /** \brief Maximum number of allocations that can be moved during single pass to a different place.
+
+    0 means no limit.
+    */
+    UINT32 MaxAllocationsPerPass;
+};
+
+/// Operation performed on single defragmentation move.
+enum DEFRAGMENTATION_MOVE_OPERATION
+{
+    /** Resource has been recreated at `pDstTmpAllocation`, data has been copied, old resource has been destroyed.
+    `pSrcAllocation` will be changed to point to the new place. This is the default value set by DefragmentationContext::BeginPass().
+    */
+    DEFRAGMENTATION_MOVE_OPERATION_COPY = 0,
+    /// Set this value if you cannot move the allocation. New place reserved at `pDstTmpAllocation` will be freed. `pSrcAllocation` will remain unchanged.
+    DEFRAGMENTATION_MOVE_OPERATION_IGNORE = 1,
+    /// Set this value if you decide to abandon the allocation and you destroyed the resource. New place reserved at `pDstTmpAllocation` will be freed, along with `pSrcAllocation`.
+    DEFRAGMENTATION_MOVE_OPERATION_DESTROY = 2,
+};
+
+/// Single move of an allocation to be done for defragmentation.
+struct DEFRAGMENTATION_MOVE
+{
+    /** \brief Operation to be performed on the allocation by DefragmentationContext::EndPass().
+    Default value is #DEFRAGMENTATION_MOVE_OPERATION_COPY. You can modify it.
+    */
+    DEFRAGMENTATION_MOVE_OPERATION Operation;
+    /// %Allocation that should be moved.
+    Allocation* pSrcAllocation;
+    /** \brief Temporary allocation pointing to destination memory that will replace `pSrcAllocation`.
+
+    Use it to retrieve new `ID3D12Heap` and offset to create new `ID3D12Resource` and then store it here via Allocation::SetResource().
+
+    \warning Do not store this allocation in your data structures! It exists only temporarily, for the duration of the defragmentation pass,
+    to be used for storing newly created resource. DefragmentationContext::EndPass() will destroy it and make `pSrcAllocation` point to this memory.
+    */
+    Allocation* pDstTmpAllocation;
+};
+
+/** \brief Parameters for incremental defragmentation steps.
+
+To be used with function DefragmentationContext::BeginPass().
+*/
+struct DEFRAGMENTATION_PASS_MOVE_INFO
+{
+    /// Number of elements in the `pMoves` array.
+    UINT32 MoveCount;
+    /** \brief Array of moves to be performed by the user in the current defragmentation pass.
+
+    Pointer to an array of `MoveCount` elements, owned by %D3D12MA, created in DefragmentationContext::BeginPass(), destroyed in DefragmentationContext::EndPass().
+
+    For each element, you should:
+
+    1. Create a new resource in the place pointed by `pMoves[i].pDstTmpAllocation->GetHeap()` + `pMoves[i].pDstTmpAllocation->GetOffset()`.
+    2. Store new resource in `pMoves[i].pDstTmpAllocation` by using Allocation::SetResource(). It will later replace old resource from `pMoves[i].pSrcAllocation`.
+    3. Copy data from the `pMoves[i].pSrcAllocation` e.g. using `ID3D12GraphicsCommandList::CopyResource`.
+    4. Make sure these commands finished executing on the GPU.
+
+    Only then can you finish the defragmentation pass by calling DefragmentationContext::EndPass().
+    After this call, the allocation will point to the new place in memory.
+
+    Alternatively, if you cannot move specific allocation,
+    you can set DEFRAGMENTATION_MOVE::Operation to D3D12MA::DEFRAGMENTATION_MOVE_OPERATION_IGNORE.
+
+    Alternatively, if you decide you want to completely remove the allocation,
+    set DEFRAGMENTATION_MOVE::Operation to D3D12MA::DEFRAGMENTATION_MOVE_OPERATION_DESTROY.
+    Then, after DefragmentationContext::EndPass() the allocation will be released.
+    */
+    DEFRAGMENTATION_MOVE* pMoves;
+};
+
+/// %Statistics returned for defragmentation process by function DefragmentationContext::GetStats().
+struct DEFRAGMENTATION_STATS
+{
+    /// Total number of bytes that have been copied while moving allocations to different places.
+    UINT64 BytesMoved;
+    /// Total number of bytes that have been released to the system by freeing empty heaps.
+    UINT64 BytesFreed;
+    /// Number of allocations that have been moved to different places.
+    UINT32 AllocationsMoved;
+    /// Number of empty `ID3D12Heap` objects that have been released to the system.
+    UINT32 HeapsFreed;
+};
+
+/** \brief Represents defragmentation process in progress.
+
+You can create this object using Allocator::BeginDefragmentation (for default pools) or
+Pool::BeginDefragmentation (for a custom pool).
+*/
+class D3D12MA_API DefragmentationContext : public IUnknownImpl
+{
+public:
+    /** \brief Starts single defragmentation pass.
+
+    \param[out] pPassInfo Computed information for the current pass.
+    \returns
+    - `S_OK` if no more moves are possible. Then you can omit call to DefragmentationContext::EndPass() and simply end whole defragmentation.
+    - `S_FALSE` if there are pending moves returned in `pPassInfo`. You need to perform them, call DefragmentationContext::EndPass(),
+      and then preferably try another pass with DefragmentationContext::BeginPass().
+    */
+    HRESULT BeginPass(DEFRAGMENTATION_PASS_MOVE_INFO* pPassInfo);
+    /** \brief Ends single defragmentation pass.
+
+    \param pPassInfo Computed information for the current pass filled by DefragmentationContext::BeginPass() and possibly modified by you.
+    \return Returns `S_OK` if no more moves are possible or `S_FALSE` if more defragmentation passes are possible.
+
+    Ends incremental defragmentation pass and commits all defragmentation moves from `pPassInfo`.
+    After this call:
+
+    - %Allocation at `pPassInfo[i].pSrcAllocation` that had `pPassInfo[i].Operation ==` #DEFRAGMENTATION_MOVE_OPERATION_COPY
+      (which is the default) will be pointing to the new destination place.
+    - %Allocation at `pPassInfo[i].pSrcAllocation` that had `pPassInfo[i].Operation ==` #DEFRAGMENTATION_MOVE_OPERATION_DESTROY
+      will be released.
+
+    If no more moves are possible you can end whole defragmentation.
+    */
+    HRESULT EndPass(DEFRAGMENTATION_PASS_MOVE_INFO* pPassInfo);
+    /** \brief Returns statistics of the defragmentation performed so far.
+    */
+    void GetStats(DEFRAGMENTATION_STATS* pStats);
+
+protected:
+    void ReleaseThis() override;
+
+private:
+    friend class Pool;
+    friend class Allocator;
+    template<typename T> friend void D3D12MA_DELETE(const ALLOCATION_CALLBACKS&, T*);
+
+    DefragmentationContextPimpl* m_Pimpl;
+
+    DefragmentationContext(AllocatorPimpl* allocator,
+        const DEFRAGMENTATION_DESC& desc,
+        BlockVector* poolVector);
+    ~DefragmentationContext();
+
+    D3D12MA_CLASS_NO_COPY(DefragmentationContext)
+};
+
+/// \brief Bit flags to be used with POOL_DESC::Flags.
+enum POOL_FLAGS
+{
+    /// Zero
+    POOL_FLAG_NONE = 0,
+
+    /** \brief Enables alternative, linear allocation algorithm in this pool.
+
+    Specify this flag to enable linear allocation algorithm, which always creates
+    new allocations after last one and doesn't reuse space from allocations freed in
+    between. It trades memory consumption for simplified algorithm and data
+    structure, which has better performance and uses less memory for metadata.
+
+    By using this flag, you can achieve behavior of free-at-once, stack,
+    ring buffer, and double stack.
+    For details, see documentation chapter \ref linear_algorithm.
+    */
+    POOL_FLAG_ALGORITHM_LINEAR = 0x1,
+
+    /** \brief Optimization, allocate MSAA textures as committed resources always.
+
+    Specify this flag to create MSAA textures with implicit heaps, as if they were created
+    with flag D3D12MA::ALLOCATION_FLAG_COMMITTED. Usage of this flag enables the pool to create its heaps
+    on smaller alignment not suitable for MSAA textures.
+    */
+    POOL_FLAG_MSAA_TEXTURES_ALWAYS_COMMITTED = 0x2,
+
+    // Bit mask to extract only `ALGORITHM` bits from entire set of flags.
+    POOL_FLAG_ALGORITHM_MASK = POOL_FLAG_ALGORITHM_LINEAR
+};
+
+/// \brief Parameters of created D3D12MA::Pool object. To be used with D3D12MA::Allocator::CreatePool.
+struct POOL_DESC
+{
+    /// Flags.
+    POOL_FLAGS Flags;
+    /** \brief The parameters of memory heap where allocations of this pool should be placed.
+
+    In the simplest case, just fill it with zeros and set `Type` to one of: `D3D12_HEAP_TYPE_DEFAULT`,
+    `D3D12_HEAP_TYPE_UPLOAD`, `D3D12_HEAP_TYPE_READBACK`. Additional parameters can be used e.g. to utilize UMA.
+    */
+    D3D12_HEAP_PROPERTIES HeapProperties;
+    /** \brief Heap flags to be used when allocating heaps of this pool.
+
+    It should contain one of these values, depending on type of resources you are going to create in this heap:
+    `D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS`,
+    `D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES`,
+    `D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES`.
+    Except if ResourceHeapTier = 2, then it may be `D3D12_HEAP_FLAG_ALLOW_ALL_BUFFERS_AND_TEXTURES` = 0.
+
+    You can specify additional flags if needed.
+    */
+    D3D12_HEAP_FLAGS HeapFlags;
+    /** \brief Size of a single heap (memory block) to be allocated as part of this pool, in bytes. Optional.
+
+    Specify nonzero to set explicit, constant size of memory blocks used by this pool.
+    Leave 0 to use default and let the library manage block sizes automatically.
+    Then sizes of particular blocks may vary.
+    */
+    UINT64 BlockSize;
+    /** \brief Minimum number of heaps (memory blocks) to be always allocated in this pool, even if they stay empty. Optional.
+
+    Set to 0 to have no preallocated blocks and allow the pool to be completely empty.
+    */
+    UINT MinBlockCount;
+    /** \brief Maximum number of heaps (memory blocks) that can be allocated in this pool. Optional.
+
+    Set to 0 to use default, which is `UINT64_MAX`, which means no limit.
+
+    Set to same value as D3D12MA::POOL_DESC::MinBlockCount to have fixed amount of memory allocated
+    throughout whole lifetime of this pool.
+    */
+    UINT MaxBlockCount;
+    /** \brief Additional minimum alignment to be used for all allocations created from this pool. Can be 0.
+
+    Leave 0 (default) not to impose any additional alignment. If not 0, it must be a power of two.
+    */
+    UINT64 MinAllocationAlignment;
+    /** \brief Additional parameter allowing pool to create resources with passed protected session.
+
+    If not null then all the heaps and committed resources will be created with this parameter.
+    Valid only if ID3D12Device4 interface is present in current Windows SDK!
+    */
+    ID3D12ProtectedResourceSession* pProtectedSession;
+    /** \brief Residency priority to be set for all allocations made in this pool. Optional.
+
+    Set this parameter to one of the possible enum values e.g. `D3D12_RESIDENCY_PRIORITY_HIGH`
+    to apply specific residency priority to all allocations made in this pool:
+    `ID3D12Heap` memory blocks used to sub-allocate for placed resources, as well as
+    committed resources or heaps created when D3D12MA::ALLOCATION_FLAG_COMMITTED is used.
+    This can increase or decrease the chance that the memory will be pushed out from VRAM
+    to system RAM when the system runs out of memory. This is invisible to a developer
+    using the D3D12 API, but it can degrade performance.
+
+    Priority is set using function `ID3D12Device1::SetResidencyPriority`.
+    It is performed only when `ID3D12Device1` interface is defined and successfully obtained.
+    Otherwise, this parameter is ignored.
+
+    This parameter is optional. If you set it to `D3D12_RESIDENCY_PRIORITY(0)`,
+    residency priority will not be set for allocations made in this pool.
+
+    There is no equivalent parameter for allocations made in default pools.
+    If you want to set residency priority for such allocation, you need to do it manually:
+    allocate with D3D12MA::ALLOCATION_FLAG_COMMITTED and call
+    `ID3D12Device1::SetResidencyPriority`, passing `allocation->GetResource()`.
+    */
+    D3D12_RESIDENCY_PRIORITY ResidencyPriority;
+};
+
+/** \brief Custom memory pool
+
+Represents a separate set of heaps (memory blocks) that can be used to create
+D3D12MA::Allocation-s and resources in it. Usually there is no need to create custom
+pools - creating resources in default pool is sufficient.
+
+To create custom pool, fill D3D12MA::POOL_DESC and call D3D12MA::Allocator::CreatePool.
+*/
+class D3D12MA_API Pool : public IUnknownImpl
+{
+public:
+    /** \brief Returns copy of parameters of the pool.
+
+    These are the same parameters as passed to D3D12MA::Allocator::CreatePool.
+    */
+    POOL_DESC GetDesc() const;
+
+    /** \brief Retrieves basic statistics of the custom pool that are fast to calculate.
+
+    \param[out] pStats %Statistics of the current pool.
+    */
+    void GetStatistics(Statistics* pStats);
+
+    /** \brief Retrieves detailed statistics of the custom pool that are slower to calculate.
+
+    \param[out] pStats %Statistics of the current pool.
+    */
+    void CalculateStatistics(DetailedStatistics* pStats);
+
+    /** \brief Associates a name with the pool. This name is for use in debug diagnostics and tools.
+
+    Internal copy of the string is made, so the memory pointed by the argument can be
+    changed or freed immediately after this call.
+
+    `Name` can be NULL.
+    */
+    void SetName(LPCWSTR Name);
+
+    /** \brief Returns the name associated with the pool object.
+
+    Returned string points to an internal copy.
+
+    If no name was associated with the pool, returns NULL.
+    */
+    LPCWSTR GetName() const;
+
+    /** \brief Begins defragmentation process of the current pool.
+
+    \param pDesc Structure filled with parameters of defragmentation.
+    \param[out] ppContext Context object that will manage defragmentation.
+    \returns
+    - `S_OK` if defragmentation can begin.
+    - `E_NOINTERFACE` if defragmentation is not supported.
+
+    For more information about defragmentation, see documentation chapter:
+    [Defragmentation](@ref defragmentation).
+    */
+    HRESULT BeginDefragmentation(const DEFRAGMENTATION_DESC* pDesc, DefragmentationContext** ppContext);
+
+protected:
+    void ReleaseThis() override;
+
+private:
+    friend class Allocator;
+    friend class AllocatorPimpl;
+    template<typename T> friend void D3D12MA_DELETE(const ALLOCATION_CALLBACKS&, T*);
+
+    PoolPimpl* m_Pimpl;
+
+    Pool(Allocator* allocator, const POOL_DESC &desc);
+    ~Pool();
+
+    D3D12MA_CLASS_NO_COPY(Pool)
+};
+
+
+/// \brief Bit flags to be used with ALLOCATOR_DESC::Flags.
+enum ALLOCATOR_FLAGS
+{
+    /// Zero
+    ALLOCATOR_FLAG_NONE = 0,
+
+    /**
+    Allocator and all objects created from it will not be synchronized internally,
+    so you must guarantee they are used from only one thread at a time or
+    synchronized by you.
+
+    Using this flag may increase performance because internal mutexes are not used.
+    */
+    ALLOCATOR_FLAG_SINGLETHREADED = 0x1,
+
+    /**
+    Every allocation will have its own memory block.
+    To be used for debugging purposes.
+    */
+    ALLOCATOR_FLAG_ALWAYS_COMMITTED = 0x2,
+
+    /**
+    Heaps created for the default pools will be created with flag `D3D12_HEAP_FLAG_CREATE_NOT_ZEROED`,
+    allowing for their memory to be not zeroed by the system if possible,
+    which can speed up allocation.
+
+    Only affects default pools.
+    To use the flag with @ref custom_pools, you need to add it manually:
+
+    \code
+    poolDesc.HeapFlags |= D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
+    \endcode
+
+    Only available if `ID3D12Device8` is present. Otherwise, the flag is ignored.
+    */
+    ALLOCATOR_FLAG_DEFAULT_POOLS_NOT_ZEROED = 0x4,
+
+    /** \brief Optimization, allocate MSAA textures as committed resources always.
+
+    Specify this flag to create MSAA textures with implicit heaps, as if they were created
+    with flag D3D12MA::ALLOCATION_FLAG_COMMITTED. Usage of this flag enables all default pools
+    to create their heaps on smaller alignment not suitable for MSAA textures.
+    */
+    ALLOCATOR_FLAG_MSAA_TEXTURES_ALWAYS_COMMITTED = 0x8,
+    /** \brief Disable optimization that prefers creating small buffers as committed to avoid 64 KB alignment.
+
+    By default, the library prefers creating small buffers <= 32 KB as committed,
+    because drivers tend to pack them better, while placed buffers require 64 KB alignment.
+    This, however, may decrease performance, as creating committed resources involves allocation of implicit heaps,
+    which may take longer than creating placed resources in existing heaps.
+    Passing this flag will disable this committed preference globally for the allocator.
+    It can also be disabled for a single allocation by using #ALLOCATION_FLAG_STRATEGY_MIN_TIME.
+    */
+    ALLOCATOR_FLAG_DONT_PREFER_SMALL_BUFFERS_COMMITTED = 0x10,
+};
+
+/// \brief Parameters of created Allocator object. To be used with CreateAllocator().
+struct ALLOCATOR_DESC
+{
+    /// Flags.
+    ALLOCATOR_FLAGS Flags;
+
+    /** Direct3D device object that the allocator should be attached to.
+
+    Allocator is doing `AddRef`/`Release` on this object.
+    */
+    ID3D12Device* pDevice;
+
+    /** \brief Preferred size of a single `ID3D12Heap` block to be allocated.
+
+    Set to 0 to use default, which is currently 64 MiB.
+    */
+    UINT64 PreferredBlockSize;
+
+    /** \brief Custom CPU memory allocation callbacks. Optional.
+
+    Optional, can be null. When specified, will be used for all CPU-side memory allocations.
+    */
+    const ALLOCATION_CALLBACKS* pAllocationCallbacks;
+
+    /** DXGI Adapter object that you use for D3D12 and this allocator.
+
+    Allocator is doing `AddRef`/`Release` on this object.
+    */
+    IDXGIAdapter* pAdapter;
+};
+
+/**
+\brief Represents main object of this library initialized for particular `ID3D12Device`.
+
+Fill structure D3D12MA::ALLOCATOR_DESC and call function CreateAllocator() to create it.
+Call method `Release()` to destroy it.
+
+It is recommended to create just one object of this type per `ID3D12Device` object,
+right after Direct3D 12 is initialized, and keep it alive until just before the Direct3D device is destroyed.
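+
+A minimal sketch of creating and destroying the allocator (assuming `device` and `adapter`
+are your already-initialized `ID3D12Device*` and `IDXGIAdapter*`; error handling omitted):
+
+\code
+D3D12MA::ALLOCATOR_DESC allocatorDesc = {};
+allocatorDesc.pDevice = device;
+allocatorDesc.pAdapter = adapter;
+
+D3D12MA::Allocator* allocator = NULL;
+HRESULT hr = D3D12MA::CreateAllocator(&allocatorDesc, &allocator);
+
+// ... create resources via allocator->CreateResource(), use them ...
+
+allocator->Release();
+\endcode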
+*/
+class D3D12MA_API Allocator : public IUnknownImpl
+{
+public:
+    /// Returns cached options retrieved from D3D12 device.
+    const D3D12_FEATURE_DATA_D3D12_OPTIONS& GetD3D12Options() const;
+    /** \brief Returns true if `D3D12_FEATURE_DATA_ARCHITECTURE1::UMA` was found to be true.
+
+    For more information about how to use it, see the following Microsoft Docs articles:
+
+    - "UMA Optimizations: CPU Accessible Textures and Standard Swizzle"
+    - "D3D12_FEATURE_DATA_ARCHITECTURE structure (d3d12.h)"
+    - "ID3D12Device::GetCustomHeapProperties method (d3d12.h)"
+    */
+    BOOL IsUMA() const;
+    /** \brief Returns true if `D3D12_FEATURE_DATA_ARCHITECTURE1::CacheCoherentUMA` was found to be true.
+
+    For more information about how to use it, see the following Microsoft Docs articles:
+
+    - "UMA Optimizations: CPU Accessible Textures and Standard Swizzle"
+    - "D3D12_FEATURE_DATA_ARCHITECTURE structure (d3d12.h)"
+    - "ID3D12Device::GetCustomHeapProperties method (d3d12.h)"
+    */
+    BOOL IsCacheCoherentUMA() const;
+    /** \brief Returns true if GPU Upload Heaps are supported on the current system.
+
+    When true, you can use `D3D12_HEAP_TYPE_GPU_UPLOAD`.
+
+    This flag is fetched from `D3D12_FEATURE_DATA_D3D12_OPTIONS16::GPUUploadHeapSupported`.
+    */
+    BOOL IsGPUUploadHeapSupported() const;
+    /** \brief Returns total amount of memory of specific segment group, in bytes.
+
+    \param memorySegmentGroup use `DXGI_MEMORY_SEGMENT_GROUP_LOCAL` or `DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL`.
+
+    This information is taken from `DXGI_ADAPTER_DESC`.
+    It is not recommended to use this number.
+    You should preferably call GetBudget() and limit memory usage to D3D12MA::Budget::BudgetBytes instead.
+
+    - When IsUMA() `== FALSE` (discrete graphics card):
+      - `GetMemoryCapacity(DXGI_MEMORY_SEGMENT_GROUP_LOCAL)` returns the size of the video memory.
+      - `GetMemoryCapacity(DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL)` returns the size of the system memory available for D3D12 resources.
+    - When IsUMA() `== TRUE` (integrated graphics chip):
+      - `GetMemoryCapacity(DXGI_MEMORY_SEGMENT_GROUP_LOCAL)` returns the size of the shared memory available for all D3D12 resources.
+        All memory is considered "local".
+      - `GetMemoryCapacity(DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL)` is not applicable and returns 0.
+    */
+    UINT64 GetMemoryCapacity(UINT memorySegmentGroup) const;
+
+    /** \brief Allocates memory and creates a D3D12 resource (buffer or texture). This is the main allocation function.
+
+    The function is similar to `ID3D12Device::CreateCommittedResource`, but it may
+    really call `ID3D12Device::CreatePlacedResource` to assign part of a larger,
+    existing memory heap to the new resource, which is the main purpose of this
+    whole library.
+
+    If `ppvResource` is null, you receive only `ppAllocation` object from this function.
+    It holds pointer to `ID3D12Resource` that can be queried using function D3D12MA::Allocation::GetResource().
+    Reference count of the resource object is 1.
+    It is automatically destroyed when you destroy the allocation object.
+
+    If `ppvResource` is not null, you receive pointer to the resource next to allocation object.
+    Reference count of the resource object is then increased by calling `QueryInterface`, so you need to manually `Release` it
+    along with the allocation.
+
+    \param pAllocDesc Parameters of the allocation.
+    \param pResourceDesc Description of created resource.
+    \param InitialResourceState Initial resource state.
+    \param pOptimizedClearValue Optional. Either null or optimized clear value.
+    \param[out] ppAllocation Filled with pointer to new allocation object created.
+    \param riidResource IID of a resource to be returned via `ppvResource`.
+    \param[out] ppvResource Optional. If not null, filled with pointer to new resource created.
+
+    \note This function creates a new resource. Sub-allocation of parts of one large buffer,
+    although recommended as a good practice, is out of scope of this library and could be implemented
+    by the user as a higher-level logic on top of it, e.g. using the \ref virtual_allocator feature.
+    */
+    HRESULT CreateResource(
+        const ALLOCATION_DESC* pAllocDesc,
+        const D3D12_RESOURCE_DESC* pResourceDesc,
+        D3D12_RESOURCE_STATES InitialResourceState,
+        const D3D12_CLEAR_VALUE *pOptimizedClearValue,
+        Allocation** ppAllocation,
+        REFIID riidResource,
+        void** ppvResource);
+
+#ifdef __ID3D12Device8_INTERFACE_DEFINED__
+    /** \brief Similar to Allocator::CreateResource, but supports new structure `D3D12_RESOURCE_DESC1`.
+
+    It internally uses `ID3D12Device8::CreateCommittedResource2` or `ID3D12Device8::CreatePlacedResource1`.
+
+    To work correctly, `ID3D12Device8` interface must be available in the current system. Otherwise, `E_NOINTERFACE` is returned.
+    */
+    HRESULT CreateResource2(
+        const ALLOCATION_DESC* pAllocDesc,
+        const D3D12_RESOURCE_DESC1* pResourceDesc,
+        D3D12_RESOURCE_STATES InitialResourceState,
+        const D3D12_CLEAR_VALUE *pOptimizedClearValue,
+        Allocation** ppAllocation,
+        REFIID riidResource,
+        void** ppvResource);
+#endif // #ifdef __ID3D12Device8_INTERFACE_DEFINED__
+
+#ifdef __ID3D12Device10_INTERFACE_DEFINED__
+    /** \brief Similar to Allocator::CreateResource2, but takes an initial layout instead of a state,
+    and a list of castable formats.
+
+    It internally uses `ID3D12Device10::CreateCommittedResource3` or `ID3D12Device10::CreatePlacedResource2`.
+
+    To work correctly, `ID3D12Device10` interface must be available in the current system. Otherwise, `E_NOINTERFACE` is returned.
+    */
+    HRESULT CreateResource3(const ALLOCATION_DESC* pAllocDesc,
+        const D3D12_RESOURCE_DESC1* pResourceDesc,
+        D3D12_BARRIER_LAYOUT InitialLayout,
+        const D3D12_CLEAR_VALUE* pOptimizedClearValue,
+        UINT32 NumCastableFormats,
+        DXGI_FORMAT* pCastableFormats,
+        Allocation** ppAllocation,
+        REFIID riidResource,
+        void** ppvResource);
+#endif // #ifdef __ID3D12Device10_INTERFACE_DEFINED__
+
+    /** \brief Allocates memory without creating any resource placed in it.
+
+    This function is similar to `ID3D12Device::CreateHeap`, but it may really assign
+    part of a larger, existing heap to the allocation.
+
+    `pAllocDesc->ExtraHeapFlags` should contain one of these values, depending on type of resources you are going to create in this memory:
+    `D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS`,
+    `D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES`,
+    `D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES`.
+    Except if you validate that ResourceHeapTier = 2 - then `ExtraHeapFlags`
+    may be `D3D12_HEAP_FLAG_ALLOW_ALL_BUFFERS_AND_TEXTURES` = 0.
+    Additional flags in `ExtraHeapFlags` are allowed as well.
+
+    `pAllocInfo->SizeInBytes` must be a multiple of 64KB.
+    `pAllocInfo->Alignment` must be one of the legal values as described in documentation of `D3D12_HEAP_DESC`.
+
+    If you use D3D12MA::ALLOCATION_FLAG_COMMITTED you will get a separate memory block -
+    a heap that always has offset 0.
+    */
+    HRESULT AllocateMemory(
+        const ALLOCATION_DESC* pAllocDesc,
+        const D3D12_RESOURCE_ALLOCATION_INFO* pAllocInfo,
+        Allocation** ppAllocation);
+
+    /** \brief Creates a new resource in place of an existing allocation.
+    This is useful for memory aliasing.
+
+    \param pAllocation Existing allocation indicating the memory where the new resource should be created.
+    It can be created using D3D12MA::Allocator::CreateResource and already have a resource bound to it,
+    or can be a raw memory allocated with D3D12MA::Allocator::AllocateMemory.
+    It must not be created as committed so that `ID3D12Heap` is available and not implicit.
+    \param AllocationLocalOffset Additional offset in bytes to be applied when allocating the resource.
+    Local from the start of `pAllocation`, not the beginning of the whole `ID3D12Heap`!
+    If the new resource should start from the beginning of the `pAllocation` it should be 0.
+    \param pResourceDesc Description of the new resource to be created.
+    \param InitialResourceState
+    \param pOptimizedClearValue
+    \param riidResource
+    \param[out] ppvResource Returns pointer to the new resource.
+    The resource is not bound with `pAllocation`.
+    This pointer must not be null - you must get the resource pointer and `Release` it when no longer needed.
+
+    Memory requirements of the new resource are checked for validation.
+    If its size exceeds the end of `pAllocation` or required alignment is not fulfilled
+    considering `pAllocation->GetOffset() + AllocationLocalOffset`, the function
+    returns `E_INVALIDARG`.
+    */
+    HRESULT CreateAliasingResource(
+        Allocation* pAllocation,
+        UINT64 AllocationLocalOffset,
+        const D3D12_RESOURCE_DESC* pResourceDesc,
+        D3D12_RESOURCE_STATES InitialResourceState,
+        const D3D12_CLEAR_VALUE *pOptimizedClearValue,
+        REFIID riidResource,
+        void** ppvResource);
+
+#ifdef __ID3D12Device8_INTERFACE_DEFINED__
+    /** \brief Similar to Allocator::CreateAliasingResource, but supports new structure `D3D12_RESOURCE_DESC1`.
+
+    It internally uses `ID3D12Device8::CreatePlacedResource1`.
+
+    To work correctly, `ID3D12Device8` interface must be available in the current system. Otherwise, `E_NOINTERFACE` is returned.
+    */
+    HRESULT CreateAliasingResource1(Allocation* pAllocation,
+        UINT64 AllocationLocalOffset,
+        const D3D12_RESOURCE_DESC1* pResourceDesc,
+        D3D12_RESOURCE_STATES InitialResourceState,
+        const D3D12_CLEAR_VALUE* pOptimizedClearValue,
+        REFIID riidResource,
+        void** ppvResource);
+#endif // #ifdef __ID3D12Device8_INTERFACE_DEFINED__
+
+#ifdef __ID3D12Device10_INTERFACE_DEFINED__
+    /** \brief Similar to Allocator::CreateAliasingResource1, but takes an initial layout instead of a state,
+    and a list of castable formats.
+
+    It internally uses `ID3D12Device10::CreatePlacedResource2`.
+
+    To work correctly, `ID3D12Device10` interface must be available in the current system. Otherwise, `E_NOINTERFACE` is returned.
+    */
+    HRESULT CreateAliasingResource2(Allocation* pAllocation,
+        UINT64 AllocationLocalOffset,
+        const D3D12_RESOURCE_DESC1* pResourceDesc,
+        D3D12_BARRIER_LAYOUT InitialLayout,
+        const D3D12_CLEAR_VALUE* pOptimizedClearValue,
+        UINT32 NumCastableFormats,
+        DXGI_FORMAT* pCastableFormats,
+        REFIID riidResource,
+        void** ppvResource);
+#endif // #ifdef __ID3D12Device10_INTERFACE_DEFINED__
+
+    /** \brief Creates custom pool.
+    */
+    HRESULT CreatePool(
+        const POOL_DESC* pPoolDesc,
+        Pool** ppPool);
+
+    /** \brief Retrieves information about current memory usage and budget.
+
+    \param[out] pLocalBudget Optional, can be null.
+    \param[out] pNonLocalBudget Optional, can be null.
+
+    - When IsUMA() `== FALSE` (discrete graphics card):
+      - `pLocalBudget` returns the budget of the video memory.
+      - `pNonLocalBudget` returns the budget of the system memory available for D3D12 resources.
+    - When IsUMA() `== TRUE` (integrated graphics chip):
+      - `pLocalBudget` returns the budget of the shared memory available for all D3D12 resources.
+        All memory is considered "local".
+      - `pNonLocalBudget` is not applicable and returns zeros.
+
+    This function is called "get" not "calculate" because it is very fast, suitable to be called
+    every frame or every allocation. For more detailed statistics use CalculateStatistics().
+
+    Note that when using allocator from multiple threads, returned information may immediately
+    become outdated.
+    */
+    void GetBudget(Budget* pLocalBudget, Budget* pNonLocalBudget);
+
+    /** \brief Retrieves statistics from current state of the allocator.
+
+    This function is called "calculate" not "get" because it has to traverse all
+    internal data structures, so it may be quite slow. Use it for debugging purposes.
+    For faster but more brief statistics suitable to be called every frame or every allocation,
+    use GetBudget().
+
+    Note that when using allocator from multiple threads, returned information may immediately
+    become outdated.
+    */
+    void CalculateStatistics(TotalStatistics* pStats);
+
+    /** \brief Builds and returns statistics as a string in JSON format.
+    *
+    @param[out] ppStatsString Must be freed using Allocator::FreeStatsString.
+    @param DetailedMap `TRUE` to include full list of allocations (can make the string quite long), `FALSE` to only return statistics.
+    */
+    void BuildStatsString(WCHAR** ppStatsString, BOOL DetailedMap) const;
+
+    /// Frees memory of a string returned from Allocator::BuildStatsString.
+    void FreeStatsString(WCHAR* pStatsString) const;
+
+    /** \brief Begins defragmentation process of the default pools.
+
+    \param pDesc Structure filled with parameters of defragmentation.
+    \param[out] ppContext Context object that will manage defragmentation.
+
+    For more information about defragmentation, see documentation chapter:
+    [Defragmentation](@ref defragmentation).
+    */
+    void BeginDefragmentation(const DEFRAGMENTATION_DESC* pDesc, DefragmentationContext** ppContext);
+
+protected:
+    void ReleaseThis() override;
+
+private:
+    friend D3D12MA_API HRESULT CreateAllocator(const ALLOCATOR_DESC*, Allocator**);
+    template<typename T> friend void D3D12MA_DELETE(const ALLOCATION_CALLBACKS&, T*);
+    friend class DefragmentationContext;
+    friend class Pool;
+
+    Allocator(const ALLOCATION_CALLBACKS& allocationCallbacks, const ALLOCATOR_DESC& desc);
+    ~Allocator();
+
+    AllocatorPimpl* m_Pimpl;
+
+    D3D12MA_CLASS_NO_COPY(Allocator)
+};
+
+
+/// \brief Bit flags to be used with VIRTUAL_BLOCK_DESC::Flags.
+enum VIRTUAL_BLOCK_FLAGS
+{
+    /// Zero
+    VIRTUAL_BLOCK_FLAG_NONE = 0,
+
+    /** \brief Enables alternative, linear allocation algorithm in this virtual block.
+
+    Specify this flag to enable linear allocation algorithm, which always creates
+    new allocations after last one and doesn't reuse space from allocations freed in
+    between. It trades memory consumption for simplified algorithm and data
+    structure, which has better performance and uses less memory for metadata.
+
+    By using this flag, you can achieve behavior of free-at-once, stack,
+    ring buffer, and double stack.
+    For details, see documentation chapter \ref linear_algorithm.
+    */
+    VIRTUAL_BLOCK_FLAG_ALGORITHM_LINEAR = POOL_FLAG_ALGORITHM_LINEAR,
+
+    // Bit mask to extract only `ALGORITHM` bits from entire set of flags.
+    VIRTUAL_BLOCK_FLAG_ALGORITHM_MASK = POOL_FLAG_ALGORITHM_MASK
+};
+
+/// Parameters of created D3D12MA::VirtualBlock object to be passed to CreateVirtualBlock().
+struct VIRTUAL_BLOCK_DESC
+{
+    /// Flags.
+    VIRTUAL_BLOCK_FLAGS Flags;
+    /** \brief Total size of the block.
+
+    Sizes can be expressed in bytes or any units you want as long as you are consistent in using them.
+    For example, if you allocate from some array of structures, 1 can mean a single instance of the entire structure.
+    */
+    UINT64 Size;
+    /** \brief Custom CPU memory allocation callbacks. Optional.
+
+    Can be null. When specified, they will be used for all CPU-side memory allocations.
+    */
+    const ALLOCATION_CALLBACKS* pAllocationCallbacks;
+};
+
+/// \brief Bit flags to be used with VIRTUAL_ALLOCATION_DESC::Flags.
+enum VIRTUAL_ALLOCATION_FLAGS
+{
+    /// Zero
+    VIRTUAL_ALLOCATION_FLAG_NONE = 0,
+
+    /** \brief Allocation will be created from the upper stack in a double stack pool.
+
+    This flag is only allowed for virtual blocks created with #VIRTUAL_BLOCK_FLAG_ALGORITHM_LINEAR flag.
+    */
+    VIRTUAL_ALLOCATION_FLAG_UPPER_ADDRESS = ALLOCATION_FLAG_UPPER_ADDRESS,
+
+    /// %Allocation strategy that tries to minimize memory usage.
+    VIRTUAL_ALLOCATION_FLAG_STRATEGY_MIN_MEMORY = ALLOCATION_FLAG_STRATEGY_MIN_MEMORY,
+    /// %Allocation strategy that tries to minimize allocation time.
+    VIRTUAL_ALLOCATION_FLAG_STRATEGY_MIN_TIME = ALLOCATION_FLAG_STRATEGY_MIN_TIME,
+    /** %Allocation strategy that always chooses the lowest offset in available space.
+    This is not the most efficient strategy but achieves highly packed data.
+    */
+    VIRTUAL_ALLOCATION_FLAG_STRATEGY_MIN_OFFSET = ALLOCATION_FLAG_STRATEGY_MIN_OFFSET,
+    /** \brief A bit mask to extract only `STRATEGY` bits from entire set of flags.
+
+    These strategy flags are binary compatible with equivalent flags in #ALLOCATION_FLAGS.
+    */
+    VIRTUAL_ALLOCATION_FLAG_STRATEGY_MASK = ALLOCATION_FLAG_STRATEGY_MASK,
+};
+
+/// Parameters of created virtual allocation to be passed to VirtualBlock::Allocate().
+struct VIRTUAL_ALLOCATION_DESC
+{
+    /// Flags.
+    VIRTUAL_ALLOCATION_FLAGS Flags;
+    /** \brief Size of the allocation.
+
+    Cannot be zero.
+    */
+    UINT64 Size;
+    /** \brief Required alignment of the allocation.
+
+    Must be a power of two. The special value 0 has the same meaning as 1 - no special alignment is required, so the allocation can start at any offset.
+    */
+    UINT64 Alignment;
+    /** \brief Custom pointer to be associated with the allocation.
+
+    It can be fetched or changed later.
+    */
+    void* pPrivateData;
+};
+
+/// Parameters of an existing virtual allocation, returned by VirtualBlock::GetAllocationInfo().
+struct VIRTUAL_ALLOCATION_INFO
+{
+    /// \brief Offset of the allocation.
+    UINT64 Offset;
+    /** \brief Size of the allocation.
+
+    Same value as passed in VIRTUAL_ALLOCATION_DESC::Size.
+    */
+    UINT64 Size;
+    /** \brief Custom pointer associated with the allocation.
+
+    Same value as passed in VIRTUAL_ALLOCATION_DESC::pPrivateData or VirtualBlock::SetAllocationPrivateData().
+    */
+    void* pPrivateData;
+};
+
+/** \brief Represents pure allocation algorithm and a data structure with allocations in some memory block, without actually allocating any GPU memory.
+
+This class allows you to use the library's core allocation algorithm for custom allocations, e.g. in CPU memory
+or for sub-allocating regions inside a single GPU buffer.
+
+To create this object, fill in D3D12MA::VIRTUAL_BLOCK_DESC and call CreateVirtualBlock().
+To destroy it, call its method `VirtualBlock::Release()`.
+You need to free all the allocations within this block or call Clear() before destroying it.
+
+This object is not thread-safe - it should not be used from multiple threads simultaneously and must be synchronized externally.
+*/
+class D3D12MA_API VirtualBlock : public IUnknownImpl
+{
+public:
+    /** \brief Returns true if the block is empty - contains 0 allocations.
+    */
+    BOOL IsEmpty() const;
+    /** \brief Returns information about an allocation - its offset, size and custom pointer.
+    */
+    void GetAllocationInfo(VirtualAllocation allocation, VIRTUAL_ALLOCATION_INFO* pInfo) const;
+
+    /** \brief Creates new allocation.
+    \param pDesc
+    \param[out] pAllocation Unique identifier of the new allocation within a single block.
+    \param[out] pOffset Returned offset of the new allocation. Optional, can be null.
+    \return `S_OK` if allocation succeeded, `E_OUTOFMEMORY` if it failed.
+
+    If the allocation failed, `pAllocation->AllocHandle` is set to 0 and `pOffset`, if not null, is set to `UINT64_MAX`.
+    */
+    HRESULT Allocate(const VIRTUAL_ALLOCATION_DESC* pDesc, VirtualAllocation* pAllocation, UINT64* pOffset);
+    /** \brief Frees the allocation.
+
+    Calling this function with `allocation.AllocHandle == 0` is correct and does nothing.
+    */
+    void FreeAllocation(VirtualAllocation allocation);
+    /** \brief Frees all the allocations.
+    */
+    void Clear();
+    /** \brief Changes custom pointer for an allocation to a new value.
+    */
+    void SetAllocationPrivateData(VirtualAllocation allocation, void* pPrivateData);
+    /** \brief Retrieves basic statistics of the virtual block that are fast to calculate.
+
+    \param[out] pStats %Statistics of the virtual block.
+    */
+    void GetStatistics(Statistics* pStats) const;
+    /** \brief Retrieves detailed statistics of the virtual block that are slower to calculate.
+
+    \param[out] pStats %Statistics of the virtual block.
+    */
+    void CalculateStatistics(DetailedStatistics* pStats) const;
+
+    /** \brief Builds and returns statistics as a string in JSON format, including the list of allocations with their parameters.
+    @param[out] ppStatsString Must be freed using VirtualBlock::FreeStatsString.
+    */
+    void BuildStatsString(WCHAR** ppStatsString) const;
+
+    /** \brief Frees memory of a string returned from VirtualBlock::BuildStatsString.
+    */
+    void FreeStatsString(WCHAR* pStatsString) const;
+
+protected:
+    void ReleaseThis() override;
+
+private:
+    friend D3D12MA_API HRESULT CreateVirtualBlock(const VIRTUAL_BLOCK_DESC*, VirtualBlock**);
+    template<typename T> friend void D3D12MA_DELETE(const ALLOCATION_CALLBACKS&, T*);
+
+    VirtualBlockPimpl* m_Pimpl;
+
+    VirtualBlock(const ALLOCATION_CALLBACKS& allocationCallbacks, const VIRTUAL_BLOCK_DESC& desc);
+    ~VirtualBlock();
+
+    D3D12MA_CLASS_NO_COPY(VirtualBlock)
+};
+
+
+/** \brief Creates new main D3D12MA::Allocator object and returns it through `ppAllocator`.
+
+You normally only need to call it once and keep a single Allocator object for your `ID3D12Device`.
+*/
+D3D12MA_API HRESULT CreateAllocator(const ALLOCATOR_DESC* pDesc, Allocator** ppAllocator);
+
+/** \brief Creates new D3D12MA::VirtualBlock object and returns it through `ppVirtualBlock`.
+
+Note you don't need to create D3D12MA::Allocator to use virtual blocks.
+*/
+D3D12MA_API HRESULT CreateVirtualBlock(const VIRTUAL_BLOCK_DESC* pDesc, VirtualBlock** ppVirtualBlock);
+
+} // namespace D3D12MA
+
+/// \cond INTERNAL
+DEFINE_ENUM_FLAG_OPERATORS(D3D12MA::ALLOCATION_FLAGS);
+DEFINE_ENUM_FLAG_OPERATORS(D3D12MA::DEFRAGMENTATION_FLAGS);
+DEFINE_ENUM_FLAG_OPERATORS(D3D12MA::ALLOCATOR_FLAGS);
+DEFINE_ENUM_FLAG_OPERATORS(D3D12MA::POOL_FLAGS);
+DEFINE_ENUM_FLAG_OPERATORS(D3D12MA::VIRTUAL_BLOCK_FLAGS);
+DEFINE_ENUM_FLAG_OPERATORS(D3D12MA::VIRTUAL_ALLOCATION_FLAGS);
+/// \endcond
+
+/**
+\page quick_start Quick start
+
+\section quick_start_project_setup Project setup and initialization
+
+This is a small, standalone C++ library. It consists of 2 files:
+"D3D12MemAlloc.h" header file with the public interface and "D3D12MemAlloc.cpp" with
+the internal implementation. The only external dependencies are WinAPI, Direct3D 12,
+and parts of the C/C++ standard library (but STL containers, exceptions, or RTTI are
+not used).
+
+The library is developed and tested using Microsoft Visual Studio 2019, but it
+should work with other compilers as well. It is designed for 64-bit code.
+
+To use the library in your project:
+
+(1.) Copy files `D3D12MemAlloc.cpp`, `%D3D12MemAlloc.h` to your project.
+
+(2.) Make `D3D12MemAlloc.cpp` compile as part of the project, as C++ code.
+
+(3.) Include the library header in each CPP file that needs to use the library.
+
+\code
+#include "D3D12MemAlloc.h"
+\endcode
+
+(4.) Right after you have created `ID3D12Device`, fill in the D3D12MA::ALLOCATOR_DESC
+structure and call the function D3D12MA::CreateAllocator to create the main
+D3D12MA::Allocator object.
+
+Please note that all symbols of the library are declared inside the #D3D12MA namespace.
+
+\code
+IDXGIAdapter* adapter = (...)
+ID3D12Device* device = (...)
+
+D3D12MA::ALLOCATOR_DESC allocatorDesc = {};
+allocatorDesc.pDevice = device;
+allocatorDesc.pAdapter = adapter;
+// These flags are optional but recommended.
+allocatorDesc.Flags = D3D12MA::ALLOCATOR_FLAG_MSAA_TEXTURES_ALWAYS_COMMITTED |
+    D3D12MA::ALLOCATOR_FLAG_DEFAULT_POOLS_NOT_ZEROED;
+
+D3D12MA::Allocator* allocator;
+HRESULT hr = D3D12MA::CreateAllocator(&allocatorDesc, &allocator);
+\endcode
+
+(5.) Right before destroying the D3D12 device, destroy the allocator object.
+
+Objects of this library must be destroyed by calling the `Release` method.
+They are somewhat compatible with COM: they implement the `IUnknown` interface with its virtual methods: `AddRef`, `Release`, `QueryInterface`,
+and they are reference-counted internally.
+You can use smart pointers designed for COM with objects of this library - e.g. `CComPtr` or `Microsoft::WRL::ComPtr`.
+The reference counter is thread-safe.
+The `QueryInterface` method supports only `IUnknown`, as classes of this library don't define their own GUIDs.
+
+\code
+allocator->Release();
+\endcode
+
+
+\section quick_start_creating_resources Creating resources
+
+To use the library for creating resources (textures and buffers), call the method
+D3D12MA::Allocator::CreateResource in the place where you would previously call
+`ID3D12Device::CreateCommittedResource`.
+
+The function has similar syntax, but it expects the structure D3D12MA::ALLOCATION_DESC
+to be passed along with `D3D12_RESOURCE_DESC` and other parameters for the created
+resource. This structure describes parameters of the desired memory allocation,
+including the choice of `D3D12_HEAP_TYPE`.
+
+The function returns a new object of type D3D12MA::Allocation.
+It represents allocated memory and can be queried for size, offset, `ID3D12Heap`.
+It also holds a reference to the `ID3D12Resource`, which can be accessed by calling D3D12MA::Allocation::GetResource().
+
+\code
+D3D12_RESOURCE_DESC resourceDesc = {};
+resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
+resourceDesc.Alignment = 0;
+resourceDesc.Width = 1024;
+resourceDesc.Height = 1024;
+resourceDesc.DepthOrArraySize = 1;
+resourceDesc.MipLevels = 1;
+resourceDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
+resourceDesc.SampleDesc.Count = 1;
+resourceDesc.SampleDesc.Quality = 0;
+resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
+resourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE;
+
+D3D12MA::ALLOCATION_DESC allocationDesc = {};
+allocationDesc.HeapType = D3D12_HEAP_TYPE_DEFAULT;
+
+D3D12MA::Allocation* allocation;
+HRESULT hr = allocator->CreateResource(
+    &allocationDesc,
+    &resourceDesc,
+    D3D12_RESOURCE_STATE_COPY_DEST,
+    NULL,
+    &allocation,
+    IID_NULL, NULL);
+
+// Use allocation->GetResource()...
+\endcode
+
+You need to release the allocation object when it is no longer needed.
+This will also release the D3D12 resource.
+
+\code
+allocation->Release();
+\endcode
+
+The advantage of using the allocator instead of creating a committed resource, and
+the main purpose of this library, is that it can decide to allocate a bigger memory
+heap internally using `ID3D12Device::CreateHeap` and place multiple resources in
+it, at different offsets, using `ID3D12Device::CreatePlacedResource`. The library
+manages its own collection of allocated memory blocks (heaps) and remembers which
+parts of them are occupied and which parts are free to be used for new resources.
+
+It is important to remember that resources created as placed don't have their memory
+initialized to zeros, but may contain garbage data, so they need to be fully initialized
+before usage, e.g. using Clear (`ClearRenderTargetView`), Discard (`DiscardResource`),
+or copy (`CopyResource`).
+
+The library also automatically handles the resource heap tier.
+When `D3D12_FEATURE_DATA_D3D12_OPTIONS::ResourceHeapTier` equals `D3D12_RESOURCE_HEAP_TIER_1`,
+resources of 3 types: buffers, textures that are render targets or depth-stencil,
+and other textures, must be kept in separate heaps. When it equals `D3D12_RESOURCE_HEAP_TIER_2`,
+they can be kept together. By using this library, you don't need to handle this
+manually.
+
+
+\section quick_start_resource_reference_counting Resource reference counting
+
+`ID3D12Resource` and other interfaces of Direct3D 12 use COM, so they are reference-counted.
+Objects of this library are reference-counted as well.
+An object of type D3D12MA::Allocation remembers the resource (buffer or texture)
+that was created together with this memory allocation
+and holds a reference to the `ID3D12Resource` object.
+(Note this is a difference from Vulkan Memory Allocator, where a `VmaAllocation` object has no connection
+with the buffer or image that was created with it.)
+Thus, it is important to manage the resource reference counter properly.
+
+The simplest use case is shown in the code snippet above.
+When only a D3D12MA::Allocation object is obtained from a function call like D3D12MA::Allocator::CreateResource,
+it remembers the `ID3D12Resource` that was created with it and holds a reference to it.
+The resource can be obtained by calling `allocation->GetResource()`, which doesn't increment the resource
+reference counter.
+Calling `allocation->Release()` will decrease the resource reference counter, which is = 1 in this case,
+so the resource will be released.
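+
+Because these objects follow COM reference-counting conventions, you can also let a COM smart
+pointer handle the `Release` call for you. A minimal sketch, assuming the `allocator`,
+`allocationDesc`, and `resourceDesc` variables from the snippet above:
+
+\code
+#include <wrl/client.h> // for Microsoft::WRL::ComPtr
+
+Microsoft::WRL::ComPtr<D3D12MA::Allocation> allocation;
+HRESULT hr = allocator->CreateResource(
+    &allocationDesc,
+    &resourceDesc,
+    D3D12_RESOURCE_STATE_COPY_DEST,
+    NULL,
+    &allocation, // ComPtr's operator& yields the D3D12MA::Allocation** the function expects
+    IID_NULL, NULL);
+
+// Use allocation->GetResource()...
+// No explicit allocation->Release() - ComPtr calls it when the pointer goes out of scope.
+\endcode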
+
+The second option is to retrieve a pointer to the resource along with the D3D12MA::Allocation.
+The last parameters of the resource creation function can be used for this purpose.
+
+\code
+D3D12MA::Allocation* allocation;
+ID3D12Resource* resource;
+HRESULT hr = allocator->CreateResource(
+    &allocationDesc,
+    &resourceDesc,
+    D3D12_RESOURCE_STATE_COPY_DEST,
+    NULL,
+    &allocation,
+    IID_PPV_ARGS(&resource));
+
+// Use resource...
+\endcode
+
+In this case, the returned pointer `resource` is equal to `allocation->GetResource()`,
+but the creation function additionally increases the resource reference counter for the purpose of returning it from this call
+(it actually calls `QueryInterface` internally), so the resource will have the counter = 2.
+The resource then needs to be released along with the allocation, in this particular order,
+to make sure the resource is destroyed before its memory heap can potentially be freed.
+
+\code
+resource->Release();
+allocation->Release();
+\endcode
+
+More advanced use cases are possible when we consider that a D3D12MA::Allocation object can just hold
+a reference to any resource.
+It can be changed by calling D3D12MA::Allocation::SetResource. This function
+releases the old resource and calls `AddRef` on the new one.
+
+Special care must be taken when performing defragmentation.
+The new resource created at the destination place should be set as `pass.pMoves[i].pDstTmpAllocation->SetResource(newRes)`,
+but it is moved to the source allocation at the end of the defragmentation pass,
+while the old resource accessible through `pass.pMoves[i].pSrcAllocation->GetResource()` is then released.
+For more information, see documentation chapter \ref defragmentation.
+
+
+\section quick_start_mapping_memory Mapping memory
+
+The process of getting a regular CPU-side pointer to the memory of a resource in
+Direct3D is called "mapping". There are rules and restrictions to this process,
+as described in the D3D12 documentation of the `ID3D12Resource::Map` method.
+
+Mapping happens on the level of particular resources, not entire memory heaps,
+and so it is out of scope of this library. Just as the documentation of the `Map` function says:
+
+- The returned pointer refers to data of the particular subresource, not the entire memory heap.
+- You can map the same resource multiple times. It is ref-counted internally.
+- Mapping is thread-safe.
+- Unmapping is not required before resource destruction.
+- Unmapping may not be required before using written data - some heap types on
+  some platforms support persistently mapped resources.
+
+When using this library, you can map and use your resources normally without
+considering whether they are created as committed resources or placed resources in one large heap.
+
+Example for a buffer created and filled in the `UPLOAD` heap type:
+
+\code
+const UINT64 bufSize = 65536;
+const float* bufData = (...);
+
+D3D12_RESOURCE_DESC resourceDesc = {};
+resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
+resourceDesc.Alignment = 0;
+resourceDesc.Width = bufSize;
+resourceDesc.Height = 1;
+resourceDesc.DepthOrArraySize = 1;
+resourceDesc.MipLevels = 1;
+resourceDesc.Format = DXGI_FORMAT_UNKNOWN;
+resourceDesc.SampleDesc.Count = 1;
+resourceDesc.SampleDesc.Quality = 0;
+resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
+resourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE;
+
+D3D12MA::ALLOCATION_DESC allocationDesc = {};
+allocationDesc.HeapType = D3D12_HEAP_TYPE_UPLOAD;
+
+ID3D12Resource* resource;
+D3D12MA::Allocation* allocation;
+HRESULT hr = allocator->CreateResource(
+    &allocationDesc,
+    &resourceDesc,
+    D3D12_RESOURCE_STATE_GENERIC_READ,
+    NULL,
+    &allocation,
+    IID_PPV_ARGS(&resource));
+
+void* mappedPtr;
+hr = resource->Map(0, NULL, &mappedPtr);
+
+memcpy(mappedPtr, bufData, bufSize);
+
+resource->Unmap(0, NULL);
+\endcode
+
+
+\page custom_pools Custom memory pools
+
+A "pool" is a collection of memory blocks that share certain properties.
+The allocator creates 3 default pools: for `D3D12_HEAP_TYPE_DEFAULT`, `UPLOAD`, `READBACK`.
+A default pool automatically grows in size. The size of allocated blocks is also variable and managed automatically.
+Typical allocations are created in these pools. You can also create custom pools.
+
+\section custom_pools_usage Usage
+
+To create a custom pool, fill in the structure D3D12MA::POOL_DESC and call the function D3D12MA::Allocator::CreatePool
+to obtain a D3D12MA::Pool object. Example:
+
+\code
+POOL_DESC poolDesc = {};
+poolDesc.HeapProperties.Type = D3D12_HEAP_TYPE_DEFAULT;
+// These flags are optional but recommended.
+poolDesc.Flags = D3D12MA::POOL_FLAG_MSAA_TEXTURES_ALWAYS_COMMITTED;
+poolDesc.HeapFlags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
+
+Pool* pool;
+HRESULT hr = allocator->CreatePool(&poolDesc, &pool);
+\endcode
+
+To allocate resources out of a custom pool, just set the member D3D12MA::ALLOCATION_DESC::CustomPool.
+Example:
+
+\code
+ALLOCATION_DESC allocDesc = {};
+allocDesc.CustomPool = pool;
+
+D3D12_RESOURCE_DESC resDesc = ...
+Allocation* alloc;
+hr = allocator->CreateResource(&allocDesc, &resDesc,
+    D3D12_RESOURCE_STATE_GENERIC_READ, NULL, &alloc, IID_NULL, NULL);
+\endcode
+
+All allocations must be released before releasing the pool.
+The pool must be released before releasing the allocator.
+
+\code
+alloc->Release();
+pool->Release();
+\endcode
+
+\section custom_pools_features_and_benefits Features and benefits
+
+While it is recommended to use default pools whenever possible for simplicity and to give the allocator
+more opportunities for internal optimizations, custom pools may be useful in the following cases:
+
+- To keep some resources separate from others in memory.
+- To keep track of memory usage of just a specific group of resources. %Statistics can be queried using
+  D3D12MA::Pool::CalculateStatistics.
+- To use a specific size of a memory block (`ID3D12Heap`). To set it, use member D3D12MA::POOL_DESC::BlockSize.
+  When set to 0, the library uses automatically determined, variable block sizes.
+- To reserve some minimum amount of memory allocated. To use it, set member D3D12MA::POOL_DESC::MinBlockCount.
+- To limit the maximum amount of memory allocated. To use it, set member D3D12MA::POOL_DESC::MaxBlockCount.
+- To use extended parameters of the D3D12 memory allocation.
While resources created from default pools
+  can only specify `D3D12_HEAP_TYPE_DEFAULT`, `UPLOAD`, `READBACK`, a custom pool may use non-standard
+  `D3D12_HEAP_PROPERTIES` (member D3D12MA::POOL_DESC::HeapProperties) and `D3D12_HEAP_FLAGS`
+  (D3D12MA::POOL_DESC::HeapFlags), which is useful e.g. for cross-adapter sharing or UMA
+  (see also D3D12MA::Allocator::IsUMA).
+
+New versions of this library support creating **committed allocations in custom pools**.
+It is supported only when D3D12MA::POOL_DESC::BlockSize = 0.
+To use this feature, set D3D12MA::ALLOCATION_DESC::CustomPool to the pointer to your custom pool and
+D3D12MA::ALLOCATION_DESC::Flags to D3D12MA::ALLOCATION_FLAG_COMMITTED. Example:
+
+\code
+ALLOCATION_DESC allocDesc = {};
+allocDesc.CustomPool = pool;
+allocDesc.Flags = ALLOCATION_FLAG_COMMITTED;
+
+D3D12_RESOURCE_DESC resDesc = ...
+Allocation* alloc;
+ID3D12Resource* res;
+hr = allocator->CreateResource(&allocDesc, &resDesc,
+    D3D12_RESOURCE_STATE_GENERIC_READ, NULL, &alloc, IID_PPV_ARGS(&res));
+\endcode
+
+This feature may seem unnecessary, but creating committed allocations from custom pools may be useful
+in some cases, e.g. to have separate memory usage statistics for some group of resources or to use
+extended allocation parameters, like custom `D3D12_HEAP_PROPERTIES`, which are available only in custom pools.
+
+
+\page defragmentation Defragmentation
+
+Interleaved allocations and deallocations of many objects of varying size can
+cause fragmentation over time, which can lead to a situation where the library is unable
+to find a continuous range of free memory for a new allocation, even though there is
+enough free space, just scattered across many small free ranges between existing
+allocations.
+
+To mitigate this problem, you can use the defragmentation feature.
+It doesn't happen automatically though and needs your cooperation,
+because %D3D12MA is a low-level library that only allocates memory.
+It cannot recreate buffers and textures in a new place as it doesn't remember the contents of the `D3D12_RESOURCE_DESC` structure.
+It cannot copy their contents as it doesn't record any commands to a command list.
+
+Example:
+
+\code
+D3D12MA::DEFRAGMENTATION_DESC defragDesc = {};
+defragDesc.Flags = D3D12MA::DEFRAGMENTATION_FLAG_ALGORITHM_FAST;
+
+D3D12MA::DefragmentationContext* defragCtx;
+allocator->BeginDefragmentation(&defragDesc, &defragCtx);
+
+for(;;)
+{
+    D3D12MA::DEFRAGMENTATION_PASS_MOVE_INFO pass;
+    HRESULT hr = defragCtx->BeginPass(&pass);
+    if(hr == S_OK)
+        break;
+    else if(hr != S_FALSE)
+        // Handle error...
+
+    for(UINT i = 0; i < pass.MoveCount; ++i)
+    {
+        // Inspect pass.pMoves[i].pSrcAllocation, identify what buffer/texture it represents.
+        MyEngineResourceData* resData = (MyEngineResourceData*)pass.pMoves[i].pSrcAllocation->GetPrivateData();
+
+        // Recreate this buffer/texture as placed at pass.pMoves[i].pDstTmpAllocation.
+        D3D12_RESOURCE_DESC resDesc = ...
+        ID3D12Resource* newRes;
+        hr = device->CreatePlacedResource(
+            pass.pMoves[i].pDstTmpAllocation->GetHeap(),
+            pass.pMoves[i].pDstTmpAllocation->GetOffset(), &resDesc,
+            D3D12_RESOURCE_STATE_COPY_DEST, NULL, IID_PPV_ARGS(&newRes));
+        // Check hr...
+
+        // Store the new resource in the pDstTmpAllocation.
+        pass.pMoves[i].pDstTmpAllocation->SetResource(newRes);
+
+        // Copy its content to the new place.
+        cmdList->CopyResource(
+            pass.pMoves[i].pDstTmpAllocation->GetResource(),
+            pass.pMoves[i].pSrcAllocation->GetResource());
+    }
+
+    // Make sure the copy commands finished executing.
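+    // (Engine-specific synchronization is elided in this example: typically you close and
+    // submit the command list, signal an ID3D12Fence on the queue, and wait for it on the
+    // CPU - the `fenceEvent` used below is assumed to come from your own fence setup.)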
+    cmdQueue->ExecuteCommandLists(...);
+    // ...
+    WaitForSingleObject(fenceEvent, INFINITE);
+
+    // Update appropriate descriptors to point to the new places...
+
+    hr = defragCtx->EndPass(&pass);
+    if(hr == S_OK)
+        break;
+    else if(hr != S_FALSE)
+        // Handle error...
+}
+
+defragCtx->Release();
+\endcode
+
+Although functions like D3D12MA::Allocator::CreateResource()
+create an allocation and a buffer/texture at once, they are just a shortcut for
+allocating memory and creating a placed resource.
+Defragmentation works on memory allocations only. You must handle the rest manually.
+Defragmentation is an iterative process that should repeat "passes" as long as related functions
+return `S_FALSE`, not `S_OK`.
+In each pass:
+
+1. D3D12MA::DefragmentationContext::BeginPass() function call:
+   - Calculates and returns the list of allocations to be moved in this pass.
+     Note this can be a time-consuming process.
+   - Reserves destination memory for them by creating temporary destination allocations
+     that you can query for their `ID3D12Heap` + offset using methods like D3D12MA::Allocation::GetHeap().
+2. Inside the pass, **you should**:
+   - Inspect the returned list of allocations to be moved.
+   - Create new buffers/textures as placed at the returned destination temporary allocations.
+   - Copy data from source to destination resources if necessary.
+   - Store the pointer to the new resource in the temporary destination allocation.
+3. D3D12MA::DefragmentationContext::EndPass() function call:
+   - Frees the source memory reserved for the allocations that are moved.
+   - Modifies source D3D12MA::Allocation objects that are moved to point to the destination reserved memory
+     and destination resource, while the source resource is released.
+   - Frees `ID3D12Heap` blocks that became empty.
+
+The defragmentation algorithm tries to move all suitable allocations.
+You can, however, refuse to move some of them inside a defragmentation pass, by setting
+`pass.pMoves[i].Operation` to D3D12MA::DEFRAGMENTATION_MOVE_OPERATION_IGNORE.
+This is not recommended and may result in suboptimal packing of the allocations after defragmentation.
+If you cannot ensure any allocation can be moved, it is better to keep movable allocations separate in a custom pool.
+
+Inside a pass, for each allocation that should be moved:
+
+- You should copy its data from the source to the destination place by calling e.g. `CopyResource()`.
+  - You need to make sure these commands have finished executing before the source buffers/textures are released by D3D12MA::DefragmentationContext::EndPass().
+- If a resource doesn't contain any meaningful data, e.g. it is a transient render-target texture to be cleared,
+  filled, and used temporarily in each rendering frame, you can just recreate this texture
+  without copying its data.
+- If the resource is in `D3D12_HEAP_TYPE_READBACK` memory, you can copy its data on the CPU
+  using `memcpy()`.
+- If you cannot move the allocation, you can set `pass.pMoves[i].Operation` to D3D12MA::DEFRAGMENTATION_MOVE_OPERATION_IGNORE.
+  This will cancel the move.
+  - D3D12MA::DefragmentationContext::EndPass() will then free the destination memory, not the source memory, of the allocation, leaving it unchanged.
+- If you decide the allocation is unimportant and can be destroyed instead of moved (e.g. it wasn't used for a long time),
+  you can set `pass.pMoves[i].Operation` to D3D12MA::DEFRAGMENTATION_MOVE_OPERATION_DESTROY.
+  - D3D12MA::DefragmentationContext::EndPass() will then free both the source and destination memory, and will destroy the source D3D12MA::Allocation object.
+
+You can defragment a specific custom pool by calling D3D12MA::Pool::BeginDefragmentation
+or all the default pools by calling D3D12MA::Allocator::BeginDefragmentation (like in the example above).
+
+Defragmentation is always performed in each pool separately.
+Allocations are never moved between different heap types.
+The size of the destination memory reserved for a moved allocation is the same as the original one.
+Alignment of an allocation as it was determined using `GetResourceAllocationInfo()` is also respected after defragmentation.
+Buffers/textures should be recreated with the same `D3D12_RESOURCE_DESC` parameters as the original ones.
+
+You can perform the defragmentation incrementally to limit the number of allocations and bytes to be moved
+in each pass, e.g. to call it in sync with render frames and avoid too large hitches.
+See members: D3D12MA::DEFRAGMENTATION_DESC::MaxBytesPerPass, D3D12MA::DEFRAGMENTATION_DESC::MaxAllocationsPerPass.
+
+Thread safety:
+It is safe to perform the defragmentation asynchronously to render frames and other Direct3D 12 and %D3D12MA
+usage, possibly from multiple threads, with the exception that allocations
+returned in D3D12MA::DEFRAGMENTATION_PASS_MOVE_INFO::pMoves shouldn't be released until the defragmentation pass is ended.
+During the call to D3D12MA::DefragmentationContext::BeginPass(), any operations on the memory pool
+affected by the defragmentation are blocked by a mutex.
+
+What it means in practice is that you shouldn't free any allocations from the defragmented pool
+from the moment a call to `BeginPass` begins. Otherwise, a thread performing the `allocation->Release()`
+would block for the time `BeginPass` executes and then free the allocation when it finishes, while the allocation
+could have ended up on the list of allocations to move.
+A solution to freeing allocations during defragmentation is to find such an allocation in the list
+`pass.pMoves[i]` and set its operation to D3D12MA::DEFRAGMENTATION_MOVE_OPERATION_DESTROY instead of
+calling `allocation->Release()`, or simply deferring the release until after the defragmentation has finished.
+
+Mapping is out of scope of this library and so it is not preserved after an allocation is moved during defragmentation.
+You need to map the new resource yourself if needed.
+
+\note Defragmentation is not supported in custom pools created with D3D12MA::POOL_FLAG_ALGORITHM_LINEAR.
+
+
+\page statistics Statistics
+
+This library contains several functions that return information about its internal state,
+especially the amount of memory allocated from D3D12.
+
+\section statistics_numeric_statistics Numeric statistics
+
+If you need to obtain basic statistics about memory usage per memory segment group, together with the current budget,
+you can call the function D3D12MA::Allocator::GetBudget() and inspect the structure D3D12MA::Budget.
+This is useful to keep track of memory usage and stay within budget.
+Example:
+
+\code
+D3D12MA::Budget localBudget;
+allocator->GetBudget(&localBudget, NULL);
+
+printf("My GPU memory currently has %u allocations taking %llu B,\n",
+    localBudget.Statistics.AllocationCount,
+    localBudget.Statistics.AllocationBytes);
+printf("allocated out of %u D3D12 memory heaps taking %llu B,\n",
+    localBudget.Statistics.BlockCount,
+    localBudget.Statistics.BlockBytes);
+printf("D3D12 reports total usage %llu B with budget %llu B.\n",
+    localBudget.UsageBytes,
+    localBudget.BudgetBytes);
+\endcode
+
+You can query for more detailed statistics per heap type, memory segment group, and totals,
+including minimum and maximum allocation size and unused range size,
+by calling the function D3D12MA::Allocator::CalculateStatistics() and inspecting the structure D3D12MA::TotalStatistics.
+This function is slower though, as it has to traverse all the internal data structures,
+so it should be used only for debugging purposes.
+
+You can query for statistics of a custom pool using the function D3D12MA::Pool::GetStatistics()
+or D3D12MA::Pool::CalculateStatistics().
+
+You can query for information about a specific allocation using functions of the D3D12MA::Allocation class,
+e.g. `GetSize()`, `GetOffset()`, `GetHeap()`.
+
+\section statistics_json_dump JSON dump
+
+You can dump the internal state of the allocator to a string in JSON format using the function D3D12MA::Allocator::BuildStatsString().
+The result is guaranteed to be correct JSON.
+It uses Windows Unicode (UTF-16) encoding.
+Any strings provided by the user (see D3D12MA::Allocation::SetName())
+are copied as-is and properly escaped for JSON.
+It must be freed using the function D3D12MA::Allocator::FreeStatsString().
+
+The format of this JSON string is not part of the official documentation of the library,
+but it will not change in a backward-incompatible way without an increase of the library's major version number
+and an appropriate mention in the changelog.
+
+The JSON string contains all the data that can be obtained using D3D12MA::Allocator::CalculateStatistics().
+It can also contain a detailed map of allocated memory blocks and their regions -
+free and occupied by allocations.
+This allows you e.g. to visualize the memory or assess fragmentation.
+
+
+\page resource_aliasing Resource aliasing (overlap)
+
+New explicit graphics APIs (Vulkan and Direct3D 12), thanks to manual memory
+management, give an opportunity to alias (overlap) multiple resources in the
+same region of memory - a feature not available in the old APIs (Direct3D 11, OpenGL).
+It can be useful to save video memory, but it must be used with caution.
+
+For example, if you know the flow of your whole render frame in advance, you
+are going to use some intermediate textures or buffers only during a small range of render passes,
+and you know these ranges don't overlap in time, you can create these resources in
+the same place in memory, even if they have completely different parameters (width, height, format etc.).
+
+![Resource aliasing (overlap)](../gfx/Aliasing.png)
+
+Such a scenario is possible using D3D12MA, but you need to create your resources
+using the special function D3D12MA::Allocator::CreateAliasingResource.
+Before that, you need to allocate memory with parameters calculated using the formula:
+
+- allocation size = max(size of each resource)
+- allocation alignment = max(alignment of each resource)
+
+The following example shows two different textures created in the same place in memory,
+with the allocation sized to fit the larger of them.
+
+\code
+D3D12_RESOURCE_DESC resDesc1 = {};
+resDesc1.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
+resDesc1.Alignment = 0;
+resDesc1.Width = 1920;
+resDesc1.Height = 1080;
+resDesc1.DepthOrArraySize = 1;
+resDesc1.MipLevels = 1;
+resDesc1.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
+resDesc1.SampleDesc.Count = 1;
+resDesc1.SampleDesc.Quality = 0;
+resDesc1.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
+resDesc1.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
+
+D3D12_RESOURCE_DESC resDesc2 = {};
+resDesc2.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
+resDesc2.Alignment = 0;
+resDesc2.Width = 1024;
+resDesc2.Height = 1024;
+resDesc2.DepthOrArraySize = 1;
+resDesc2.MipLevels = 0;
+resDesc2.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
+resDesc2.SampleDesc.Count = 1;
+resDesc2.SampleDesc.Quality = 0;
+resDesc2.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
+resDesc2.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
+
+const D3D12_RESOURCE_ALLOCATION_INFO allocInfo1 =
+    device->GetResourceAllocationInfo(0, 1, &resDesc1);
+const D3D12_RESOURCE_ALLOCATION_INFO allocInfo2 =
+    device->GetResourceAllocationInfo(0, 1, &resDesc2);
+
+D3D12_RESOURCE_ALLOCATION_INFO finalAllocInfo = {};
+finalAllocInfo.Alignment = std::max(allocInfo1.Alignment, allocInfo2.Alignment);
+finalAllocInfo.SizeInBytes = std::max(allocInfo1.SizeInBytes, allocInfo2.SizeInBytes);
+
+D3D12MA::ALLOCATION_DESC allocDesc = {};
+allocDesc.HeapType = D3D12_HEAP_TYPE_DEFAULT;
+allocDesc.ExtraHeapFlags = D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES;
+
+D3D12MA::Allocation* alloc;
+hr = allocator->AllocateMemory(&allocDesc, &finalAllocInfo, &alloc);
+assert(alloc != NULL && alloc->GetHeap() != NULL);
+
+ID3D12Resource* res1;
+hr = allocator->CreateAliasingResource(
+    alloc,
+    0, // AllocationLocalOffset
+    &resDesc1,
+    D3D12_RESOURCE_STATE_COMMON,
+    NULL, // pOptimizedClearValue
+    IID_PPV_ARGS(&res1));
+
+ID3D12Resource* res2;
+hr = allocator->CreateAliasingResource(
+    alloc,
+    0, // AllocationLocalOffset
+    &resDesc2,
+    D3D12_RESOURCE_STATE_COMMON,
+    NULL, // pOptimizedClearValue
+    IID_PPV_ARGS(&res2));
+
+// You can use res1 and res2, but not at the same time!
+
+res2->Release();
+res1->Release();
+alloc->Release();
+\endcode
+
+Remember that using resources that alias in memory requires proper synchronization.
+You need to issue a special barrier of type `D3D12_RESOURCE_BARRIER_TYPE_ALIASING`.
+You also need to treat a resource after aliasing as uninitialized - containing garbage data.
+For example, if you use `res1` and then want to use `res2`, you need to first initialize `res2`
+using either Clear, Discard, or Copy to the entire resource.
+
+Additional considerations:
+
+- D3D12 also allows you to interpret contents of memory between aliasing resources consistently in some cases,
+  which is called "data inheritance". For details, see
+  Microsoft documentation chapter "Memory Aliasing and Data Inheritance".
+- You can create a more complex layout where different textures and buffers are bound
+  at different offsets inside one large allocation. For example, one can imagine
+  a big texture used in some render passes, aliasing with a set of many small buffers
+  used in some further passes. To bind a resource at a non-zero offset of an allocation,
+  call D3D12MA::Allocator::CreateAliasingResource with an appropriate value of the `AllocationLocalOffset` parameter.
+- Resources of the three categories: buffers, textures with `RENDER_TARGET` or `DEPTH_STENCIL` flags, and all other textures,
+  can be placed in the same memory only when `allocator->GetD3D12Options().ResourceHeapTier >= D3D12_RESOURCE_HEAP_TIER_2`.
+  Otherwise they must be placed in different memory heap types, and thus aliasing them is not possible.
+
+
+\page linear_algorithm Linear allocation algorithm
+
+Each D3D12 memory block managed by this library has accompanying metadata that
+keeps track of used and unused regions. By default, the metadata structure and
+algorithm try to find the best place for new allocations among free regions to
+optimize memory usage. This way you can allocate and free objects in any order.
+
+![Default allocation algorithm](../gfx/Linear_allocator_1_algo_default.png)
+
+Sometimes there is a need to use a simpler, linear allocation algorithm. You can
+create a custom pool that uses such an algorithm by adding flag
+D3D12MA::POOL_FLAG_ALGORITHM_LINEAR to D3D12MA::POOL_DESC::Flags while creating
+the D3D12MA::Pool object. Then an alternative metadata management is used. It always
+creates new allocations after the last one and doesn't reuse free regions left by
+allocations freed in the middle. It results in better allocation performance and
+less memory consumed by metadata.
+
+![Linear allocation algorithm](../gfx/Linear_allocator_2_algo_linear.png)
+
+With this one flag, you can create a custom pool that can be used in many ways:
+free-at-once, stack, double stack, and ring buffer. See below for details.
+You don't need to specify explicitly which of these options you are going to use - it is detected automatically.
+
+\section linear_algorithm_free_at_once Free-at-once
+
+In a pool that uses the linear algorithm, you still need to free all the allocations
+individually by calling `allocation->Release()`. You can free
+them in any order. New allocations are always made after the last one - free space
+in the middle is not reused. However, when you release all the allocations and
+the pool becomes empty, allocation starts from the beginning again. This way you
+can use the linear algorithm to speed up creation of allocations that you are going
+to release all at once.
+
+![Free-at-once](../gfx/Linear_allocator_3_free_at_once.png)
+
+This mode is also available for pools created with a D3D12MA::POOL_DESC::MaxBlockCount
+value that allows multiple memory blocks.
+
+\section linear_algorithm_stack Stack
+
+When you free an allocation that was created last, its space can be reused.
+Thanks to this, if you always release allocations in the order opposite to their
+creation (LIFO - Last In First Out), you can achieve the behavior of a stack.
+
+![Stack](../gfx/Linear_allocator_4_stack.png)
+
+This mode is also available for pools created with a D3D12MA::POOL_DESC::MaxBlockCount
+value that allows multiple memory blocks.
+
+\section linear_algorithm_double_stack Double stack
+
+The space reserved by a custom pool with the linear algorithm may be used by two
+stacks:
+
+- The first, default one, growing up from offset 0.
+- The second, "upper" one, growing down from the end towards lower offsets.
+
+To make an allocation from the upper stack, add flag D3D12MA::ALLOCATION_FLAG_UPPER_ADDRESS
+to D3D12MA::ALLOCATION_DESC::Flags (see the sketch below).
+
+![Double stack](../gfx/Linear_allocator_7_double_stack.png)
+
+Double stack is available only in pools with one memory block -
+D3D12MA::POOL_DESC::MaxBlockCount must be 1. Otherwise behavior is undefined.
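+
+A minimal sketch of a double stack used for raw memory allocations, assuming an existing
+`allocator` (the pool parameters, heap flags, and sizes are illustrative placeholders):
+
+\code
+D3D12MA::POOL_DESC poolDesc = {};
+poolDesc.HeapProperties.Type = D3D12_HEAP_TYPE_DEFAULT;
+poolDesc.HeapFlags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS;
+poolDesc.Flags = D3D12MA::POOL_FLAG_ALGORITHM_LINEAR;
+poolDesc.BlockSize = 64ull * 1024 * 1024;
+poolDesc.MinBlockCount = 1;
+poolDesc.MaxBlockCount = 1; // Required for double stack.
+
+D3D12MA::Pool* pool;
+HRESULT hr = allocator->CreatePool(&poolDesc, &pool);
+
+D3D12_RESOURCE_ALLOCATION_INFO allocInfo = {};
+allocInfo.SizeInBytes = 1024 * 1024;
+allocInfo.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
+
+// Lower stack: grows up from offset 0.
+D3D12MA::ALLOCATION_DESC lowerDesc = {};
+lowerDesc.CustomPool = pool;
+D3D12MA::Allocation* lowerAlloc;
+hr = allocator->AllocateMemory(&lowerDesc, &allocInfo, &lowerAlloc);
+
+// Upper stack: grows down from the end of the block.
+D3D12MA::ALLOCATION_DESC upperDesc = {};
+upperDesc.CustomPool = pool;
+upperDesc.Flags = D3D12MA::ALLOCATION_FLAG_UPPER_ADDRESS;
+D3D12MA::Allocation* upperAlloc;
+hr = allocator->AllocateMemory(&upperDesc, &allocInfo, &upperAlloc);
+
+// ... use the memory, then release in LIFO order per stack:
+upperAlloc->Release();
+lowerAlloc->Release();
+pool->Release();
+\endcode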
+
+When the two stacks' ends meet so there is not enough space between them for a
+new allocation, such an allocation fails with the usual `E_OUTOFMEMORY` error.
+
+\section linear_algorithm_ring_buffer Ring buffer
+
+When you free some allocations from the beginning and there is not enough free space
+for a new one at the end of a pool, the allocator's "cursor" wraps around to the
+beginning and starts allocation there. Thanks to this, if you always release
+allocations in the same order as you created them (FIFO - First In First Out),
+you can achieve the behavior of a ring buffer / queue.
+
+![Ring buffer](../gfx/Linear_allocator_5_ring_buffer.png)
+
+Ring buffer is available only in pools with one memory block -
+D3D12MA::POOL_DESC::MaxBlockCount must be 1. Otherwise behavior is undefined.
+
+\section linear_algorithm_additional_considerations Additional considerations
+
+The linear algorithm can also be used with the \ref virtual_allocator.
+See flag D3D12MA::VIRTUAL_BLOCK_FLAG_ALGORITHM_LINEAR.
+
+
+\page virtual_allocator Virtual allocator
+
+As an extra feature, the core allocation algorithm of the library is exposed through a simple and convenient API of a "virtual allocator".
+It doesn't allocate any real GPU memory. It just keeps track of used and free regions of a "virtual block".
+You can use it to allocate your own memory or other objects, even completely unrelated to D3D12.
+A common use case is sub-allocation of pieces of one large GPU buffer.
+
+\section virtual_allocator_creating_virtual_block Creating virtual block
+
+To use this functionality, there is no main "allocator" object.
+You don't need to have a D3D12MA::Allocator object created.
+All you need to do is to create a separate D3D12MA::VirtualBlock object for each block of memory you want to be managed by the allocator:
+
+-# Fill in the D3D12MA::VIRTUAL_BLOCK_DESC structure.
+-# Call D3D12MA::CreateVirtualBlock. Get the new D3D12MA::VirtualBlock object.
+
+Example:
+
+\code
+D3D12MA::VIRTUAL_BLOCK_DESC blockDesc = {};
+blockDesc.Size = 1048576; // 1 MB
+
+D3D12MA::VirtualBlock *block;
+HRESULT hr = D3D12MA::CreateVirtualBlock(&blockDesc, &block);
+\endcode
+
+\section virtual_allocator_making_virtual_allocations Making virtual allocations
+
+A D3D12MA::VirtualBlock object contains an internal data structure that keeps track of free and occupied regions
+using the same code as the main D3D12 memory allocator.
+A single allocation is identified by the lightweight structure D3D12MA::VirtualAllocation.
+You will also likely want to know the offset at which the allocation was made in the block.
+
+In order to make an allocation:
+
+-# Fill in the D3D12MA::VIRTUAL_ALLOCATION_DESC structure.
+-# Call D3D12MA::VirtualBlock::Allocate. Get the new D3D12MA::VirtualAllocation value that identifies the allocation.
+
+Example:
+
+\code
+D3D12MA::VIRTUAL_ALLOCATION_DESC allocDesc = {};
+allocDesc.Size = 4096; // 4 KB
+
+D3D12MA::VirtualAllocation alloc;
+UINT64 allocOffset;
+hr = block->Allocate(&allocDesc, &alloc, &allocOffset);
+if(SUCCEEDED(hr))
+{
+    // Use the 4 KB of your memory starting at allocOffset.
+}
+else
+{
+    // Allocation failed - no space for it could be found. Handle this error!
+}
+\endcode
+
+\section virtual_allocator_deallocation Deallocation
+
+When no longer needed, an allocation can be freed by calling D3D12MA::VirtualBlock::FreeAllocation.
+
+When the whole block is no longer needed, the block object can be released by calling `block->Release()`.
+All allocations must be freed before the block is destroyed, which is checked internally by an assert.
+However, if you don't want to call `block->FreeAllocation` for each allocation, you can use D3D12MA::VirtualBlock::Clear to free them all at once -
+a feature not available in the normal D3D12 memory allocator.
+
+Example:
+
+\code
+block->FreeAllocation(alloc);
+block->Release();
+\endcode
+
+\section virtual_allocator_allocation_parameters Allocation parameters
+
+You can attach a custom pointer to each allocation by using D3D12MA::VirtualBlock::SetAllocationPrivateData.
+Its default value is `NULL`.
+It can be used to store any data that needs to be associated with that allocation - e.g. an index, a handle, or a pointer to some
+larger data structure containing more information. Example:
+
+\code
+struct CustomAllocData
+{
+    std::string m_AllocName;
+};
+CustomAllocData* allocData = new CustomAllocData();
+allocData->m_AllocName = "My allocation 1";
+block->SetAllocationPrivateData(alloc, allocData);
+\endcode
+
+The pointer can later be fetched, along with allocation offset and size, by passing the allocation handle to the function
+D3D12MA::VirtualBlock::GetAllocationInfo and inspecting the returned structure D3D12MA::VIRTUAL_ALLOCATION_INFO.
+If you allocated a new object to be used as the custom pointer, don't forget to delete that object before freeing the allocation!
+Example:
+
+\code
+VIRTUAL_ALLOCATION_INFO allocInfo;
+block->GetAllocationInfo(alloc, &allocInfo);
+delete (CustomAllocData*)allocInfo.pPrivateData;
+
+block->FreeAllocation(alloc);
+\endcode
+
+\section virtual_allocator_alignment_and_units Alignment and units
+
+It feels natural to express sizes and offsets in bytes.
+If an offset of an allocation needs to be aligned to a multiple of some number (e.g. 4 bytes), you can fill the optional member
+D3D12MA::VIRTUAL_ALLOCATION_DESC::Alignment to request it. Example:
+
+\code
+D3D12MA::VIRTUAL_ALLOCATION_DESC allocDesc = {};
+allocDesc.Size = 4096; // 4 KB
+allocDesc.Alignment = 4; // Returned offset must be a multiple of 4 B
+
+D3D12MA::VirtualAllocation alloc;
+UINT64 allocOffset;
+hr = block->Allocate(&allocDesc, &alloc, &allocOffset);
+\endcode
+
+Alignments of different allocations made from one block may vary.
+However, if all alignments and sizes are always a multiple of some size e.g. 4 B or `sizeof(MyDataStruct)`,
+you can express all sizes, alignments, and offsets in multiples of that size instead of individual bytes.
+It might be more convenient, but you need to make sure to use this new unit consistently in all the places:
+
+- D3D12MA::VIRTUAL_BLOCK_DESC::Size
+- D3D12MA::VIRTUAL_ALLOCATION_DESC::Size and D3D12MA::VIRTUAL_ALLOCATION_DESC::Alignment
+- Using offset returned by D3D12MA::VirtualBlock::Allocate and D3D12MA::VIRTUAL_ALLOCATION_INFO::Offset
+
+\section virtual_allocator_statistics Statistics
+
+You can obtain brief statistics of a virtual block using D3D12MA::VirtualBlock::GetStatistics().
+The function fills the structure D3D12MA::Statistics - the same as used by the normal D3D12 memory allocator.
+Example:
+
+\code
+D3D12MA::Statistics stats;
+block->GetStatistics(&stats);
+printf("My virtual block has %llu bytes used by %u virtual allocations\n",
+    stats.AllocationBytes, stats.AllocationCount);
+\endcode
+
+More detailed statistics can be obtained using the function D3D12MA::VirtualBlock::CalculateStatistics(),
+but they are slower to calculate.
+
+You can also request a full list of allocations and free regions as a string in JSON format by calling
+D3D12MA::VirtualBlock::BuildStatsString.
+The returned string must be later freed using D3D12MA::VirtualBlock::FreeStatsString.
+The format of this string may differ from the one returned by the main D3D12 allocator, but it is similar.
+
+\section virtual_allocator_additional_considerations Additional considerations
+
+The alternative, linear algorithm can be used with the virtual allocator - see flag
+D3D12MA::VIRTUAL_BLOCK_FLAG_ALGORITHM_LINEAR and documentation: \ref linear_algorithm.
+
+Note that the "virtual allocator" functionality is implemented on the level of individual memory blocks.
+Keeping track of a whole collection of blocks, allocating new ones when out of free space,
+deleting empty ones, and deciding which one to try first for a new allocation must be implemented by the user.
+
+
+\page configuration Configuration
+
+Please check file `D3D12MemAlloc.cpp` lines between "Configuration Begin" and
+"Configuration End" to find macros that you can define to change the behavior of
+the library, primarily for debugging purposes.
+
+\section custom_memory_allocator Custom CPU memory allocator
+
+If you use a custom allocator for CPU memory rather than the default C++ operator `new`
+and `delete` or `malloc` and `free` functions, you can make this library use
+your allocator as well by filling the structure D3D12MA::ALLOCATION_CALLBACKS and
+passing it as the optional member D3D12MA::ALLOCATOR_DESC::pAllocationCallbacks.
+The functions pointed to there will be used by the library to make any CPU-side
+allocations. Example:
+
+\code
+#include <malloc.h> // for _aligned_malloc, _aligned_free
+
+void* CustomAllocate(size_t Size, size_t Alignment, void* pPrivateData)
+{
+    void* memory = _aligned_malloc(Size, Alignment);
+    // Your extra bookkeeping here...
+    return memory;
+}
+
+void CustomFree(void* pMemory, void* pPrivateData)
+{
+    // Your extra bookkeeping here...
+    _aligned_free(pMemory);
+}
+
+(...)
+
+D3D12MA::ALLOCATION_CALLBACKS allocationCallbacks = {};
+allocationCallbacks.pAllocate = &CustomAllocate;
+allocationCallbacks.pFree = &CustomFree;
+
+D3D12MA::ALLOCATOR_DESC allocatorDesc = {};
+allocatorDesc.pDevice = device;
+allocatorDesc.pAdapter = adapter;
+allocatorDesc.pAllocationCallbacks = &allocationCallbacks;
+
+D3D12MA::Allocator* allocator;
+HRESULT hr = D3D12MA::CreateAllocator(&allocatorDesc, &allocator);
+\endcode
+
+
+\section debug_margins Debug margins
+
+By default, allocations are laid out in memory blocks next to each other if possible
+(considering the required alignment returned by `ID3D12Device::GetResourceAllocationInfo`).
+
+![Allocations without margin](../gfx/Margins_1.png)
+
+Define macro `D3D12MA_DEBUG_MARGIN` to some non-zero value (e.g. 16) inside "D3D12MemAlloc.cpp"
+to enforce a specified number of bytes as a margin after every allocation
+(a minimal sketch is shown at the end of this section).
+
+![Allocations with margin](../gfx/Margins_2.png)
+
+If your bug goes away after enabling margins, it means it may be caused by memory
+being overwritten outside of allocation boundaries. It is not 100% certain though.
+A change in application behavior may also be caused by a different order and distribution
+of allocations across memory blocks after margins are applied.
+
+Margins work with all memory heap types.
+
+The margin is applied only to placed allocations made out of memory heaps and not to committed
+allocations, which have their own, implicit memory heap of a specific size.
+It is thus not applied to allocations made using the D3D12MA::ALLOCATION_FLAG_COMMITTED flag
+or those automatically placed in committed allocations, e.g. due to their large size.
+
+Margins appear in the [JSON dump](@ref statistics_json_dump) as part of free space.
+
+Note that enabling margins increases memory usage and fragmentation.
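+
+For example, to enforce a 16-byte margin, you could define the macro before the configuration
+defaults in `D3D12MemAlloc.cpp` take effect - e.g. at the top of the file or as a project-wide
+compile definition (a minimal sketch; the value 16 is just an example):
+
+\code
+#define D3D12MA_DEBUG_MARGIN (16) // 16 B of free space enforced after every placed allocation
+\endcode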
+
+Margins do not apply to \ref virtual_allocator.
+
+
+\page general_considerations General considerations
+
+\section general_considerations_thread_safety Thread safety
+
+- The library has no global state, so separate D3D12MA::Allocator objects can be used independently.
+  In typical applications there should be no need to create multiple such objects though - one per `ID3D12Device` is enough.
+- All calls to methods of the D3D12MA::Allocator class are safe to be made from multiple
+  threads simultaneously because they are synchronized internally when needed.
+- When the allocator is created with D3D12MA::ALLOCATOR_FLAG_SINGLETHREADED,
+  calls to methods of the D3D12MA::Allocator class must be made from a single thread or synchronized by the user.
+  Using this flag may improve performance.
+- D3D12MA::VirtualBlock is not safe to be used from multiple threads simultaneously.
+
+\section general_considerations_versioning_and_compatibility Versioning and compatibility
+
+The library uses [**Semantic Versioning**](https://semver.org/),
+which means version numbers follow the convention: Major.Minor.Patch (e.g. 2.3.0), where:
+
+- Incremented Patch version means a release is backward- and forward-compatible,
+  introducing only some internal improvements, bug fixes, optimizations etc.
+  or changes that are out of scope of the official API described in this documentation.
+- Incremented Minor version means a release is backward-compatible,
+  so existing code that uses the library should continue to work, while some new
+  symbols could have been added: new structures, functions, new values in existing
+  enums and bit flags, new structure members, but not new function parameters.
+- Incremented Major version means a release could break some backward compatibility.
+
+All changes between official releases are documented in the file "CHANGELOG.md".
+
+\warning Backward compatibility is considered on the level of C++ source code, not binary linkage.
+Adding new members to existing structures is treated as backward compatible if initializing
+the new members to binary zero results in the old behavior.
+You should always fully initialize all library structures to zeros and not rely on their
+exact binary size.
+
+\section general_considerations_features_not_supported Features not supported
+
+Features deliberately excluded from the scope of this library:
+
+- **Descriptor allocation.** Although also called "heaps", objects that represent
+  descriptors are a separate part of the D3D12 API from buffers and textures.
+  You can still use \ref virtual_allocator to manage descriptors and their ranges inside a descriptor heap.
+- **Support for reserved (tiled) resources.** We don't recommend using them.
+- Support for `ID3D12Device::Evict` and `MakeResident`. We don't recommend using them.
+  You can call them on the D3D12 objects manually.
+  Please keep in mind, however, that eviction happens on the level of entire `ID3D12Heap` memory blocks
+  and not individual buffers or textures which may be placed inside them.
+- **Handling CPU memory allocation failures.** When dynamically creating small C++
+  objects in CPU memory (not the GPU memory), allocation failures are not
+  handled gracefully, because that would complicate code significantly and
+  is usually not needed in desktop PC applications anyway.
+  Success of an allocation is just checked with an assert.
+- **Code free of any compiler warnings.** + There are many preprocessor macros that make some variables unused, function parameters unreferenced, + or conditional expressions constant in some configurations. + The code of this library should not be bigger or more complicated just to silence these warnings. + It is recommended to disable such warnings instead. +- This is a C++ library. **Bindings or ports to any other programming languages** are welcome as external projects but + are not going to be included into this repository. +*/ diff --git a/External/memalloc/vk_mem_alloc.h b/External/memalloc/vk_mem_alloc.h new file mode 100644 index 00000000..bcb8b8a6 --- /dev/null +++ b/External/memalloc/vk_mem_alloc.h @@ -0,0 +1,18745 @@ +// +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// + +#if defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wunused-parameter" +# pragma GCC diagnostic ignored "-Wunused-variable" +#elif defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wunused-parameter" +# pragma clang diagnostic ignored "-Wunused-variable" +#else +# pragma warning(push) +# pragma warning(disable : 4100) // unreferenced formal parameter +# pragma warning(disable : 4189) // local variable is initialized but not referenced +# pragma warning(disable : 4127) // conditional expression is constant +#endif + +#ifndef AMD_VULKAN_MEMORY_ALLOCATOR_H +#define AMD_VULKAN_MEMORY_ALLOCATOR_H + +/** \mainpage Vulkan Memory Allocator + +Version 3.1.0 + +Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. 
\n +License: MIT \n +See also: [product page on GPUOpen](https://gpuopen.com/gaming-product/vulkan-memory-allocator/), +[repository on GitHub](https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator) + + +API documentation divided into groups: [Topics](topics.html) + +General documentation chapters: + +- User guide + - \subpage quick_start + - [Project setup](@ref quick_start_project_setup) + - [Initialization](@ref quick_start_initialization) + - [Resource allocation](@ref quick_start_resource_allocation) + - \subpage choosing_memory_type + - [Usage](@ref choosing_memory_type_usage) + - [Required and preferred flags](@ref choosing_memory_type_required_preferred_flags) + - [Explicit memory types](@ref choosing_memory_type_explicit_memory_types) + - [Custom memory pools](@ref choosing_memory_type_custom_memory_pools) + - [Dedicated allocations](@ref choosing_memory_type_dedicated_allocations) + - \subpage memory_mapping + - [Copy functions](@ref memory_mapping_copy_functions) + - [Mapping functions](@ref memory_mapping_mapping_functions) + - [Persistently mapped memory](@ref memory_mapping_persistently_mapped_memory) + - [Cache flush and invalidate](@ref memory_mapping_cache_control) + - \subpage staying_within_budget + - [Querying for budget](@ref staying_within_budget_querying_for_budget) + - [Controlling memory usage](@ref staying_within_budget_controlling_memory_usage) + - \subpage resource_aliasing + - \subpage custom_memory_pools + - [Choosing memory type index](@ref custom_memory_pools_MemTypeIndex) + - [When not to use custom pools](@ref custom_memory_pools_when_not_use) + - [Linear allocation algorithm](@ref linear_algorithm) + - [Free-at-once](@ref linear_algorithm_free_at_once) + - [Stack](@ref linear_algorithm_stack) + - [Double stack](@ref linear_algorithm_double_stack) + - [Ring buffer](@ref linear_algorithm_ring_buffer) + - \subpage defragmentation + - \subpage statistics + - [Numeric statistics](@ref statistics_numeric_statistics) + - [JSON dump](@ref statistics_json_dump) + - \subpage allocation_annotation + - [Allocation user data](@ref allocation_user_data) + - [Allocation names](@ref allocation_names) + - \subpage virtual_allocator + - \subpage debugging_memory_usage + - [Memory initialization](@ref debugging_memory_usage_initialization) + - [Margins](@ref debugging_memory_usage_margins) + - [Corruption detection](@ref debugging_memory_usage_corruption_detection) + - [Leak detection features](@ref debugging_memory_usage_leak_detection) + - \subpage other_api_interop +- \subpage usage_patterns + - [GPU-only resource](@ref usage_patterns_gpu_only) + - [Staging copy for upload](@ref usage_patterns_staging_copy_upload) + - [Readback](@ref usage_patterns_readback) + - [Advanced data uploading](@ref usage_patterns_advanced_data_uploading) + - [Other use cases](@ref usage_patterns_other_use_cases) +- \subpage configuration + - [Pointers to Vulkan functions](@ref config_Vulkan_functions) + - [Custom host memory allocator](@ref custom_memory_allocator) + - [Device memory allocation callbacks](@ref allocation_callbacks) + - [Device heap memory limit](@ref heap_memory_limit) +- Extension support + - \subpage vk_khr_dedicated_allocation + - \subpage enabling_buffer_device_address + - \subpage vk_ext_memory_priority + - \subpage vk_amd_device_coherent_memory +- \subpage general_considerations + - [Thread safety](@ref general_considerations_thread_safety) + - [Versioning and compatibility](@ref general_considerations_versioning_and_compatibility) + - [Validation layer 
warnings](@ref general_considerations_validation_layer_warnings)
+  - [Allocation algorithm](@ref general_considerations_allocation_algorithm)
+  - [Features not supported](@ref general_considerations_features_not_supported)
+
+\defgroup group_init Library initialization
+
+\brief API elements related to the initialization and management of the entire library, especially the #VmaAllocator object.
+
+\defgroup group_alloc Memory allocation
+
+\brief API elements related to the allocation, deallocation, and management of Vulkan memory, buffers, and images.
+The most basic ones are vmaCreateBuffer() and vmaCreateImage().
+
+\defgroup group_virtual Virtual allocator
+
+\brief API elements related to the mechanism of \ref virtual_allocator - using the core allocation algorithm
+for a user-defined purpose without allocating any real GPU memory.
+
+\defgroup group_stats Statistics
+
+\brief API elements that query the current status of the allocator, from memory usage and budget to a full dump of the internal state in JSON format.
+See documentation chapter: \ref statistics.
+*/
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if !defined(VULKAN_H_)
+#include <vulkan/vulkan.h>
+#endif
+
+#if !defined(VMA_VULKAN_VERSION)
+    #if defined(VK_VERSION_1_3)
+        #define VMA_VULKAN_VERSION 1003000
+    #elif defined(VK_VERSION_1_2)
+        #define VMA_VULKAN_VERSION 1002000
+    #elif defined(VK_VERSION_1_1)
+        #define VMA_VULKAN_VERSION 1001000
+    #else
+        #define VMA_VULKAN_VERSION 1000000
+    #endif
+#endif
+
+#if defined(__ANDROID__) && defined(VK_NO_PROTOTYPES) && VMA_STATIC_VULKAN_FUNCTIONS
+    extern PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr;
+    extern PFN_vkGetDeviceProcAddr vkGetDeviceProcAddr;
+    extern PFN_vkGetPhysicalDeviceProperties vkGetPhysicalDeviceProperties;
+    extern PFN_vkGetPhysicalDeviceMemoryProperties vkGetPhysicalDeviceMemoryProperties;
+    extern PFN_vkAllocateMemory vkAllocateMemory;
+    extern PFN_vkFreeMemory vkFreeMemory;
+    extern PFN_vkMapMemory vkMapMemory;
+    extern PFN_vkUnmapMemory vkUnmapMemory;
+    extern PFN_vkFlushMappedMemoryRanges vkFlushMappedMemoryRanges;
+    extern PFN_vkInvalidateMappedMemoryRanges vkInvalidateMappedMemoryRanges;
+    extern PFN_vkBindBufferMemory vkBindBufferMemory;
+    extern PFN_vkBindImageMemory vkBindImageMemory;
+    extern PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements;
+    extern PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements;
+    extern PFN_vkCreateBuffer vkCreateBuffer;
+    extern PFN_vkDestroyBuffer vkDestroyBuffer;
+    extern PFN_vkCreateImage vkCreateImage;
+    extern PFN_vkDestroyImage vkDestroyImage;
+    extern PFN_vkCmdCopyBuffer vkCmdCopyBuffer;
+    #if VMA_VULKAN_VERSION >= 1001000
+        extern PFN_vkGetBufferMemoryRequirements2 vkGetBufferMemoryRequirements2;
+        extern PFN_vkGetImageMemoryRequirements2 vkGetImageMemoryRequirements2;
+        extern PFN_vkBindBufferMemory2 vkBindBufferMemory2;
+        extern PFN_vkBindImageMemory2 vkBindImageMemory2;
+        extern PFN_vkGetPhysicalDeviceMemoryProperties2 vkGetPhysicalDeviceMemoryProperties2;
+    #endif // #if VMA_VULKAN_VERSION >= 1001000
+#endif // #if defined(__ANDROID__) && VMA_STATIC_VULKAN_FUNCTIONS && VK_NO_PROTOTYPES
+
+#if !defined(VMA_DEDICATED_ALLOCATION)
+    #if VK_KHR_get_memory_requirements2 && VK_KHR_dedicated_allocation
+        #define VMA_DEDICATED_ALLOCATION 1
+    #else
+        #define VMA_DEDICATED_ALLOCATION 0
+    #endif
+#endif
+
+#if !defined(VMA_BIND_MEMORY2)
+    #if VK_KHR_bind_memory2
+        #define VMA_BIND_MEMORY2 1
+    #else
+        #define VMA_BIND_MEMORY2 0
+    #endif
+#endif
+
+#if !defined(VMA_MEMORY_BUDGET)
+    #if VK_EXT_memory_budget && (VK_KHR_get_physical_device_properties2 || VMA_VULKAN_VERSION >= 1001000)
+        #define VMA_MEMORY_BUDGET 1
+    #else
+        #define VMA_MEMORY_BUDGET 0
+    #endif
+#endif
+
+// Defined to 1 when VK_KHR_buffer_device_address device extension or equivalent core Vulkan 1.2 feature is defined in its headers.
+#if !defined(VMA_BUFFER_DEVICE_ADDRESS)
+    #if VK_KHR_buffer_device_address || VMA_VULKAN_VERSION >= 1002000
+        #define VMA_BUFFER_DEVICE_ADDRESS 1
+    #else
+        #define VMA_BUFFER_DEVICE_ADDRESS 0
+    #endif
+#endif
+
+// Defined to 1 when VK_EXT_memory_priority device extension is defined in Vulkan headers.
+#if !defined(VMA_MEMORY_PRIORITY)
+    #if VK_EXT_memory_priority
+        #define VMA_MEMORY_PRIORITY 1
+    #else
+        #define VMA_MEMORY_PRIORITY 0
+    #endif
+#endif
+
+// Defined to 1 when VK_KHR_maintenance4 device extension is defined in Vulkan headers.
+#if !defined(VMA_KHR_MAINTENANCE4)
+    #if VK_KHR_maintenance4
+        #define VMA_KHR_MAINTENANCE4 1
+    #else
+        #define VMA_KHR_MAINTENANCE4 0
+    #endif
+#endif
+
+// Defined to 1 when VK_KHR_maintenance5 device extension is defined in Vulkan headers.
+#if !defined(VMA_KHR_MAINTENANCE5)
+    #if VK_KHR_maintenance5
+        #define VMA_KHR_MAINTENANCE5 1
+    #else
+        #define VMA_KHR_MAINTENANCE5 0
+    #endif
+#endif
+
+// Defined to 1 when VK_KHR_external_memory device extension is defined in Vulkan headers.
+#if !defined(VMA_EXTERNAL_MEMORY)
+    #if VK_KHR_external_memory
+        #define VMA_EXTERNAL_MEMORY 1
+    #else
+        #define VMA_EXTERNAL_MEMORY 0
+    #endif
+#endif
+
+// Define these macros to decorate all public functions with additional code,
+// before and after the returned type, appropriately. This may be useful for
+// exporting the functions when compiling VMA as a separate library. Example:
+// #define VMA_CALL_PRE  __declspec(dllexport)
+// #define VMA_CALL_POST __cdecl
+#ifndef VMA_CALL_PRE
+    #define VMA_CALL_PRE
+#endif
+#ifndef VMA_CALL_POST
+    #define VMA_CALL_POST
+#endif
+
+// Define this macro to decorate pNext pointers with an attribute specifying the Vulkan
+// structure that will be extended via the pNext chain.
+#ifndef VMA_EXTENDS_VK_STRUCT
+    #define VMA_EXTENDS_VK_STRUCT(vkStruct)
+#endif
+
+// Define this macro to decorate pointers with an attribute specifying the
+// length of the array they point to if they are not null.
+//
+// The length may be one of:
+// - The name of another parameter in the argument list where the pointer is declared
+// - The name of another member in the struct where the pointer is declared
+// - The name of a member of a struct type, meaning the value of that member in
+//   the context of the call. For example
+//   VMA_LEN_IF_NOT_NULL("VkPhysicalDeviceMemoryProperties::memoryHeapCount"),
+//   this means the number of memory heaps available in the device associated
+//   with the VmaAllocator being dealt with.
+#ifndef VMA_LEN_IF_NOT_NULL
+    #define VMA_LEN_IF_NOT_NULL(len)
+#endif
+
+// The VMA_NULLABLE macro is defined to be _Nullable when compiling with Clang.
+// see: https://clang.llvm.org/docs/AttributeReference.html#nullable
+#ifndef VMA_NULLABLE
+    #ifdef __clang__
+        #define VMA_NULLABLE _Nullable
+    #else
+        #define VMA_NULLABLE
+    #endif
+#endif
+
+// The VMA_NOT_NULL macro is defined to be _Nonnull when compiling with Clang.
+// see: https://clang.llvm.org/docs/AttributeReference.html#nonnull
+#ifndef VMA_NOT_NULL
+    #ifdef __clang__
+        #define VMA_NOT_NULL _Nonnull
+    #else
+        #define VMA_NOT_NULL
+    #endif
+#endif
+
+// If non-dispatchable handles are represented as pointers, then we can give
+// them nullability annotations.
+#ifndef VMA_NOT_NULL_NON_DISPATCHABLE
+    #if defined(__LP64__) || defined(_WIN64) || (defined(__x86_64__) && !defined(__ILP32__)) || defined(_M_X64) || defined(__ia64) || defined(_M_IA64) || defined(__aarch64__) || defined(__powerpc64__)
+        #define VMA_NOT_NULL_NON_DISPATCHABLE VMA_NOT_NULL
+    #else
+        #define VMA_NOT_NULL_NON_DISPATCHABLE
+    #endif
+#endif
+
+#ifndef VMA_NULLABLE_NON_DISPATCHABLE
+    #if defined(__LP64__) || defined(_WIN64) || (defined(__x86_64__) && !defined(__ILP32__)) || defined(_M_X64) || defined(__ia64) || defined(_M_IA64) || defined(__aarch64__) || defined(__powerpc64__)
+        #define VMA_NULLABLE_NON_DISPATCHABLE VMA_NULLABLE
+    #else
+        #define VMA_NULLABLE_NON_DISPATCHABLE
+    #endif
+#endif
+
+#ifndef VMA_STATS_STRING_ENABLED
+    #define VMA_STATS_STRING_ENABLED 1
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+//
+//    INTERFACE
+//
+////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+
+// Sections for managing code placement in file, only for development purposes, e.g. for convenient folding inside an IDE.
+#ifndef _VMA_ENUM_DECLARATIONS
+
+/**
+\addtogroup group_init
+@{
+*/
+
+/// Flags for created #VmaAllocator.
+typedef enum VmaAllocatorCreateFlagBits
+{
+    /** \brief Allocator and all objects created from it will not be synchronized internally, so you must guarantee they are used from only one thread at a time or synchronized externally by you.
+
+    Using this flag may increase performance because internal mutexes are not used.
+    */
+    VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT = 0x00000001,
+    /** \brief Enables usage of VK_KHR_dedicated_allocation extension.
+
+    The flag works only if VmaAllocatorCreateInfo::vulkanApiVersion `== VK_API_VERSION_1_0`.
+    When it is `VK_API_VERSION_1_1`, the flag is ignored because the extension has been promoted to Vulkan 1.1.
+
+    Using this extension will automatically allocate dedicated blocks of memory for
+    some buffers and images instead of suballocating space for them out of bigger
+    memory blocks (as if you explicitly used #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT
+    flag) when it is recommended by the driver. It may improve performance on some
+    GPUs.
+
+    You may set this flag only if you found out that the following device extensions are
+    supported, you enabled them while creating the Vulkan device passed as
+    VmaAllocatorCreateInfo::device, and you want them to be used internally by this
+    library:
+
+    - VK_KHR_get_memory_requirements2 (device extension)
+    - VK_KHR_dedicated_allocation (device extension)
+
+    When this flag is set, you can experience the following warnings reported by the Vulkan
+    validation layer. You can ignore them.
+
+    > vkBindBufferMemory(): Binding memory to buffer 0x2d but vkGetBufferMemoryRequirements() has not been called on that buffer.
+    */
+    VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT = 0x00000002,
+    /**
+    Enables usage of VK_KHR_bind_memory2 extension.
+
+    The flag works only if VmaAllocatorCreateInfo::vulkanApiVersion `== VK_API_VERSION_1_0`.
+    When it is `VK_API_VERSION_1_1`, the flag is ignored because the extension has been promoted to Vulkan 1.1.
+
+    You may set this flag only if you found out that this device extension is supported,
+    you enabled it while creating the Vulkan device passed as VmaAllocatorCreateInfo::device,
+    and you want it to be used internally by this library.
+
+    The extension provides the functions `vkBindBufferMemory2KHR` and `vkBindImageMemory2KHR`,
+    which allow passing a chain of `pNext` structures while binding.
+    This flag is required if you use the `pNext` parameter in vmaBindBufferMemory2() or vmaBindImageMemory2().
+    */
+    VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT = 0x00000004,
+    /**
+    Enables usage of VK_EXT_memory_budget extension.
+
+    You may set this flag only if you found out that this device extension is supported,
+    you enabled it while creating the Vulkan device passed as VmaAllocatorCreateInfo::device,
+    and you want it to be used internally by this library, along with another instance extension,
+    VK_KHR_get_physical_device_properties2, which is required by it (or Vulkan 1.1, where this extension is promoted).
+
+    The extension provides a query for current memory usage and budget, which will probably
+    be more accurate than an estimation used by the library otherwise.
+    */
+    VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT = 0x00000008,
+    /**
+    Enables usage of VK_AMD_device_coherent_memory extension.
+
+    You may set this flag only if you:
+
+    - found out that this device extension is supported and enabled it while creating the Vulkan device passed as VmaAllocatorCreateInfo::device,
+    - checked that `VkPhysicalDeviceCoherentMemoryFeaturesAMD::deviceCoherentMemory` is true and set it while creating the Vulkan device,
+    - want it to be used internally by this library.
+
+    The extension and accompanying device feature provide access to memory types with
+    `VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD` and `VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD` flags.
+    They are useful mostly for writing breadcrumb markers - a common method for debugging GPU crashes/hangs/TDRs.
+
+    When the extension is not enabled, such memory types are still enumerated, but their usage is illegal.
+    To protect against this error, if you don't create the allocator with this flag, it will refuse to allocate any memory or create a custom pool in such a memory type,
+    returning `VK_ERROR_FEATURE_NOT_PRESENT`.
+    */
+    VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT = 0x00000010,
+    /**
+    Enables usage of the "buffer device address" feature, which allows you to use the function
+    `vkGetBufferDeviceAddress*` to get a raw GPU pointer to a buffer and pass it for usage inside a shader.
+
+    You may set this flag only if you:
+
+    1. (For Vulkan version < 1.2) Found as available and enabled the device extension
+    VK_KHR_buffer_device_address.
+    This extension is promoted to core Vulkan 1.2.
+    2. Found as available and enabled the device feature `VkPhysicalDeviceBufferDeviceAddressFeatures::bufferDeviceAddress`.
+
+    When this flag is set, you can create buffers with `VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT` using VMA.
+    The library automatically adds `VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT` to
+    allocated memory blocks wherever it might be needed.
+
+    For more information, see documentation chapter \ref enabling_buffer_device_address.
+    */
+    VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT = 0x00000020,
+    /**
+    Enables usage of VK_EXT_memory_priority extension in the library.
+ + You may set this flag only if you found available and enabled this device extension, + along with `VkPhysicalDeviceMemoryPriorityFeaturesEXT::memoryPriority == VK_TRUE`, + while creating Vulkan device passed as VmaAllocatorCreateInfo::device. + + When this flag is used, VmaAllocationCreateInfo::priority and VmaPoolCreateInfo::priority + are used to set priorities of allocated Vulkan memory. Without it, these variables are ignored. + + A priority must be a floating-point value between 0 and 1, indicating the priority of the allocation relative to other memory allocations. + Larger values are higher priority. The granularity of the priorities is implementation-dependent. + It is automatically passed to every call to `vkAllocateMemory` done by the library using structure `VkMemoryPriorityAllocateInfoEXT`. + The value to be used for default priority is 0.5. + For more details, see the documentation of the VK_EXT_memory_priority extension. + */ + VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT = 0x00000040, + /** + Enables usage of VK_KHR_maintenance4 extension in the library. + + You may set this flag only if you found available and enabled this device extension, + while creating Vulkan device passed as VmaAllocatorCreateInfo::device. + */ + VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE4_BIT = 0x00000080, + /** + Enables usage of VK_KHR_maintenance5 extension in the library. + + You should set this flag if you found available and enabled this device extension, + while creating Vulkan device passed as VmaAllocatorCreateInfo::device. + */ + VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT = 0x00000100, + + VMA_ALLOCATOR_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VmaAllocatorCreateFlagBits; +/// See #VmaAllocatorCreateFlagBits. +typedef VkFlags VmaAllocatorCreateFlags; + +/** @} */ + +/** +\addtogroup group_alloc +@{ +*/ + +/// \brief Intended usage of the allocated memory. +typedef enum VmaMemoryUsage +{ + /** No intended memory usage specified. + Use other members of VmaAllocationCreateInfo to specify your requirements. + */ + VMA_MEMORY_USAGE_UNKNOWN = 0, + /** + \deprecated Obsolete, preserved for backward compatibility. + Prefers `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`. + */ + VMA_MEMORY_USAGE_GPU_ONLY = 1, + /** + \deprecated Obsolete, preserved for backward compatibility. + Guarantees `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` and `VK_MEMORY_PROPERTY_HOST_COHERENT_BIT`. + */ + VMA_MEMORY_USAGE_CPU_ONLY = 2, + /** + \deprecated Obsolete, preserved for backward compatibility. + Guarantees `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT`, prefers `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`. + */ + VMA_MEMORY_USAGE_CPU_TO_GPU = 3, + /** + \deprecated Obsolete, preserved for backward compatibility. + Guarantees `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT`, prefers `VK_MEMORY_PROPERTY_HOST_CACHED_BIT`. + */ + VMA_MEMORY_USAGE_GPU_TO_CPU = 4, + /** + \deprecated Obsolete, preserved for backward compatibility. + Prefers not `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`. + */ + VMA_MEMORY_USAGE_CPU_COPY = 5, + /** + Lazily allocated GPU memory having `VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT`. + Exists mostly on mobile platforms. Using it on desktop PC or other GPUs with no such memory type present will fail the allocation. + + Usage: Memory for transient attachment images (color attachments, depth attachments etc.), created with `VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT`. + + Allocations with this usage are always created as dedicated - it implies #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. 
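+
+    For illustration, a minimal sketch of creating a transient color attachment with this
+    usage (the format, extent, and the `allocator` handle below are illustrative assumptions):
+
+    \code
+    // Transient color attachment, preferably placed in lazily allocated memory.
+    VkImageCreateInfo imgCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
+    imgCreateInfo.imageType = VK_IMAGE_TYPE_2D;
+    imgCreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM; // example format
+    imgCreateInfo.extent = { 1920, 1080, 1 };        // example extent
+    imgCreateInfo.mipLevels = 1;
+    imgCreateInfo.arrayLayers = 1;
+    imgCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
+    imgCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
+    imgCreateInfo.usage = VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+
+    VmaAllocationCreateInfo allocCreateInfo = {};
+    allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED;
+
+    VkImage img;
+    VmaAllocation alloc;
+    // Fails if the device exposes no lazily allocated memory type.
+    VkResult res = vmaCreateImage(allocator, &imgCreateInfo, &allocCreateInfo, &img, &alloc, NULL);
+    \endcode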
+ */ + VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED = 6, + /** + Selects best memory type automatically. + This flag is recommended for most common use cases. + + When using this flag, if you want to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT), + you must pass one of the flags: #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT + in VmaAllocationCreateInfo::flags. + + It can be used only with functions that let the library know `VkBufferCreateInfo` or `VkImageCreateInfo`, e.g. + vmaCreateBuffer(), vmaCreateImage(), vmaFindMemoryTypeIndexForBufferInfo(), vmaFindMemoryTypeIndexForImageInfo() + and not with generic memory allocation functions. + */ + VMA_MEMORY_USAGE_AUTO = 7, + /** + Selects best memory type automatically with preference for GPU (device) memory. + + When using this flag, if you want to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT), + you must pass one of the flags: #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT + in VmaAllocationCreateInfo::flags. + + It can be used only with functions that let the library know `VkBufferCreateInfo` or `VkImageCreateInfo`, e.g. + vmaCreateBuffer(), vmaCreateImage(), vmaFindMemoryTypeIndexForBufferInfo(), vmaFindMemoryTypeIndexForImageInfo() + and not with generic memory allocation functions. + */ + VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE = 8, + /** + Selects best memory type automatically with preference for CPU (host) memory. + + When using this flag, if you want to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT), + you must pass one of the flags: #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT + in VmaAllocationCreateInfo::flags. + + It can be used only with functions that let the library know `VkBufferCreateInfo` or `VkImageCreateInfo`, e.g. + vmaCreateBuffer(), vmaCreateImage(), vmaFindMemoryTypeIndexForBufferInfo(), vmaFindMemoryTypeIndexForImageInfo() + and not with generic memory allocation functions. + */ + VMA_MEMORY_USAGE_AUTO_PREFER_HOST = 9, + + VMA_MEMORY_USAGE_MAX_ENUM = 0x7FFFFFFF +} VmaMemoryUsage; + +/// Flags to be passed as VmaAllocationCreateInfo::flags. +typedef enum VmaAllocationCreateFlagBits +{ + /** \brief Set this flag if the allocation should have its own memory block. + + Use it for special, big resources, like fullscreen images used as attachments. + + If you use this flag while creating a buffer or an image, `VkMemoryDedicatedAllocateInfo` + structure is applied if possible. + */ + VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT = 0x00000001, + + /** \brief Set this flag to only try to allocate from existing `VkDeviceMemory` blocks and never create new such block. + + If new allocation cannot be placed in any of the existing blocks, allocation + fails with `VK_ERROR_OUT_OF_DEVICE_MEMORY` error. + + You should not use #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT and + #VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT at the same time. It makes no sense. + */ + VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT = 0x00000002, + /** \brief Set this flag to use a memory that will be persistently mapped and retrieve pointer to it. + + Pointer to mapped memory will be returned through VmaAllocationInfo::pMappedData. + + It is valid to use this flag for allocation made from memory type that is not + `HOST_VISIBLE`. This flag is then ignored and memory is not mapped. 
This is + useful if you need an allocation that is efficient to use on GPU + (`DEVICE_LOCAL`) and still want to map it directly if possible on platforms that + support it (e.g. Intel GPU). + */ + VMA_ALLOCATION_CREATE_MAPPED_BIT = 0x00000004, + /** \deprecated Preserved for backward compatibility. Consider using vmaSetAllocationName() instead. + + Set this flag to treat VmaAllocationCreateInfo::pUserData as pointer to a + null-terminated string. Instead of copying pointer value, a local copy of the + string is made and stored in allocation's `pName`. The string is automatically + freed together with the allocation. It is also used in vmaBuildStatsString(). + */ + VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT = 0x00000020, + /** Allocation will be created from upper stack in a double stack pool. + + This flag is only allowed for custom pools created with #VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT flag. + */ + VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT = 0x00000040, + /** Create both buffer/image and allocation, but don't bind them together. + It is useful when you want to bind yourself to do some more advanced binding, e.g. using some extensions. + The flag is meaningful only with functions that bind by default: vmaCreateBuffer(), vmaCreateImage(). + Otherwise it is ignored. + + If you want to make sure the new buffer/image is not tied to the new memory allocation + through `VkMemoryDedicatedAllocateInfoKHR` structure in case the allocation ends up in its own memory block, + use also flag #VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT. + */ + VMA_ALLOCATION_CREATE_DONT_BIND_BIT = 0x00000080, + /** Create allocation only if additional device memory required for it, if any, won't exceed + memory budget. Otherwise return `VK_ERROR_OUT_OF_DEVICE_MEMORY`. + */ + VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT = 0x00000100, + /** \brief Set this flag if the allocated memory will have aliasing resources. + + Usage of this flag prevents supplying `VkMemoryDedicatedAllocateInfoKHR` when #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT is specified. + Otherwise created dedicated memory will not be suitable for aliasing resources, resulting in Vulkan Validation Layer errors. + */ + VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT = 0x00000200, + /** + Requests possibility to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT). + + - If you use #VMA_MEMORY_USAGE_AUTO or other `VMA_MEMORY_USAGE_AUTO*` value, + you must use this flag to be able to map the allocation. Otherwise, mapping is incorrect. + - If you use other value of #VmaMemoryUsage, this flag is ignored and mapping is always possible in memory types that are `HOST_VISIBLE`. + This includes allocations created in \ref custom_memory_pools. + + Declares that mapped memory will only be written sequentially, e.g. using `memcpy()` or a loop writing number-by-number, + never read or accessed randomly, so a memory type can be selected that is uncached and write-combined. + + \warning Violating this declaration may work correctly, but will likely be very slow. + Watch out for implicit reads introduced by doing e.g. `pMappedData[i] += x;` + Better prepare your data in a local variable and `memcpy()` it to the mapped pointer all at once. + */ + VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT = 0x00000400, + /** + Requests possibility to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT). + + - If you use #VMA_MEMORY_USAGE_AUTO or other `VMA_MEMORY_USAGE_AUTO*` value, + you must use this flag to be able to map the allocation. 
Otherwise, mapping is incorrect. + - If you use other value of #VmaMemoryUsage, this flag is ignored and mapping is always possible in memory types that are `HOST_VISIBLE`. + This includes allocations created in \ref custom_memory_pools. + + Declares that mapped memory can be read, written, and accessed in random order, + so a `HOST_CACHED` memory type is preferred. + */ + VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT = 0x00000800, + /** + Together with #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT, + it says that despite request for host access, a not-`HOST_VISIBLE` memory type can be selected + if it may improve performance. + + By using this flag, you declare that you will check if the allocation ended up in a `HOST_VISIBLE` memory type + (e.g. using vmaGetAllocationMemoryProperties()) and if not, you will create some "staging" buffer and + issue an explicit transfer to write/read your data. + To prepare for this possibility, don't forget to add appropriate flags like + `VK_BUFFER_USAGE_TRANSFER_DST_BIT`, `VK_BUFFER_USAGE_TRANSFER_SRC_BIT` to the parameters of created buffer or image. + */ + VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT = 0x00001000, + /** Allocation strategy that chooses smallest possible free range for the allocation + to minimize memory usage and fragmentation, possibly at the expense of allocation time. + */ + VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT = 0x00010000, + /** Allocation strategy that chooses first suitable free range for the allocation - + not necessarily in terms of the smallest offset but the one that is easiest and fastest to find + to minimize allocation time, possibly at the expense of allocation quality. + */ + VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT = 0x00020000, + /** Allocation strategy that chooses always the lowest offset in available space. + This is not the most efficient strategy but achieves highly packed data. + Used internally by defragmentation, not recommended in typical usage. + */ + VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT = 0x00040000, + /** Alias to #VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT. + */ + VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT, + /** Alias to #VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT. + */ + VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT, + /** A bit mask to extract only `STRATEGY` bits from entire set of flags. + */ + VMA_ALLOCATION_CREATE_STRATEGY_MASK = + VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT | + VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT | + VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT, + + VMA_ALLOCATION_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VmaAllocationCreateFlagBits; +/// See #VmaAllocationCreateFlagBits. +typedef VkFlags VmaAllocationCreateFlags; + +/// Flags to be passed as VmaPoolCreateInfo::flags. +typedef enum VmaPoolCreateFlagBits +{ + /** \brief Use this flag if you always allocate only buffers and linear images or only optimal images out of this pool and so Buffer-Image Granularity can be ignored. + + This is an optional optimization flag. + + If you always allocate using vmaCreateBuffer(), vmaCreateImage(), + vmaAllocateMemoryForBuffer(), then you don't need to use it because allocator + knows exact type of your allocations so it can handle Buffer-Image Granularity + in the optimal way. 
+
+    If you also allocate using vmaAllocateMemoryForImage() or vmaAllocateMemory(),
+    the exact type of such allocations is not known, so the allocator must be conservative
+    in handling Buffer-Image Granularity, which can lead to suboptimal allocation
+    (wasted memory). In that case, if you can make sure you always allocate only
+    buffers and linear images or only optimal images out of this pool, use this flag
+    to make the allocator disregard Buffer-Image Granularity and so make allocations
+    faster and more optimal.
+    */
+    VMA_POOL_CREATE_IGNORE_BUFFER_IMAGE_GRANULARITY_BIT = 0x00000002,
+
+    /** \brief Enables alternative, linear allocation algorithm in this pool.
+
+    Specify this flag to enable linear allocation algorithm, which always creates
+    new allocations after the last one and doesn't reuse space from allocations freed in
+    between. It trades memory consumption for a simplified algorithm and data
+    structure, which has better performance and uses less memory for metadata.
+
+    By using this flag, you can achieve the behavior of free-at-once, stack,
+    ring buffer, and double stack.
+    For details, see documentation chapter \ref linear_algorithm.
+    */
+    VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT = 0x00000004,
+
+    /** Bit mask to extract only `ALGORITHM` bits from entire set of flags.
+    */
+    VMA_POOL_CREATE_ALGORITHM_MASK =
+        VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT,
+
+    VMA_POOL_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VmaPoolCreateFlagBits;
+/// Flags to be passed as VmaPoolCreateInfo::flags. See #VmaPoolCreateFlagBits.
+typedef VkFlags VmaPoolCreateFlags;
+
+/// Flags to be passed as VmaDefragmentationInfo::flags.
+typedef enum VmaDefragmentationFlagBits
+{
+    /** \brief Use a simple but fast algorithm for defragmentation.
+    May not achieve best results but will require the least time to compute and the fewest allocations to copy.
+    */
+    VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FAST_BIT = 0x1,
+    /** \brief Default defragmentation algorithm, applied also when no `ALGORITHM` flag is specified.
+    Offers a balance between defragmentation quality and the amount of allocations and bytes that need to be moved.
+    */
+    VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT = 0x2,
+    /** \brief Perform full defragmentation of memory.
+    Can result in notably more time to compute and allocations to copy, but will achieve best memory packing.
+    */
+    VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FULL_BIT = 0x4,
+    /** \brief Use the most robust algorithm at the cost of time to compute and number of copies to make.
+    Only available when bufferImageGranularity is greater than 1, since it aims to reduce
+    alignment issues between different types of resources.
+    Otherwise falls back to same behavior as #VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FULL_BIT.
+    */
+    VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT = 0x8,
+
+    /// A bit mask to extract only `ALGORITHM` bits from entire set of flags.
+    VMA_DEFRAGMENTATION_FLAG_ALGORITHM_MASK =
+        VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FAST_BIT |
+        VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT |
+        VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FULL_BIT |
+        VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT,
+
+    VMA_DEFRAGMENTATION_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VmaDefragmentationFlagBits;
+/// See #VmaDefragmentationFlagBits.
+typedef VkFlags VmaDefragmentationFlags;
+
+/// Operation performed on a single defragmentation move. See structure #VmaDefragmentationMove.
+typedef enum VmaDefragmentationMoveOperation
+{
+    /// Buffer/image has been recreated at `dstTmpAllocation`, data has been copied, old buffer/image has been destroyed. `srcAllocation` should be changed to point to the new place. This is the default value set by vmaBeginDefragmentationPass().
+    VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY = 0,
+    /// Set this value if you cannot move the allocation. The new place reserved at `dstTmpAllocation` will be freed. `srcAllocation` will remain unchanged.
+    VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE = 1,
+    /// Set this value if you decide to abandon the allocation and you destroyed the buffer/image. The new place reserved at `dstTmpAllocation` will be freed, along with `srcAllocation`, which will be destroyed.
+    VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY = 2,
+} VmaDefragmentationMoveOperation;
+
+/** @} */
+
+/**
+\addtogroup group_virtual
+@{
+*/
+
+/// Flags to be passed as VmaVirtualBlockCreateInfo::flags.
+typedef enum VmaVirtualBlockCreateFlagBits
+{
+    /** \brief Enables alternative, linear allocation algorithm in this virtual block.
+
+    Specify this flag to enable linear allocation algorithm, which always creates
+    new allocations after the last one and doesn't reuse space from allocations freed in
+    between. It trades memory consumption for a simplified algorithm and data
+    structure, which has better performance and uses less memory for metadata.
+
+    By using this flag, you can achieve the behavior of free-at-once, stack,
+    ring buffer, and double stack.
+    For details, see documentation chapter \ref linear_algorithm.
+    */
+    VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT = 0x00000001,
+
+    /** \brief Bit mask to extract only `ALGORITHM` bits from entire set of flags.
+    */
+    VMA_VIRTUAL_BLOCK_CREATE_ALGORITHM_MASK =
+        VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT,
+
+    VMA_VIRTUAL_BLOCK_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VmaVirtualBlockCreateFlagBits;
+/// Flags to be passed as VmaVirtualBlockCreateInfo::flags. See #VmaVirtualBlockCreateFlagBits.
+typedef VkFlags VmaVirtualBlockCreateFlags;
+
+/// Flags to be passed as VmaVirtualAllocationCreateInfo::flags.
+typedef enum VmaVirtualAllocationCreateFlagBits
+{
+    /** \brief Allocation will be created from upper stack in a double stack pool.
+
+    This flag is only allowed for virtual blocks created with #VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT flag.
+    */
+    VMA_VIRTUAL_ALLOCATION_CREATE_UPPER_ADDRESS_BIT = VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT,
+    /** \brief Allocation strategy that tries to minimize memory usage.
+    */
+    VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT,
+    /** \brief Allocation strategy that tries to minimize allocation time.
+    */
+    VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT,
+    /** Allocation strategy that chooses always the lowest offset in available space.
+    This is not the most efficient strategy but achieves highly packed data.
+    */
+    VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT = VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT,
+    /** \brief A bit mask to extract only `STRATEGY` bits from entire set of flags.
+
+    These strategy flags are binary compatible with equivalent flags in #VmaAllocationCreateFlagBits.
+    */
+    VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MASK = VMA_ALLOCATION_CREATE_STRATEGY_MASK,
+
+    VMA_VIRTUAL_ALLOCATION_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VmaVirtualAllocationCreateFlagBits;
+/// Flags to be passed as VmaVirtualAllocationCreateInfo::flags. See #VmaVirtualAllocationCreateFlagBits.
+typedef VkFlags VmaVirtualAllocationCreateFlags; + +/** @} */ + +#endif // _VMA_ENUM_DECLARATIONS + +#ifndef _VMA_DATA_TYPES_DECLARATIONS + +/** +\addtogroup group_init +@{ */ + +/** \struct VmaAllocator +\brief Represents main object of this library initialized. + +Fill structure #VmaAllocatorCreateInfo and call function vmaCreateAllocator() to create it. +Call function vmaDestroyAllocator() to destroy it. + +It is recommended to create just one object of this type per `VkDevice` object, +right after Vulkan is initialized and keep it alive until before Vulkan device is destroyed. +*/ +VK_DEFINE_HANDLE(VmaAllocator) + +/** @} */ + +/** +\addtogroup group_alloc +@{ +*/ + +/** \struct VmaPool +\brief Represents custom memory pool + +Fill structure VmaPoolCreateInfo and call function vmaCreatePool() to create it. +Call function vmaDestroyPool() to destroy it. + +For more information see [Custom memory pools](@ref choosing_memory_type_custom_memory_pools). +*/ +VK_DEFINE_HANDLE(VmaPool) + +/** \struct VmaAllocation +\brief Represents single memory allocation. + +It may be either dedicated block of `VkDeviceMemory` or a specific region of a bigger block of this type +plus unique offset. + +There are multiple ways to create such object. +You need to fill structure VmaAllocationCreateInfo. +For more information see [Choosing memory type](@ref choosing_memory_type). + +Although the library provides convenience functions that create Vulkan buffer or image, +allocate memory for it and bind them together, +binding of the allocation to a buffer or an image is out of scope of the allocation itself. +Allocation object can exist without buffer/image bound, +binding can be done manually by the user, and destruction of it can be done +independently of destruction of the allocation. + +The object also remembers its size and some other information. +To retrieve this information, use function vmaGetAllocationInfo() and inspect +returned structure VmaAllocationInfo. +*/ +VK_DEFINE_HANDLE(VmaAllocation) + +/** \struct VmaDefragmentationContext +\brief An opaque object that represents started defragmentation process. + +Fill structure #VmaDefragmentationInfo and call function vmaBeginDefragmentation() to create it. +Call function vmaEndDefragmentation() to destroy it. +*/ +VK_DEFINE_HANDLE(VmaDefragmentationContext) + +/** @} */ + +/** +\addtogroup group_virtual +@{ +*/ + +/** \struct VmaVirtualAllocation +\brief Represents single memory allocation done inside VmaVirtualBlock. + +Use it as a unique identifier to virtual allocation within the single block. + +Use value `VK_NULL_HANDLE` to represent a null/invalid allocation. +*/ +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VmaVirtualAllocation) + +/** @} */ + +/** +\addtogroup group_virtual +@{ +*/ + +/** \struct VmaVirtualBlock +\brief Handle to a virtual block object that allows to use core allocation algorithm without allocating any real GPU memory. + +Fill in #VmaVirtualBlockCreateInfo structure and use vmaCreateVirtualBlock() to create it. Use vmaDestroyVirtualBlock() to destroy it. +For more information, see documentation chapter \ref virtual_allocator. + +This object is not thread-safe - should not be used from multiple threads simultaneously, must be synchronized externally. +*/ +VK_DEFINE_HANDLE(VmaVirtualBlock) + +/** @} */ + +/** +\addtogroup group_init +@{ +*/ + +/// Callback function called after successful vkAllocateMemory. 
+typedef void (VKAPI_PTR* PFN_vmaAllocateDeviceMemoryFunction)( + VmaAllocator VMA_NOT_NULL allocator, + uint32_t memoryType, + VkDeviceMemory VMA_NOT_NULL_NON_DISPATCHABLE memory, + VkDeviceSize size, + void* VMA_NULLABLE pUserData); + +/// Callback function called before vkFreeMemory. +typedef void (VKAPI_PTR* PFN_vmaFreeDeviceMemoryFunction)( + VmaAllocator VMA_NOT_NULL allocator, + uint32_t memoryType, + VkDeviceMemory VMA_NOT_NULL_NON_DISPATCHABLE memory, + VkDeviceSize size, + void* VMA_NULLABLE pUserData); + +/** \brief Set of callbacks that the library will call for `vkAllocateMemory` and `vkFreeMemory`. + +Provided for informative purpose, e.g. to gather statistics about number of +allocations or total amount of memory allocated in Vulkan. + +Used in VmaAllocatorCreateInfo::pDeviceMemoryCallbacks. +*/ +typedef struct VmaDeviceMemoryCallbacks +{ + /// Optional, can be null. + PFN_vmaAllocateDeviceMemoryFunction VMA_NULLABLE pfnAllocate; + /// Optional, can be null. + PFN_vmaFreeDeviceMemoryFunction VMA_NULLABLE pfnFree; + /// Optional, can be null. + void* VMA_NULLABLE pUserData; +} VmaDeviceMemoryCallbacks; + +/** \brief Pointers to some Vulkan functions - a subset used by the library. + +Used in VmaAllocatorCreateInfo::pVulkanFunctions. +*/ +typedef struct VmaVulkanFunctions +{ + /// Required when using VMA_DYNAMIC_VULKAN_FUNCTIONS. + PFN_vkGetInstanceProcAddr VMA_NULLABLE vkGetInstanceProcAddr; + /// Required when using VMA_DYNAMIC_VULKAN_FUNCTIONS. + PFN_vkGetDeviceProcAddr VMA_NULLABLE vkGetDeviceProcAddr; + PFN_vkGetPhysicalDeviceProperties VMA_NULLABLE vkGetPhysicalDeviceProperties; + PFN_vkGetPhysicalDeviceMemoryProperties VMA_NULLABLE vkGetPhysicalDeviceMemoryProperties; + PFN_vkAllocateMemory VMA_NULLABLE vkAllocateMemory; + PFN_vkFreeMemory VMA_NULLABLE vkFreeMemory; + PFN_vkMapMemory VMA_NULLABLE vkMapMemory; + PFN_vkUnmapMemory VMA_NULLABLE vkUnmapMemory; + PFN_vkFlushMappedMemoryRanges VMA_NULLABLE vkFlushMappedMemoryRanges; + PFN_vkInvalidateMappedMemoryRanges VMA_NULLABLE vkInvalidateMappedMemoryRanges; + PFN_vkBindBufferMemory VMA_NULLABLE vkBindBufferMemory; + PFN_vkBindImageMemory VMA_NULLABLE vkBindImageMemory; + PFN_vkGetBufferMemoryRequirements VMA_NULLABLE vkGetBufferMemoryRequirements; + PFN_vkGetImageMemoryRequirements VMA_NULLABLE vkGetImageMemoryRequirements; + PFN_vkCreateBuffer VMA_NULLABLE vkCreateBuffer; + PFN_vkDestroyBuffer VMA_NULLABLE vkDestroyBuffer; + PFN_vkCreateImage VMA_NULLABLE vkCreateImage; + PFN_vkDestroyImage VMA_NULLABLE vkDestroyImage; + PFN_vkCmdCopyBuffer VMA_NULLABLE vkCmdCopyBuffer; +#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 + /// Fetch "vkGetBufferMemoryRequirements2" on Vulkan >= 1.1, fetch "vkGetBufferMemoryRequirements2KHR" when using VK_KHR_dedicated_allocation extension. + PFN_vkGetBufferMemoryRequirements2KHR VMA_NULLABLE vkGetBufferMemoryRequirements2KHR; + /// Fetch "vkGetImageMemoryRequirements2" on Vulkan >= 1.1, fetch "vkGetImageMemoryRequirements2KHR" when using VK_KHR_dedicated_allocation extension. + PFN_vkGetImageMemoryRequirements2KHR VMA_NULLABLE vkGetImageMemoryRequirements2KHR; +#endif +#if VMA_BIND_MEMORY2 || VMA_VULKAN_VERSION >= 1001000 + /// Fetch "vkBindBufferMemory2" on Vulkan >= 1.1, fetch "vkBindBufferMemory2KHR" when using VK_KHR_bind_memory2 extension. + PFN_vkBindBufferMemory2KHR VMA_NULLABLE vkBindBufferMemory2KHR; + /// Fetch "vkBindImageMemory2" on Vulkan >= 1.1, fetch "vkBindImageMemory2KHR" when using VK_KHR_bind_memory2 extension. 
+    PFN_vkBindImageMemory2KHR VMA_NULLABLE vkBindImageMemory2KHR;
+#endif
+#if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000
+    /// Fetch from "vkGetPhysicalDeviceMemoryProperties2" on Vulkan >= 1.1, but you can also fetch it from "vkGetPhysicalDeviceMemoryProperties2KHR" if you enabled extension VK_KHR_get_physical_device_properties2.
+    PFN_vkGetPhysicalDeviceMemoryProperties2KHR VMA_NULLABLE vkGetPhysicalDeviceMemoryProperties2KHR;
+#endif
+#if VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000
+    /// Fetch from "vkGetDeviceBufferMemoryRequirements" on Vulkan >= 1.3, but you can also fetch it from "vkGetDeviceBufferMemoryRequirementsKHR" if you enabled extension VK_KHR_maintenance4.
+    PFN_vkGetDeviceBufferMemoryRequirementsKHR VMA_NULLABLE vkGetDeviceBufferMemoryRequirements;
+    /// Fetch from "vkGetDeviceImageMemoryRequirements" on Vulkan >= 1.3, but you can also fetch it from "vkGetDeviceImageMemoryRequirementsKHR" if you enabled extension VK_KHR_maintenance4.
+    PFN_vkGetDeviceImageMemoryRequirementsKHR VMA_NULLABLE vkGetDeviceImageMemoryRequirements;
+#endif
+} VmaVulkanFunctions;
+
+/// Description of an Allocator to be created.
+typedef struct VmaAllocatorCreateInfo
+{
+    /// Flags for created allocator. Use #VmaAllocatorCreateFlagBits enum.
+    VmaAllocatorCreateFlags flags;
+    /// Vulkan physical device.
+    /** It must be valid throughout the whole lifetime of the created allocator. */
+    VkPhysicalDevice VMA_NOT_NULL physicalDevice;
+    /// Vulkan device.
+    /** It must be valid throughout the whole lifetime of the created allocator. */
+    VkDevice VMA_NOT_NULL device;
+    /// Preferred size of a single `VkDeviceMemory` block to be allocated from large heaps > 1 GiB. Optional.
+    /** Set to 0 to use default, which is currently 256 MiB. */
+    VkDeviceSize preferredLargeHeapBlockSize;
+    /// Custom CPU memory allocation callbacks. Optional.
+    /** Optional, can be null. When specified, will also be used for all CPU-side memory allocations. */
+    const VkAllocationCallbacks* VMA_NULLABLE pAllocationCallbacks;
+    /// Informative callbacks for `vkAllocateMemory`, `vkFreeMemory`. Optional.
+    /** Optional, can be null. */
+    const VmaDeviceMemoryCallbacks* VMA_NULLABLE pDeviceMemoryCallbacks;
+    /** \brief Either null or a pointer to an array of limits on the maximum number of bytes that can be allocated out of a particular Vulkan memory heap.
+
+    If not NULL, it must be a pointer to an array of
+    `VkPhysicalDeviceMemoryProperties::memoryHeapCount` elements, defining the limit on the
+    maximum number of bytes that can be allocated out of a particular Vulkan memory
+    heap.
+
+    Any of the elements may be equal to `VK_WHOLE_SIZE`, which means no limit on that
+    heap. This is also the default in case of `pHeapSizeLimit` = NULL.
+
+    If there is a limit defined for a heap:
+
+    - If the user tries to allocate more memory from that heap using this allocator,
+      the allocation fails with `VK_ERROR_OUT_OF_DEVICE_MEMORY`.
+    - If the limit is smaller than the heap size reported in `VkMemoryHeap::size`, the
+      value of this limit will be reported instead when using vmaGetMemoryProperties().
+
+    Warning! Using this feature may not be equivalent to installing a GPU with a
+    smaller amount of memory, because the graphics driver doesn't necessarily fail new
+    allocations with `VK_ERROR_OUT_OF_DEVICE_MEMORY` result when memory capacity is
+    exceeded. It may return success and just silently migrate some device memory
+    blocks to system RAM. This driver behavior can also be controlled using the
+    VK_AMD_memory_overallocation_behavior extension.
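+
+    For illustration, a minimal sketch of limiting a single heap (the heap count, the 1 GiB cap,
+    and the surrounding handles are illustrative assumptions; query the real heap count with
+    `vkGetPhysicalDeviceMemoryProperties`):
+
+    \code
+    // Assuming the device reports 2 memory heaps: cap heap 0 at 1 GiB, leave heap 1 unlimited.
+    VkDeviceSize heapSizeLimits[] = { 1ull * 1024 * 1024 * 1024, VK_WHOLE_SIZE };
+
+    VmaAllocatorCreateInfo allocatorCreateInfo = {};
+    allocatorCreateInfo.physicalDevice = physicalDevice; // illustrative handles
+    allocatorCreateInfo.device = device;
+    allocatorCreateInfo.instance = instance;
+    allocatorCreateInfo.pHeapSizeLimit = heapSizeLimits;
+    \endcode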
+ */ + const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL("VkPhysicalDeviceMemoryProperties::memoryHeapCount") pHeapSizeLimit; + + /** \brief Pointers to Vulkan functions. Can be null. + + For details see [Pointers to Vulkan functions](@ref config_Vulkan_functions). + */ + const VmaVulkanFunctions* VMA_NULLABLE pVulkanFunctions; + /** \brief Handle to Vulkan instance object. + + Starting from version 3.0.0 this member is no longer optional, it must be set! + */ + VkInstance VMA_NOT_NULL instance; + /** \brief Optional. Vulkan version that the application uses. + + It must be a value in the format as created by macro `VK_MAKE_VERSION` or a constant like: `VK_API_VERSION_1_1`, `VK_API_VERSION_1_0`. + The patch version number specified is ignored. Only the major and minor versions are considered. + Only versions 1.0, 1.1, 1.2, 1.3 are supported by the current implementation. + Leaving it initialized to zero is equivalent to `VK_API_VERSION_1_0`. + It must match the Vulkan version used by the application and supported on the selected physical device, + so it must be no higher than `VkApplicationInfo::apiVersion` passed to `vkCreateInstance` + and no higher than `VkPhysicalDeviceProperties::apiVersion` found on the physical device used. + */ + uint32_t vulkanApiVersion; +#if VMA_EXTERNAL_MEMORY + /** \brief Either null or a pointer to an array of external memory handle types for each Vulkan memory type. + + If not NULL, it must be a pointer to an array of `VkPhysicalDeviceMemoryProperties::memoryTypeCount` + elements, defining external memory handle types of particular Vulkan memory type, + to be passed using `VkExportMemoryAllocateInfoKHR`. + + Any of the elements may be equal to 0, which means not to use `VkExportMemoryAllocateInfoKHR` on this memory type. + This is also the default in case of `pTypeExternalMemoryHandleTypes` = NULL. + */ + const VkExternalMemoryHandleTypeFlagsKHR* VMA_NULLABLE VMA_LEN_IF_NOT_NULL("VkPhysicalDeviceMemoryProperties::memoryTypeCount") pTypeExternalMemoryHandleTypes; +#endif // #if VMA_EXTERNAL_MEMORY +} VmaAllocatorCreateInfo; + +/// Information about existing #VmaAllocator object. +typedef struct VmaAllocatorInfo +{ + /** \brief Handle to Vulkan instance object. + + This is the same value as has been passed through VmaAllocatorCreateInfo::instance. + */ + VkInstance VMA_NOT_NULL instance; + /** \brief Handle to Vulkan physical device object. + + This is the same value as has been passed through VmaAllocatorCreateInfo::physicalDevice. + */ + VkPhysicalDevice VMA_NOT_NULL physicalDevice; + /** \brief Handle to Vulkan device object. + + This is the same value as has been passed through VmaAllocatorCreateInfo::device. + */ + VkDevice VMA_NOT_NULL device; +} VmaAllocatorInfo; + +/** @} */ + +/** +\addtogroup group_stats +@{ +*/ + +/** \brief Calculated statistics of memory usage e.g. in a specific memory type, heap, custom pool, or total. + +These are fast to calculate. +See functions: vmaGetHeapBudgets(), vmaGetPoolStatistics(). +*/ +typedef struct VmaStatistics +{ + /** \brief Number of `VkDeviceMemory` objects - Vulkan memory blocks allocated. + */ + uint32_t blockCount; + /** \brief Number of #VmaAllocation objects allocated. + + Dedicated allocations have their own blocks, so each one adds 1 to `allocationCount` as well as `blockCount`. + */ + uint32_t allocationCount; + /** \brief Number of bytes allocated in `VkDeviceMemory` blocks. 
+
+    \note To avoid confusion, please be aware that what Vulkan calls an "allocation" - a whole `VkDeviceMemory` object
+    (e.g. as in `VkPhysicalDeviceLimits::maxMemoryAllocationCount`) is called a "block" in VMA, while VMA calls
+    an "allocation" a #VmaAllocation object that represents a memory region sub-allocated from such a block, usually for a single buffer or image.
+    */
+    VkDeviceSize blockBytes;
+    /** \brief Total number of bytes occupied by all #VmaAllocation objects.
+
+    Always less than or equal to `blockBytes`.
+    The difference `(blockBytes - allocationBytes)` is the amount of memory allocated from Vulkan
+    but unused by any #VmaAllocation.
+    */
+    VkDeviceSize allocationBytes;
+} VmaStatistics;
+
+/** \brief More detailed statistics than #VmaStatistics.
+
+These are slower to calculate. Use for debugging purposes.
+See functions: vmaCalculateStatistics(), vmaCalculatePoolStatistics().
+
+The previous version of the statistics API provided averages, but they have been removed
+because they can be easily calculated as:
+
+\code
+VkDeviceSize allocationSizeAvg = detailedStats.statistics.allocationBytes / detailedStats.statistics.allocationCount;
+VkDeviceSize unusedBytes = detailedStats.statistics.blockBytes - detailedStats.statistics.allocationBytes;
+VkDeviceSize unusedRangeSizeAvg = unusedBytes / detailedStats.unusedRangeCount;
+\endcode
+*/
+typedef struct VmaDetailedStatistics
+{
+    /// Basic statistics.
+    VmaStatistics statistics;
+    /// Number of free ranges of memory between allocations.
+    uint32_t unusedRangeCount;
+    /// Smallest allocation size. `VK_WHOLE_SIZE` if there are 0 allocations.
+    VkDeviceSize allocationSizeMin;
+    /// Largest allocation size. 0 if there are 0 allocations.
+    VkDeviceSize allocationSizeMax;
+    /// Smallest empty range size. `VK_WHOLE_SIZE` if there are 0 empty ranges.
+    VkDeviceSize unusedRangeSizeMin;
+    /// Largest empty range size. 0 if there are 0 empty ranges.
+    VkDeviceSize unusedRangeSizeMax;
+} VmaDetailedStatistics;
+
+/** \brief General statistics from the current state of the Allocator -
+total memory usage across all memory heaps and types.
+
+These are slower to calculate. Use for debugging purposes.
+See function vmaCalculateStatistics().
+*/
+typedef struct VmaTotalStatistics
+{
+    VmaDetailedStatistics memoryType[VK_MAX_MEMORY_TYPES];
+    VmaDetailedStatistics memoryHeap[VK_MAX_MEMORY_HEAPS];
+    VmaDetailedStatistics total;
+} VmaTotalStatistics;
+
+/** \brief Statistics of current memory usage and available budget for a specific memory heap.
+
+These are fast to calculate.
+See function vmaGetHeapBudgets().
+*/
+typedef struct VmaBudget
+{
+    /** \brief Statistics fetched from the library.
+    */
+    VmaStatistics statistics;
+    /** \brief Estimated current memory usage of the program, in bytes.
+
+    Fetched from the system using the VK_EXT_memory_budget extension if enabled.
+
+    It might be different from `statistics.blockBytes` (usually higher) due to additional implicit objects
+    also occupying the memory, like swapchain, pipelines, descriptor heaps, command buffers, or
+    `VkDeviceMemory` blocks allocated outside of this library, if any.
+    */
+    VkDeviceSize usage;
+    /** \brief Estimated amount of memory available to the program, in bytes.
+
+    Fetched from the system using the VK_EXT_memory_budget extension if enabled.
+
+    It might be different from (most probably smaller than) `VkMemoryHeap::size[heapIndex]` due to factors
+    external to the program, decided by the operating system.
+    The difference `budget - usage` is the amount of additional memory that can probably
+    be allocated without problems. Exceeding the budget may result in various problems.
+    */
+    VkDeviceSize budget;
+} VmaBudget;
+
+/** @} */
+
+/**
+\addtogroup group_alloc
+@{
+*/
+
+/** \brief Parameters of new #VmaAllocation.
+
+To be used with functions like vmaCreateBuffer(), vmaCreateImage(), and many others.
+*/
+typedef struct VmaAllocationCreateInfo
+{
+    /// Use #VmaAllocationCreateFlagBits enum.
+    VmaAllocationCreateFlags flags;
+    /** \brief Intended usage of memory.
+
+    You can leave #VMA_MEMORY_USAGE_UNKNOWN if you specify memory requirements in another way. \n
+    If `pool` is not null, this member is ignored.
+    */
+    VmaMemoryUsage usage;
+    /** \brief Flags that must be set in a memory type chosen for an allocation.
+
+    Leave 0 if you specify memory requirements in another way. \n
+    If `pool` is not null, this member is ignored.*/
+    VkMemoryPropertyFlags requiredFlags;
+    /** \brief Flags that preferably should be set in a memory type chosen for an allocation.
+
+    Set to 0 if no additional flags are preferred. \n
+    If `pool` is not null, this member is ignored. */
+    VkMemoryPropertyFlags preferredFlags;
+    /** \brief Bitmask containing one bit set for every memory type acceptable for this allocation.
+
+    Value 0 is equivalent to `UINT32_MAX` - it means any memory type is accepted if
+    it meets other requirements specified by this structure, with no further
+    restrictions on memory type index. \n
+    If `pool` is not null, this member is ignored.
+    */
+    uint32_t memoryTypeBits;
+    /** \brief Pool that this allocation should be created in.
+
+    Leave `VK_NULL_HANDLE` to allocate from the default pool. If not null, members:
+    `usage`, `requiredFlags`, `preferredFlags`, `memoryTypeBits` are ignored.
+    */
+    VmaPool VMA_NULLABLE pool;
+    /** \brief Custom general-purpose pointer that will be stored in #VmaAllocation, can be read as VmaAllocationInfo::pUserData and changed using vmaSetAllocationUserData().
+
+    If #VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT is used, it must be either
+    null or a pointer to a null-terminated string. The string will then be copied to an
+    internal buffer, so it doesn't need to be valid after the allocation call.
+    */
+    void* VMA_NULLABLE pUserData;
+    /** \brief A floating-point value between 0 and 1, indicating the priority of the allocation relative to other memory allocations.
+
+    It is used only when the #VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT flag was used during creation of the #VmaAllocator object
+    and this allocation ends up as dedicated or is explicitly forced as dedicated using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT.
+    Otherwise, it has the priority of the memory block where it is placed, and this variable is ignored.
+    */
+    float priority;
+} VmaAllocationCreateInfo;
+
+/// Describes parameters of a created #VmaPool.
+typedef struct VmaPoolCreateInfo
+{
+    /** \brief Vulkan memory type index to allocate this pool from.
+    */
+    uint32_t memoryTypeIndex;
+    /** \brief Use a combination of #VmaPoolCreateFlagBits.
+    */
+    VmaPoolCreateFlags flags;
+    /** \brief Size of a single `VkDeviceMemory` block to be allocated as part of this pool, in bytes. Optional.
+
+    Specify nonzero to set an explicit, constant size of memory blocks used by this
+    pool.
+
+    Leave 0 to use default and let the library manage block sizes automatically.
+    Sizes of particular blocks may vary.
+    In this case, the pool will also support dedicated allocations.
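+
+    For illustration, a minimal sketch of a pool with a fixed block size (the memory type
+    index and the sizes are illustrative assumptions; a real index can be obtained e.g. from
+    vmaFindMemoryTypeIndex()):
+
+    \code
+    VmaPoolCreateInfo poolCreateInfo = {};
+    poolCreateInfo.memoryTypeIndex = memTypeIndex;    // found earlier, e.g. with vmaFindMemoryTypeIndex()
+    poolCreateInfo.blockSize = 128ull * 1024 * 1024;  // every block is exactly 128 MiB
+    poolCreateInfo.maxBlockCount = 4;                 // at most 4 blocks = 512 MiB total
+
+    VmaPool pool;
+    VkResult res = vmaCreatePool(allocator, &poolCreateInfo, &pool);
+    \endcode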
+    */
+    VkDeviceSize blockSize;
+    /** \brief Minimum number of blocks to be always allocated in this pool, even if they stay empty.
+
+    Set to 0 to have no preallocated blocks and allow the pool to be completely empty.
+    */
+    size_t minBlockCount;
+    /** \brief Maximum number of blocks that can be allocated in this pool. Optional.
+
+    Set to 0 to use the default, which is `SIZE_MAX`, which means no limit.
+
+    Set to the same value as VmaPoolCreateInfo::minBlockCount to have a fixed amount of memory allocated
+    throughout the whole lifetime of this pool.
+    */
+    size_t maxBlockCount;
+    /** \brief A floating-point value between 0 and 1, indicating the priority of the allocations in this pool relative to other memory allocations.
+
+    It is used only when the #VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT flag was used during creation of the #VmaAllocator object.
+    Otherwise, this variable is ignored.
+    */
+    float priority;
+    /** \brief Additional minimum alignment to be used for all allocations created from this pool. Can be 0.
+
+    Leave 0 (default) not to impose any additional alignment. If not 0, it must be a power of two.
+    It can be useful in cases where the alignment returned by Vulkan functions like `vkGetBufferMemoryRequirements` is not enough,
+    e.g. when doing interop with OpenGL.
+    */
+    VkDeviceSize minAllocationAlignment;
+    /** \brief Additional `pNext` chain to be attached to `VkMemoryAllocateInfo` used for every allocation made by this pool. Optional.
+
+    Optional, can be null. If not null, it must point to a `pNext` chain of structures that can be attached to `VkMemoryAllocateInfo`.
+    It can be useful for special needs such as adding `VkExportMemoryAllocateInfoKHR`.
+    Structures pointed to by this member must remain alive and unchanged for the whole lifetime of the custom pool.
+
+    Please note that some structures, e.g. `VkMemoryPriorityAllocateInfoEXT`, `VkMemoryDedicatedAllocateInfoKHR`,
+    can be attached automatically by this library when using its other, more convenient features.
+    */
+    void* VMA_NULLABLE VMA_EXTENDS_VK_STRUCT(VkMemoryAllocateInfo) pMemoryAllocateNext;
+} VmaPoolCreateInfo;
+
+/** @} */
+
+/**
+\addtogroup group_alloc
+@{
+*/
+
+/**
+Parameters of #VmaAllocation objects that can be retrieved using function vmaGetAllocationInfo().
+
+There is also an extended version of this structure that carries additional parameters: #VmaAllocationInfo2.
+*/
+typedef struct VmaAllocationInfo
+{
+    /** \brief Memory type index that this allocation was allocated from.
+
+    It never changes.
+    */
+    uint32_t memoryType;
+    /** \brief Handle to Vulkan memory object.
+
+    The same memory object can be shared by multiple allocations.
+
+    It can change after the allocation is moved during \ref defragmentation.
+    */
+    VkDeviceMemory VMA_NULLABLE_NON_DISPATCHABLE deviceMemory;
+    /** \brief Offset in `VkDeviceMemory` object to the beginning of this allocation, in bytes. `(deviceMemory, offset)` pair is unique to this allocation.
+
+    You usually don't need to use this offset. If you create a buffer or an image together with the allocation using e.g. function
+    vmaCreateBuffer(), vmaCreateImage(), functions that operate on these resources refer to the beginning of the buffer or image,
+    not the entire device memory block. Functions like vmaMapMemory(), vmaBindBufferMemory() also refer to the beginning of the allocation
+    and apply this offset automatically.
+
+    It can change after the allocation is moved during \ref defragmentation.
+    */
+    VkDeviceSize offset;
+    /** \brief Size of this allocation, in bytes.
+
+    It never changes.
+
+    \note Allocation size returned in this variable may be greater than the size
+    requested for the resource e.g. as `VkBufferCreateInfo::size`. The whole size of the
+    allocation is accessible for operations on memory e.g. using a pointer after
+    mapping with vmaMapMemory(), but operations on the resource e.g. using
+    `vkCmdCopyBuffer` must be limited to the size of the resource.
+    */
+    VkDeviceSize size;
+    /** \brief Pointer to the beginning of this allocation as mapped data.
+
+    If the allocation hasn't been mapped using vmaMapMemory() and hasn't been
+    created with the #VMA_ALLOCATION_CREATE_MAPPED_BIT flag, this value is null.
+
+    It can change after a call to vmaMapMemory(), vmaUnmapMemory().
+    It can also change after the allocation is moved during \ref defragmentation.
+    */
+    void* VMA_NULLABLE pMappedData;
+    /** \brief Custom general-purpose pointer that was passed as VmaAllocationCreateInfo::pUserData or set using vmaSetAllocationUserData().
+
+    It can change after a call to vmaSetAllocationUserData() for this allocation.
+    */
+    void* VMA_NULLABLE pUserData;
+    /** \brief Custom allocation name that was set with vmaSetAllocationName().
+
+    It can change after a call to vmaSetAllocationName() for this allocation.
+
+    Another way to set a custom name is to pass it in VmaAllocationCreateInfo::pUserData with
+    the additional flag #VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT set [DEPRECATED].
+    */
+    const char* VMA_NULLABLE pName;
+} VmaAllocationInfo;
+
+/// Extended parameters of a #VmaAllocation object that can be retrieved using function vmaGetAllocationInfo2().
+typedef struct VmaAllocationInfo2
+{
+    /** \brief Basic parameters of the allocation.
+
+    If you need only these, you can use function vmaGetAllocationInfo() and structure #VmaAllocationInfo instead.
+    */
+    VmaAllocationInfo allocationInfo;
+    /** \brief Size of the `VkDeviceMemory` block that the allocation belongs to.
+
+    In case of an allocation with dedicated memory, it will be equal to `allocationInfo.size`.
+    */
+    VkDeviceSize blockSize;
+    /** \brief `VK_TRUE` if the allocation has dedicated memory, `VK_FALSE` if it was placed as part of a larger memory block.
+
+    When `VK_TRUE`, it also means `VkMemoryDedicatedAllocateInfo` was used when creating the allocation
+    (if the VK_KHR_dedicated_allocation extension or Vulkan version >= 1.1 is enabled).
+    */
+    VkBool32 dedicatedMemory;
+} VmaAllocationInfo2;
+
+/** Callback function called during vmaBeginDefragmentation() to check a custom criterion about ending the current defragmentation pass.
+
+Should return true if the defragmentation needs to stop the current pass.
+*/
+typedef VkBool32 (VKAPI_PTR* PFN_vmaCheckDefragmentationBreakFunction)(void* VMA_NULLABLE pUserData);
+
+/** \brief Parameters for defragmentation.
+
+To be used with function vmaBeginDefragmentation().
+*/
+typedef struct VmaDefragmentationInfo
+{
+    /// \brief Use a combination of #VmaDefragmentationFlagBits.
+    VmaDefragmentationFlags flags;
+    /** \brief Custom pool to be defragmented.
+
+    If null, default pools will undergo the defragmentation process.
+    */
+    VmaPool VMA_NULLABLE pool;
+    /** \brief Maximum number of bytes that can be copied during a single pass, while moving allocations to different places.
+
+    `0` means no limit.
+    */
+    VkDeviceSize maxBytesPerPass;
+    /** \brief Maximum number of allocations that can be moved during a single pass to a different place.
+
+    `0` means no limit.
+    */
+    uint32_t maxAllocationsPerPass;
+    /** \brief Optional custom callback for stopping vmaBeginDefragmentation().
+
+    It has to return true to break the current defragmentation pass.
+    */
+    PFN_vmaCheckDefragmentationBreakFunction VMA_NULLABLE pfnBreakCallback;
+    /// \brief Optional data to pass to the custom callback for stopping a pass of defragmentation.
+    void* VMA_NULLABLE pBreakCallbackUserData;
+} VmaDefragmentationInfo;
+
+/// Single move of an allocation to be done for defragmentation.
+typedef struct VmaDefragmentationMove
+{
+    /// Operation to be performed on the allocation by vmaEndDefragmentationPass(). Default value is #VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY. You can modify it.
+    VmaDefragmentationMoveOperation operation;
+    /// Allocation that should be moved.
+    VmaAllocation VMA_NOT_NULL srcAllocation;
+    /** \brief Temporary allocation pointing to destination memory that will replace `srcAllocation`.
+
+    \warning Do not store this allocation in your data structures! It exists only temporarily, for the duration of the defragmentation pass,
+    to be used for binding a new buffer/image to the destination memory using e.g. vmaBindBufferMemory().
+    vmaEndDefragmentationPass() will destroy it and make `srcAllocation` point to this memory.
+    */
+    VmaAllocation VMA_NOT_NULL dstTmpAllocation;
+} VmaDefragmentationMove;
+
+/** \brief Parameters for incremental defragmentation steps.
+
+To be used with function vmaBeginDefragmentationPass().
+*/
+typedef struct VmaDefragmentationPassMoveInfo
+{
+    /// Number of elements in the `pMoves` array.
+    uint32_t moveCount;
+    /** \brief Array of moves to be performed by the user in the current defragmentation pass.
+
+    Pointer to an array of `moveCount` elements, owned by VMA, created in vmaBeginDefragmentationPass(), destroyed in vmaEndDefragmentationPass().
+
+    For each element, you should:
+
+    1. Create a new buffer/image in the place pointed to by VmaDefragmentationMove::dstTmpAllocation.
+    2. Copy data from VmaDefragmentationMove::srcAllocation e.g. using `vkCmdCopyBuffer`, `vkCmdCopyImage`.
+    3. Make sure these commands have finished executing on the GPU.
+    4. Destroy the old buffer/image.
+
+    Only then can you finish the defragmentation pass by calling vmaEndDefragmentationPass().
+    After this call, the allocation will point to the new place in memory.
+
+    Alternatively, if you cannot move a specific allocation, you can set VmaDefragmentationMove::operation to #VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE.
+
+    Alternatively, if you decide you want to completely remove the allocation:
+
+    1. Destroy its buffer/image.
+    2. Set VmaDefragmentationMove::operation to #VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY.
+
+    Then, after vmaEndDefragmentationPass() the allocation will be freed.
+    */
+    VmaDefragmentationMove* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(moveCount) pMoves;
+} VmaDefragmentationPassMoveInfo;
+
+/// Statistics returned for the defragmentation process by function vmaEndDefragmentation().
+typedef struct VmaDefragmentationStats
+{
+    /// Total number of bytes that have been copied while moving allocations to different places.
+    VkDeviceSize bytesMoved;
+    /// Total number of bytes that have been released to the system by freeing empty `VkDeviceMemory` objects.
+    VkDeviceSize bytesFreed;
+    /// Number of allocations that have been moved to different places.
+    uint32_t allocationsMoved;
+    /// Number of empty `VkDeviceMemory` objects that have been released to the system.
+ uint32_t deviceMemoryBlocksFreed; +} VmaDefragmentationStats; + +/** @} */ + +/** +\addtogroup group_virtual +@{ +*/ + +/// Parameters of created #VmaVirtualBlock object to be passed to vmaCreateVirtualBlock(). +typedef struct VmaVirtualBlockCreateInfo +{ + /** \brief Total size of the virtual block. + + Sizes can be expressed in bytes or any units you want as long as you are consistent in using them. + For example, if you allocate from some array of structures, 1 can mean single instance of entire structure. + */ + VkDeviceSize size; + + /** \brief Use combination of #VmaVirtualBlockCreateFlagBits. + */ + VmaVirtualBlockCreateFlags flags; + + /** \brief Custom CPU memory allocation callbacks. Optional. + + Optional, can be null. When specified, they will be used for all CPU-side memory allocations. + */ + const VkAllocationCallbacks* VMA_NULLABLE pAllocationCallbacks; +} VmaVirtualBlockCreateInfo; + +/// Parameters of created virtual allocation to be passed to vmaVirtualAllocate(). +typedef struct VmaVirtualAllocationCreateInfo +{ + /** \brief Size of the allocation. + + Cannot be zero. + */ + VkDeviceSize size; + /** \brief Required alignment of the allocation. Optional. + + Must be power of two. Special value 0 has the same meaning as 1 - means no special alignment is required, so allocation can start at any offset. + */ + VkDeviceSize alignment; + /** \brief Use combination of #VmaVirtualAllocationCreateFlagBits. + */ + VmaVirtualAllocationCreateFlags flags; + /** \brief Custom pointer to be associated with the allocation. Optional. + + It can be any value and can be used for user-defined purposes. It can be fetched or changed later. + */ + void* VMA_NULLABLE pUserData; +} VmaVirtualAllocationCreateInfo; + +/// Parameters of an existing virtual allocation, returned by vmaGetVirtualAllocationInfo(). +typedef struct VmaVirtualAllocationInfo +{ + /** \brief Offset of the allocation. + + Offset at which the allocation was made. + */ + VkDeviceSize offset; + /** \brief Size of the allocation. + + Same value as passed in VmaVirtualAllocationCreateInfo::size. + */ + VkDeviceSize size; + /** \brief Custom pointer associated with the allocation. + + Same value as passed in VmaVirtualAllocationCreateInfo::pUserData or to vmaSetVirtualAllocationUserData(). + */ + void* VMA_NULLABLE pUserData; +} VmaVirtualAllocationInfo; + +/** @} */ + +#endif // _VMA_DATA_TYPES_DECLARATIONS + +#ifndef _VMA_FUNCTION_HEADERS + +/** +\addtogroup group_init +@{ +*/ + +/// Creates #VmaAllocator object. +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAllocator( + const VmaAllocatorCreateInfo* VMA_NOT_NULL pCreateInfo, + VmaAllocator VMA_NULLABLE* VMA_NOT_NULL pAllocator); + +/// Destroys allocator object. +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyAllocator( + VmaAllocator VMA_NULLABLE allocator); + +/** \brief Returns information about existing #VmaAllocator object - handle to Vulkan device etc. + +It might be useful if you want to keep just the #VmaAllocator handle and fetch other required handles to +`VkPhysicalDevice`, `VkDevice` etc. every time using this function. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocatorInfo( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocatorInfo* VMA_NOT_NULL pAllocatorInfo); + +/** +PhysicalDeviceProperties are fetched from physicalDevice by the allocator. +You can access it here, without fetching it again on your own. 
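+
+For example, a minimal sketch of reading a cached limit (assuming `allocator` is an existing #VmaAllocator):
+
+\code
+const VkPhysicalDeviceProperties* props = VMA_NULL;
+vmaGetPhysicalDeviceProperties(allocator, &props);
+// props now points to properties cached inside the allocator.
+const VkDeviceSize nonCoherentAtomSize = props->limits.nonCoherentAtomSize;
+\endcode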
+*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetPhysicalDeviceProperties( + VmaAllocator VMA_NOT_NULL allocator, + const VkPhysicalDeviceProperties* VMA_NULLABLE* VMA_NOT_NULL ppPhysicalDeviceProperties); + +/** +PhysicalDeviceMemoryProperties are fetched from physicalDevice by the allocator. +You can access it here, without fetching it again on your own. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetMemoryProperties( + VmaAllocator VMA_NOT_NULL allocator, + const VkPhysicalDeviceMemoryProperties* VMA_NULLABLE* VMA_NOT_NULL ppPhysicalDeviceMemoryProperties); + +/** +\brief Given Memory Type Index, returns Property Flags of this memory type. + +This is just a convenience function. Same information can be obtained using +vmaGetMemoryProperties(). +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetMemoryTypeProperties( + VmaAllocator VMA_NOT_NULL allocator, + uint32_t memoryTypeIndex, + VkMemoryPropertyFlags* VMA_NOT_NULL pFlags); + +/** \brief Sets index of the current frame. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaSetCurrentFrameIndex( + VmaAllocator VMA_NOT_NULL allocator, + uint32_t frameIndex); + +/** @} */ + +/** +\addtogroup group_stats +@{ +*/ + +/** \brief Retrieves statistics from current state of the Allocator. + +This function is called "calculate" not "get" because it has to traverse all +internal data structures, so it may be quite slow. Use it for debugging purposes. +For faster but more brief statistics suitable to be called every frame or every allocation, +use vmaGetHeapBudgets(). + +Note that when using allocator from multiple threads, returned information may immediately +become outdated. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaCalculateStatistics( + VmaAllocator VMA_NOT_NULL allocator, + VmaTotalStatistics* VMA_NOT_NULL pStats); + +/** \brief Retrieves information about current memory usage and budget for all memory heaps. + +\param allocator +\param[out] pBudgets Must point to array with number of elements at least equal to number of memory heaps in physical device used. + +This function is called "get" not "calculate" because it is very fast, suitable to be called +every frame or every allocation. For more detailed statistics use vmaCalculateStatistics(). + +Note that when using allocator from multiple threads, returned information may immediately +become outdated. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetHeapBudgets( + VmaAllocator VMA_NOT_NULL allocator, + VmaBudget* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL("VkPhysicalDeviceMemoryProperties::memoryHeapCount") pBudgets); + +/** @} */ + +/** +\addtogroup group_alloc +@{ +*/ + +/** +\brief Helps to find memoryTypeIndex, given memoryTypeBits and VmaAllocationCreateInfo. + +This algorithm tries to find a memory type that: + +- Is allowed by memoryTypeBits. +- Contains all the flags from pAllocationCreateInfo->requiredFlags. +- Matches intended usage. +- Has as many flags from pAllocationCreateInfo->preferredFlags as possible. + +\return Returns VK_ERROR_FEATURE_NOT_PRESENT if not found. Receiving such result +from this function or any other allocating function probably means that your +device doesn't support any memory type with requested features for the specific +type of resource you want to use it for. Please check parameters of your +resource, like image layout (OPTIMAL versus LINEAR) or mip level count. 
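+
+For example, a minimal sketch of searching for a host-visible, host-coherent memory type,
+assuming `allocator` is an existing #VmaAllocator:
+
+\code
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+
+// UINT32_MAX as memoryTypeBits means any memory type index is acceptable.
+uint32_t memTypeIndex = 0;
+VkResult res = vmaFindMemoryTypeIndex(allocator, UINT32_MAX, &allocCreateInfo, &memTypeIndex);
+\endcode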
+*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndex( + VmaAllocator VMA_NOT_NULL allocator, + uint32_t memoryTypeBits, + const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo, + uint32_t* VMA_NOT_NULL pMemoryTypeIndex); + +/** +\brief Helps to find memoryTypeIndex, given VkBufferCreateInfo and VmaAllocationCreateInfo. + +It can be useful e.g. to determine value to be used as VmaPoolCreateInfo::memoryTypeIndex. +It internally creates a temporary, dummy buffer that never has memory bound. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForBufferInfo( + VmaAllocator VMA_NOT_NULL allocator, + const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, + const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo, + uint32_t* VMA_NOT_NULL pMemoryTypeIndex); + +/** +\brief Helps to find memoryTypeIndex, given VkImageCreateInfo and VmaAllocationCreateInfo. + +It can be useful e.g. to determine value to be used as VmaPoolCreateInfo::memoryTypeIndex. +It internally creates a temporary, dummy image that never has memory bound. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForImageInfo( + VmaAllocator VMA_NOT_NULL allocator, + const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo, + const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo, + uint32_t* VMA_NOT_NULL pMemoryTypeIndex); + +/** \brief Allocates Vulkan device memory and creates #VmaPool object. + +\param allocator Allocator object. +\param pCreateInfo Parameters of pool to create. +\param[out] pPool Handle to created pool. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreatePool( + VmaAllocator VMA_NOT_NULL allocator, + const VmaPoolCreateInfo* VMA_NOT_NULL pCreateInfo, + VmaPool VMA_NULLABLE* VMA_NOT_NULL pPool); + +/** \brief Destroys #VmaPool object and frees Vulkan device memory. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyPool( + VmaAllocator VMA_NOT_NULL allocator, + VmaPool VMA_NULLABLE pool); + +/** @} */ + +/** +\addtogroup group_stats +@{ +*/ + +/** \brief Retrieves statistics of existing #VmaPool object. + +\param allocator Allocator object. +\param pool Pool object. +\param[out] pPoolStats Statistics of specified pool. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolStatistics( + VmaAllocator VMA_NOT_NULL allocator, + VmaPool VMA_NOT_NULL pool, + VmaStatistics* VMA_NOT_NULL pPoolStats); + +/** \brief Retrieves detailed statistics of existing #VmaPool object. + +\param allocator Allocator object. +\param pool Pool object. +\param[out] pPoolStats Statistics of specified pool. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaCalculatePoolStatistics( + VmaAllocator VMA_NOT_NULL allocator, + VmaPool VMA_NOT_NULL pool, + VmaDetailedStatistics* VMA_NOT_NULL pPoolStats); + +/** @} */ + +/** +\addtogroup group_alloc +@{ +*/ + +/** \brief Checks magic number in margins around all allocations in given memory pool in search for corruptions. + +Corruption detection is enabled only when `VMA_DEBUG_DETECT_CORRUPTION` macro is defined to nonzero, +`VMA_DEBUG_MARGIN` is defined to nonzero and the pool is created in memory type that is +`HOST_VISIBLE` and `HOST_COHERENT`. For more information, see [Corruption detection](@ref debugging_memory_usage_corruption_detection). + +Possible return values: + +- `VK_ERROR_FEATURE_NOT_PRESENT` - corruption detection is not enabled for specified pool. +- `VK_SUCCESS` - corruption detection has been performed and succeeded. +- `VK_ERROR_UNKNOWN` - corruption detection has been performed and found memory corruptions around one of the allocations. 
+  `VMA_ASSERT` is also fired in that case.
+- Other value: Error returned by Vulkan, e.g. memory mapping failure.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckPoolCorruption(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VmaPool VMA_NOT_NULL pool);
+
+/** \brief Retrieves the name of a custom pool.
+
+After the call `ppName` is either null or points to an internally-owned null-terminated string
+containing the name of the pool that was previously set. The pointer becomes invalid when the pool is
+destroyed or its name is changed using vmaSetPoolName().
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolName(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VmaPool VMA_NOT_NULL pool,
+    const char* VMA_NULLABLE* VMA_NOT_NULL ppName);
+
+/** \brief Sets the name of a custom pool.
+
+`pName` can be either null or a pointer to a null-terminated string with the new name for the pool.
+The function makes an internal copy of the string, so it can be changed or freed immediately after this call.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaSetPoolName(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VmaPool VMA_NOT_NULL pool,
+    const char* VMA_NULLABLE pName);
+
+/** \brief General purpose memory allocation.
+
+\param allocator
+\param pVkMemoryRequirements
+\param pCreateInfo
+\param[out] pAllocation Handle to allocated memory.
+\param[out] pAllocationInfo Optional. Information about allocated memory. It can be later fetched using function vmaGetAllocationInfo().
+
+You should free the memory using vmaFreeMemory() or vmaFreeMemoryPages().
+
+It is recommended to use vmaAllocateMemoryForBuffer(), vmaAllocateMemoryForImage(),
+vmaCreateBuffer(), vmaCreateImage() instead whenever possible.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemory(
+    VmaAllocator VMA_NOT_NULL allocator,
+    const VkMemoryRequirements* VMA_NOT_NULL pVkMemoryRequirements,
+    const VmaAllocationCreateInfo* VMA_NOT_NULL pCreateInfo,
+    VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation,
+    VmaAllocationInfo* VMA_NULLABLE pAllocationInfo);
+
+/** \brief General purpose memory allocation for multiple allocation objects at once.
+
+\param allocator Allocator object.
+\param pVkMemoryRequirements Memory requirements for each allocation.
+\param pCreateInfo Creation parameters for each allocation.
+\param allocationCount Number of allocations to make.
+\param[out] pAllocations Pointer to an array that will be filled with handles to created allocations.
+\param[out] pAllocationInfo Optional. Pointer to an array that will be filled with parameters of created allocations.
+
+You should free the memory using vmaFreeMemory() or vmaFreeMemoryPages().
+
+The word "pages" is just a suggestion to use this function to allocate pieces of memory needed for sparse binding.
+It is just a general purpose allocation function able to make multiple allocations at once.
+It may be internally optimized to be more efficient than calling vmaAllocateMemory() `allocationCount` times.
+
+All allocations are made using the same parameters. All of them are created out of the same memory pool and type.
+If any allocation fails, all allocations already made within this function call are also freed, so that when the
+returned result is not `VK_SUCCESS`, the `pAllocations` array is always entirely filled with `VK_NULL_HANDLE`.
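+
+A minimal sketch of allocating and freeing 4 "pages" with identical parameters, assuming
+`allocator` exists and `baseMemReq` was obtained earlier e.g. from `vkGetBufferMemoryRequirements`:
+
+\code
+const size_t pageCount = 4;
+VkMemoryRequirements memReqs[pageCount];
+VmaAllocationCreateInfo createInfos[pageCount] = {};
+for(size_t i = 0; i < pageCount; ++i)
+{
+    memReqs[i] = baseMemReq;
+    createInfos[i].requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+}
+
+VmaAllocation allocs[pageCount];
+VkResult res = vmaAllocateMemoryPages(allocator, memReqs, createInfos, pageCount, allocs, VMA_NULL);
+// ... use the allocations ...
+vmaFreeMemoryPages(allocator, pageCount, allocs);
+\endcode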
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryPages(
+    VmaAllocator VMA_NOT_NULL allocator,
+    const VkMemoryRequirements* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pVkMemoryRequirements,
+    const VmaAllocationCreateInfo* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pCreateInfo,
+    size_t allocationCount,
+    VmaAllocation VMA_NULLABLE* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pAllocations,
+    VmaAllocationInfo* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) pAllocationInfo);
+
+/** \brief Allocates memory suitable for given `VkBuffer`.
+
+\param allocator
+\param buffer
+\param pCreateInfo
+\param[out] pAllocation Handle to allocated memory.
+\param[out] pAllocationInfo Optional. Information about allocated memory. It can be later fetched using function vmaGetAllocationInfo().
+
+It only creates #VmaAllocation. To bind the memory to the buffer, use vmaBindBufferMemory().
+
+This is a special-purpose function. In most cases you should use vmaCreateBuffer().
+
+You must free the allocation using vmaFreeMemory() when no longer needed.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForBuffer(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VkBuffer VMA_NOT_NULL_NON_DISPATCHABLE buffer,
+    const VmaAllocationCreateInfo* VMA_NOT_NULL pCreateInfo,
+    VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation,
+    VmaAllocationInfo* VMA_NULLABLE pAllocationInfo);
+
+/** \brief Allocates memory suitable for given `VkImage`.
+
+\param allocator
+\param image
+\param pCreateInfo
+\param[out] pAllocation Handle to allocated memory.
+\param[out] pAllocationInfo Optional. Information about allocated memory. It can be later fetched using function vmaGetAllocationInfo().
+
+It only creates #VmaAllocation. To bind the memory to the image, use vmaBindImageMemory().
+
+This is a special-purpose function. In most cases you should use vmaCreateImage().
+
+You must free the allocation using vmaFreeMemory() when no longer needed.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForImage(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VkImage VMA_NOT_NULL_NON_DISPATCHABLE image,
+    const VmaAllocationCreateInfo* VMA_NOT_NULL pCreateInfo,
+    VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation,
+    VmaAllocationInfo* VMA_NULLABLE pAllocationInfo);
+
+/** \brief Frees memory previously allocated using vmaAllocateMemory(), vmaAllocateMemoryForBuffer(), or vmaAllocateMemoryForImage().
+
+Passing `VK_NULL_HANDLE` as `allocation` is valid. Such a function call is just skipped.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaFreeMemory(
+    VmaAllocator VMA_NOT_NULL allocator,
+    const VmaAllocation VMA_NULLABLE allocation);
+
+/** \brief Frees memory and destroys multiple allocations.
+
+The word "pages" is just a suggestion to use this function to free pieces of memory used for sparse binding.
+It is just a general purpose function to free memory and destroy allocations made using e.g. vmaAllocateMemory(),
+vmaAllocateMemoryPages() and other functions.
+It may be internally optimized to be more efficient than calling vmaFreeMemory() `allocationCount` times.
+
+Allocations in the `pAllocations` array can come from any memory pools and types.
+Passing `VK_NULL_HANDLE` as elements of the `pAllocations` array is valid. Such entries are just skipped.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaFreeMemoryPages(
+    VmaAllocator VMA_NOT_NULL allocator,
+    size_t allocationCount,
+    const VmaAllocation VMA_NULLABLE* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(allocationCount) pAllocations);
+
+/** \brief Returns current information about specified allocation.
+
+Current parameters of the given allocation are returned in `pAllocationInfo`.
+
+This function doesn't lock any mutex, so it should be quite efficient.
+Still, you should avoid calling it too often.
+You can retrieve the same VmaAllocationInfo structure while creating your resource, from functions
+vmaCreateBuffer(), vmaCreateImage(). You can remember it if you are sure the parameters don't change
+(e.g. due to defragmentation).
+
+There is also a new function vmaGetAllocationInfo2() that offers extended information
+about the allocation, returned using the new structure #VmaAllocationInfo2.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationInfo(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation,
+    VmaAllocationInfo* VMA_NOT_NULL pAllocationInfo);
+
+/** \brief Returns extended information about specified allocation.
+
+Current parameters of the given allocation are returned in `pAllocationInfo`.
+Extended parameters in structure #VmaAllocationInfo2 include the memory block size
+and a flag telling whether the allocation has dedicated memory.
+It can be useful e.g. for interop with OpenGL.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationInfo2(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation,
+    VmaAllocationInfo2* VMA_NOT_NULL pAllocationInfo);
+
+/** \brief Sets pUserData in given allocation to a new value.
+
+The value of pointer `pUserData` is copied to the allocation's `pUserData`.
+It is opaque, so you can use it however you want - e.g.
+as a pointer, an ordinal number or some handle to your own data.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationUserData(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation,
+    void* VMA_NULLABLE pUserData);
+
+/** \brief Sets pName in given allocation to a new value.
+
+`pName` must be either null or a pointer to a null-terminated string. The function
+makes a local copy of the string and sets it as the allocation's `pName`. The string
+passed as pName doesn't need to be valid for the whole lifetime of the allocation -
+you can free it after this call. The string previously pointed to by the allocation's
+`pName` is freed from memory.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationName(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation,
+    const char* VMA_NULLABLE pName);
+
+/**
+\brief Given an allocation, returns the Property Flags of its memory type.
+
+This is just a convenience function. The same information can be obtained using
+vmaGetAllocationInfo() + vmaGetMemoryProperties().
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationMemoryProperties(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation,
+    VkMemoryPropertyFlags* VMA_NOT_NULL pFlags);
+
+/** \brief Maps memory represented by given allocation and returns a pointer to it.
+
+Maps memory represented by the given allocation to make it accessible to CPU code.
+On success, `*ppData` contains a pointer to the first byte of this memory.
+
+\warning
+If the allocation is part of a bigger `VkDeviceMemory` block, the returned pointer is
+correctly offset to the beginning of the region assigned to this particular allocation.
+Unlike the result of `vkMapMemory`, it points to the allocation, not to the beginning of the whole block.
+You should not add VmaAllocationInfo::offset to it!
+
+Mapping is internally reference-counted and synchronized, so even though the raw Vulkan
+function `vkMapMemory()` cannot be used to map the same block of `VkDeviceMemory`
+multiple times simultaneously, it is safe to call this function on allocations
+assigned to the same memory block. Actual Vulkan memory will be mapped on the first
+mapping and unmapped on the last unmapping.
+
+If the function succeeded, you must call vmaUnmapMemory() to unmap the
+allocation when mapping is no longer needed or before freeing the allocation, at
+the latest.
+
+It is also safe to call this function multiple times on the same allocation. You
+must call vmaUnmapMemory() the same number of times as you called vmaMapMemory().
+
+It is also safe to call this function on an allocation created with the
+#VMA_ALLOCATION_CREATE_MAPPED_BIT flag. Its memory stays mapped all the time.
+You must still call vmaUnmapMemory() the same number of times as you called
+vmaMapMemory(). You must not call vmaUnmapMemory() an additional time to free the
+"0-th" mapping made automatically due to the #VMA_ALLOCATION_CREATE_MAPPED_BIT flag.
+
+This function fails when used on an allocation made in a memory type that is not
+`HOST_VISIBLE`.
+
+This function doesn't automatically flush or invalidate caches.
+If the allocation is made from a memory type that is not `HOST_COHERENT`,
+you also need to use vmaInvalidateAllocation() / vmaFlushAllocation(), as required by the Vulkan specification.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaMapMemory(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation,
+    void* VMA_NULLABLE* VMA_NOT_NULL ppData);
+
+/** \brief Unmaps memory represented by given allocation, mapped previously using vmaMapMemory().
+
+For details, see description of vmaMapMemory().
+
+This function doesn't automatically flush or invalidate caches.
+If the allocation is made from a memory type that is not `HOST_COHERENT`,
+you also need to use vmaInvalidateAllocation() / vmaFlushAllocation(), as required by the Vulkan specification.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaUnmapMemory(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation);
+
+/** \brief Flushes memory of given allocation.
+
+Calls `vkFlushMappedMemoryRanges()` for memory associated with the given range of the given allocation.
+It needs to be called after writing to a mapped memory for memory types that are not `HOST_COHERENT`.
+The unmap operation doesn't do that automatically.
+
+- `offset` must be relative to the beginning of the allocation.
+- `size` can be `VK_WHOLE_SIZE`. It means all memory from `offset` to the end of the given allocation.
+- `offset` and `size` don't have to be aligned.
+  They are internally rounded down/up to a multiple of `nonCoherentAtomSize`.
+- If `size` is 0, this call is ignored.
+- If the memory type that the `allocation` belongs to is not `HOST_VISIBLE` or it is `HOST_COHERENT`,
+  this call is ignored.
+
+Warning! `offset` and `size` are relative to the contents of the given `allocation`.
+If you mean the whole allocation, you can pass 0 and `VK_WHOLE_SIZE`, respectively.
+Do not pass the allocation's offset as `offset`!
+
+This function returns the `VkResult` from `vkFlushMappedMemoryRanges` if it is
+called, otherwise `VK_SUCCESS`.
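+
+A minimal sketch of writing through a mapping followed by an explicit flush, assuming
+`alloc` is an allocation in `HOST_VISIBLE` (possibly non-`HOST_COHERENT`) memory:
+
+\code
+void* mapped = VMA_NULL;
+VkResult res = vmaMapMemory(allocator, alloc, &mapped);
+if(res == VK_SUCCESS)
+{
+    memcpy(mapped, srcData, srcDataSize); // srcData/srcDataSize are the host data to upload
+    res = vmaFlushAllocation(allocator, alloc, 0, VK_WHOLE_SIZE); // flush the whole allocation
+    vmaUnmapMemory(allocator, alloc);
+}
+\endcode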
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaFlushAllocation(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation,
+    VkDeviceSize offset,
+    VkDeviceSize size);
+
+/** \brief Invalidates memory of given allocation.
+
+Calls `vkInvalidateMappedMemoryRanges()` for memory associated with the given range of the given allocation.
+It needs to be called before reading from a mapped memory for memory types that are not `HOST_COHERENT`.
+The map operation doesn't do that automatically.
+
+- `offset` must be relative to the beginning of the allocation.
+- `size` can be `VK_WHOLE_SIZE`. It means all memory from `offset` to the end of the given allocation.
+- `offset` and `size` don't have to be aligned.
+  They are internally rounded down/up to a multiple of `nonCoherentAtomSize`.
+- If `size` is 0, this call is ignored.
+- If the memory type that the `allocation` belongs to is not `HOST_VISIBLE` or it is `HOST_COHERENT`,
+  this call is ignored.
+
+Warning! `offset` and `size` are relative to the contents of the given `allocation`.
+If you mean the whole allocation, you can pass 0 and `VK_WHOLE_SIZE`, respectively.
+Do not pass the allocation's offset as `offset`!
+
+This function returns the `VkResult` from `vkInvalidateMappedMemoryRanges` if
+it is called, otherwise `VK_SUCCESS`.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaInvalidateAllocation(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation,
+    VkDeviceSize offset,
+    VkDeviceSize size);
+
+/** \brief Flushes memory of given set of allocations.
+
+Calls `vkFlushMappedMemoryRanges()` for memory associated with the given ranges of the given allocations.
+For more information, see documentation of vmaFlushAllocation().
+
+\param allocator
+\param allocationCount
+\param allocations
+\param offsets If not null, it must point to an array of offsets of regions to flush, relative to the beginning of respective allocations. Null means all offsets are zero.
+\param sizes If not null, it must point to an array of sizes of regions to flush in respective allocations. Null means `VK_WHOLE_SIZE` for all allocations.
+
+This function returns the `VkResult` from `vkFlushMappedMemoryRanges` if it is
+called, otherwise `VK_SUCCESS`.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaFlushAllocations(
+    VmaAllocator VMA_NOT_NULL allocator,
+    uint32_t allocationCount,
+    const VmaAllocation VMA_NOT_NULL* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) allocations,
+    const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) offsets,
+    const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) sizes);
+
+/** \brief Invalidates memory of given set of allocations.
+
+Calls `vkInvalidateMappedMemoryRanges()` for memory associated with the given ranges of the given allocations.
+For more information, see documentation of vmaInvalidateAllocation().
+
+\param allocator
+\param allocationCount
+\param allocations
+\param offsets If not null, it must point to an array of offsets of regions to invalidate, relative to the beginning of respective allocations. Null means all offsets are zero.
+\param sizes If not null, it must point to an array of sizes of regions to invalidate in respective allocations. Null means `VK_WHOLE_SIZE` for all allocations.
+
+This function returns the `VkResult` from `vkInvalidateMappedMemoryRanges` if it is
+called, otherwise `VK_SUCCESS`.
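+
+A minimal sketch of invalidating the whole contents of two allocations before reading
+them on the host, assuming `allocA` and `allocB` are mapped, `HOST_VISIBLE` allocations:
+
+\code
+VmaAllocation allocs[2] = { allocA, allocB };
+// Null offsets/sizes mean offset 0 and VK_WHOLE_SIZE for every allocation.
+VkResult res = vmaInvalidateAllocations(allocator, 2, allocs, VMA_NULL, VMA_NULL);
+\endcode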
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaInvalidateAllocations(
+    VmaAllocator VMA_NOT_NULL allocator,
+    uint32_t allocationCount,
+    const VmaAllocation VMA_NOT_NULL* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) allocations,
+    const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) offsets,
+    const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) sizes);
+
+/** \brief Maps the allocation temporarily if needed, copies data from specified host pointer to it, and flushes the memory from the host caches if needed.
+
+\param allocator
+\param pSrcHostPointer Pointer to the host data that becomes the source of the copy.
+\param dstAllocation Handle to the allocation that becomes the destination of the copy.
+\param dstAllocationLocalOffset Offset within `dstAllocation` where to write the copied data, in bytes.
+\param size Number of bytes to copy.
+
+This is a convenience function that allows copying data from a host pointer to an allocation easily.
+The same behavior can be achieved by calling vmaMapMemory(), `memcpy()`, vmaUnmapMemory(), vmaFlushAllocation().
+
+This function can be called only for allocations created in a memory type that has the `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` flag.
+It can be ensured e.g. by using #VMA_MEMORY_USAGE_AUTO and #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or
+#VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT.
+Otherwise, the function will fail and generate a Validation Layers error.
+
+`dstAllocationLocalOffset` is relative to the contents of the given `dstAllocation`.
+If you mean the whole allocation, you should pass 0.
+Do not pass the allocation's offset within the device memory block as this parameter!
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCopyMemoryToAllocation(
+    VmaAllocator VMA_NOT_NULL allocator,
+    const void* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(size) pSrcHostPointer,
+    VmaAllocation VMA_NOT_NULL dstAllocation,
+    VkDeviceSize dstAllocationLocalOffset,
+    VkDeviceSize size);
+
+/** \brief Invalidates memory in the host caches if needed, maps the allocation temporarily if needed, and copies data from it to a specified host pointer.
+
+\param allocator
+\param srcAllocation Handle to the allocation that becomes the source of the copy.
+\param srcAllocationLocalOffset Offset within `srcAllocation` where to read the copied data, in bytes.
+\param pDstHostPointer Pointer to the host memory that becomes the destination of the copy.
+\param size Number of bytes to copy.
+
+This is a convenience function that allows copying data from an allocation to a host pointer easily.
+The same behavior can be achieved by calling vmaInvalidateAllocation(), vmaMapMemory(), `memcpy()`, vmaUnmapMemory().
+
+This function should be called only for allocations created in a memory type that has the `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT`
+and `VK_MEMORY_PROPERTY_HOST_CACHED_BIT` flags.
+It can be ensured e.g. by using #VMA_MEMORY_USAGE_AUTO and #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT.
+Otherwise, the function may fail and generate a Validation Layers error.
+It may also work very slowly when reading from uncached memory.
+
+`srcAllocationLocalOffset` is relative to the contents of the given `srcAllocation`.
+If you mean the whole allocation, you should pass 0.
+Do not pass the allocation's offset within the device memory block as this parameter!
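+
+A minimal sketch of reading back a small result buffer, assuming `alloc` was created
+with #VMA_MEMORY_USAGE_AUTO and #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT:
+
+\code
+float results[64];
+VkResult res = vmaCopyAllocationToMemory(allocator, alloc, 0, results, sizeof(results));
+\endcode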
+*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCopyAllocationToMemory( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL srcAllocation, + VkDeviceSize srcAllocationLocalOffset, + void* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(size) pDstHostPointer, + VkDeviceSize size); + +/** \brief Checks magic number in margins around all allocations in given memory types (in both default and custom pools) in search for corruptions. + +\param allocator +\param memoryTypeBits Bit mask, where each bit set means that a memory type with that index should be checked. + +Corruption detection is enabled only when `VMA_DEBUG_DETECT_CORRUPTION` macro is defined to nonzero, +`VMA_DEBUG_MARGIN` is defined to nonzero and only for memory types that are +`HOST_VISIBLE` and `HOST_COHERENT`. For more information, see [Corruption detection](@ref debugging_memory_usage_corruption_detection). + +Possible return values: + +- `VK_ERROR_FEATURE_NOT_PRESENT` - corruption detection is not enabled for any of specified memory types. +- `VK_SUCCESS` - corruption detection has been performed and succeeded. +- `VK_ERROR_UNKNOWN` - corruption detection has been performed and found memory corruptions around one of the allocations. + `VMA_ASSERT` is also fired in that case. +- Other value: Error returned by Vulkan, e.g. memory mapping failure. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckCorruption( + VmaAllocator VMA_NOT_NULL allocator, + uint32_t memoryTypeBits); + +/** \brief Begins defragmentation process. + +\param allocator Allocator object. +\param pInfo Structure filled with parameters of defragmentation. +\param[out] pContext Context object that must be passed to vmaEndDefragmentation() to finish defragmentation. +\returns +- `VK_SUCCESS` if defragmentation can begin. +- `VK_ERROR_FEATURE_NOT_PRESENT` if defragmentation is not supported. + +For more information about defragmentation, see documentation chapter: +[Defragmentation](@ref defragmentation). +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentation( + VmaAllocator VMA_NOT_NULL allocator, + const VmaDefragmentationInfo* VMA_NOT_NULL pInfo, + VmaDefragmentationContext VMA_NULLABLE* VMA_NOT_NULL pContext); + +/** \brief Ends defragmentation process. + +\param allocator Allocator object. +\param context Context object that has been created by vmaBeginDefragmentation(). +\param[out] pStats Optional stats for the defragmentation. Can be null. + +Use this function to finish defragmentation started by vmaBeginDefragmentation(). +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaEndDefragmentation( + VmaAllocator VMA_NOT_NULL allocator, + VmaDefragmentationContext VMA_NOT_NULL context, + VmaDefragmentationStats* VMA_NULLABLE pStats); + +/** \brief Starts single defragmentation pass. + +\param allocator Allocator object. +\param context Context object that has been created by vmaBeginDefragmentation(). +\param[out] pPassInfo Computed information for current pass. +\returns +- `VK_SUCCESS` if no more moves are possible. Then you can omit call to vmaEndDefragmentationPass() and simply end whole defragmentation. +- `VK_INCOMPLETE` if there are pending moves returned in `pPassInfo`. You need to perform them, call vmaEndDefragmentationPass(), + and then preferably try another pass with vmaBeginDefragmentationPass(). 
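+
+A minimal sketch of a full defragmentation loop, assuming `myPool` is a custom pool to
+defragment; resource re-creation and GPU copies are elided:
+
+\code
+VmaDefragmentationInfo defragInfo = {};
+defragInfo.pool = myPool;
+
+VmaDefragmentationContext defragCtx;
+VkResult res = vmaBeginDefragmentation(allocator, &defragInfo, &defragCtx);
+if(res == VK_SUCCESS)
+{
+    for(;;)
+    {
+        VmaDefragmentationPassMoveInfo pass;
+        res = vmaBeginDefragmentationPass(allocator, defragCtx, &pass);
+        if(res == VK_SUCCESS)
+            break; // No more moves possible.
+        // For each pass.pMoves[i]: create a new buffer/image bound to
+        // pMoves[i].dstTmpAllocation, copy the data on the GPU, wait for the
+        // copy to finish, then destroy the old resource.
+        res = vmaEndDefragmentationPass(allocator, defragCtx, &pass);
+        if(res == VK_SUCCESS)
+            break;
+    }
+    vmaEndDefragmentation(allocator, defragCtx, VMA_NULL);
+}
+\endcode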
+*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentationPass( + VmaAllocator VMA_NOT_NULL allocator, + VmaDefragmentationContext VMA_NOT_NULL context, + VmaDefragmentationPassMoveInfo* VMA_NOT_NULL pPassInfo); + +/** \brief Ends single defragmentation pass. + +\param allocator Allocator object. +\param context Context object that has been created by vmaBeginDefragmentation(). +\param pPassInfo Computed information for current pass filled by vmaBeginDefragmentationPass() and possibly modified by you. + +Returns `VK_SUCCESS` if no more moves are possible or `VK_INCOMPLETE` if more defragmentations are possible. + +Ends incremental defragmentation pass and commits all defragmentation moves from `pPassInfo`. +After this call: + +- Allocations at `pPassInfo[i].srcAllocation` that had `pPassInfo[i].operation ==` #VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY + (which is the default) will be pointing to the new destination place. +- Allocation at `pPassInfo[i].srcAllocation` that had `pPassInfo[i].operation ==` #VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY + will be freed. + +If no more moves are possible you can end whole defragmentation. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaEndDefragmentationPass( + VmaAllocator VMA_NOT_NULL allocator, + VmaDefragmentationContext VMA_NOT_NULL context, + VmaDefragmentationPassMoveInfo* VMA_NOT_NULL pPassInfo); + +/** \brief Binds buffer to allocation. + +Binds specified buffer to region of memory represented by specified allocation. +Gets `VkDeviceMemory` handle and offset from the allocation. +If you want to create a buffer, allocate memory for it and bind them together separately, +you should use this function for binding instead of standard `vkBindBufferMemory()`, +because it ensures proper synchronization so that when a `VkDeviceMemory` object is used by multiple +allocations, calls to `vkBind*Memory()` or `vkMapMemory()` won't happen from multiple threads simultaneously +(which is illegal in Vulkan). + +It is recommended to use function vmaCreateBuffer() instead of this one. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindBufferMemory( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VkBuffer VMA_NOT_NULL_NON_DISPATCHABLE buffer); + +/** \brief Binds buffer to allocation with additional parameters. + +\param allocator +\param allocation +\param allocationLocalOffset Additional offset to be added while binding, relative to the beginning of the `allocation`. Normally it should be 0. +\param buffer +\param pNext A chain of structures to be attached to `VkBindBufferMemoryInfoKHR` structure used internally. Normally it should be null. + +This function is similar to vmaBindBufferMemory(), but it provides additional parameters. + +If `pNext` is not null, #VmaAllocator object must have been created with #VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT flag +or with VmaAllocatorCreateInfo::vulkanApiVersion `>= VK_API_VERSION_1_1`. Otherwise the call fails. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindBufferMemory2( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VkDeviceSize allocationLocalOffset, + VkBuffer VMA_NOT_NULL_NON_DISPATCHABLE buffer, + const void* VMA_NULLABLE VMA_EXTENDS_VK_STRUCT(VkBindBufferMemoryInfoKHR) pNext); + +/** \brief Binds image to allocation. + +Binds specified image to region of memory represented by specified allocation. +Gets `VkDeviceMemory` handle and offset from the allocation. 
+If you want to create an image, allocate memory for it and bind them together separately, +you should use this function for binding instead of standard `vkBindImageMemory()`, +because it ensures proper synchronization so that when a `VkDeviceMemory` object is used by multiple +allocations, calls to `vkBind*Memory()` or `vkMapMemory()` won't happen from multiple threads simultaneously +(which is illegal in Vulkan). + +It is recommended to use function vmaCreateImage() instead of this one. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindImageMemory( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VkImage VMA_NOT_NULL_NON_DISPATCHABLE image); + +/** \brief Binds image to allocation with additional parameters. + +\param allocator +\param allocation +\param allocationLocalOffset Additional offset to be added while binding, relative to the beginning of the `allocation`. Normally it should be 0. +\param image +\param pNext A chain of structures to be attached to `VkBindImageMemoryInfoKHR` structure used internally. Normally it should be null. + +This function is similar to vmaBindImageMemory(), but it provides additional parameters. + +If `pNext` is not null, #VmaAllocator object must have been created with #VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT flag +or with VmaAllocatorCreateInfo::vulkanApiVersion `>= VK_API_VERSION_1_1`. Otherwise the call fails. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindImageMemory2( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VkDeviceSize allocationLocalOffset, + VkImage VMA_NOT_NULL_NON_DISPATCHABLE image, + const void* VMA_NULLABLE VMA_EXTENDS_VK_STRUCT(VkBindImageMemoryInfoKHR) pNext); + +/** \brief Creates a new `VkBuffer`, allocates and binds memory for it. + +\param allocator +\param pBufferCreateInfo +\param pAllocationCreateInfo +\param[out] pBuffer Buffer that was created. +\param[out] pAllocation Allocation that was created. +\param[out] pAllocationInfo Optional. Information about allocated memory. It can be later fetched using function vmaGetAllocationInfo(). + +This function automatically: + +-# Creates buffer. +-# Allocates appropriate memory for it. +-# Binds the buffer with the memory. + +If any of these operations fail, buffer and allocation are not created, +returned value is negative error code, `*pBuffer` and `*pAllocation` are null. + +If the function succeeded, you must destroy both buffer and allocation when you +no longer need them using either convenience function vmaDestroyBuffer() or +separately, using `vkDestroyBuffer()` and vmaFreeMemory(). + +If #VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT flag was used, +VK_KHR_dedicated_allocation extension is used internally to query driver whether +it requires or prefers the new buffer to have dedicated allocation. If yes, +and if dedicated allocation is possible +(#VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT is not used), it creates dedicated +allocation for this buffer, just like when using +#VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. + +\note This function creates a new `VkBuffer`. Sub-allocation of parts of one large buffer, +although recommended as a good practice, is out of scope of this library and could be implemented +by the user as a higher-level logic on top of VMA. 
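+
+A minimal sketch, assuming `allocator` is an existing #VmaAllocator:
+
+\code
+VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufCreateInfo.size = 65536;
+bufCreateInfo.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+
+VkBuffer buf;
+VmaAllocation alloc;
+VkResult res = vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, VMA_NULL);
+// ... use the buffer ...
+vmaDestroyBuffer(allocator, buf, alloc);
+\endcode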
+*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBuffer( + VmaAllocator VMA_NOT_NULL allocator, + const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, + const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo, + VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer, + VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation, + VmaAllocationInfo* VMA_NULLABLE pAllocationInfo); + +/** \brief Creates a buffer with additional minimum alignment. + +Similar to vmaCreateBuffer() but provides additional parameter `minAlignment` which allows to specify custom, +minimum alignment to be used when placing the buffer inside a larger memory block, which may be needed e.g. +for interop with OpenGL. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBufferWithAlignment( + VmaAllocator VMA_NOT_NULL allocator, + const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, + const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo, + VkDeviceSize minAlignment, + VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer, + VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation, + VmaAllocationInfo* VMA_NULLABLE pAllocationInfo); + +/** \brief Creates a new `VkBuffer`, binds already created memory for it. + +\param allocator +\param allocation Allocation that provides memory to be used for binding new buffer to it. +\param pBufferCreateInfo +\param[out] pBuffer Buffer that was created. + +This function automatically: + +-# Creates buffer. +-# Binds the buffer with the supplied memory. + +If any of these operations fail, buffer is not created, +returned value is negative error code and `*pBuffer` is null. + +If the function succeeded, you must destroy the buffer when you +no longer need it using `vkDestroyBuffer()`. If you want to also destroy the corresponding +allocation you can use convenience function vmaDestroyBuffer(). + +\note There is a new version of this function augmented with parameter `allocationLocalOffset` - see vmaCreateAliasingBuffer2(). +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingBuffer( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, + VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer); + +/** \brief Creates a new `VkBuffer`, binds already created memory for it. + +\param allocator +\param allocation Allocation that provides memory to be used for binding new buffer to it. +\param allocationLocalOffset Additional offset to be added while binding, relative to the beginning of the allocation. Normally it should be 0. +\param pBufferCreateInfo +\param[out] pBuffer Buffer that was created. + +This function automatically: + +-# Creates buffer. +-# Binds the buffer with the supplied memory. + +If any of these operations fail, buffer is not created, +returned value is negative error code and `*pBuffer` is null. + +If the function succeeded, you must destroy the buffer when you +no longer need it using `vkDestroyBuffer()`. If you want to also destroy the corresponding +allocation you can use convenience function vmaDestroyBuffer(). + +\note This is a new version of the function augmented with parameter `allocationLocalOffset`. 
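+
+A minimal sketch of placing a second, aliasing buffer at a byte offset inside an existing
+allocation `alloc` (assumed to be large enough, of a compatible memory type, and such that
+offset 4096 satisfies the new buffer's alignment requirement):
+
+\code
+VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufCreateInfo.size = 4096;
+bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
+
+VkBuffer aliasingBuf;
+VkResult res = vmaCreateAliasingBuffer2(allocator, alloc, 4096, &bufCreateInfo, &aliasingBuf);
+// ... use the buffer ...
+vkDestroyBuffer(device, aliasingBuf, VMA_NULL);
+\endcode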
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingBuffer2(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation,
+    VkDeviceSize allocationLocalOffset,
+    const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo,
+    VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer);
+
+/** \brief Destroys Vulkan buffer and frees allocated memory.
+
+This is just a convenience function equivalent to:
+
+\code
+vkDestroyBuffer(device, buffer, allocationCallbacks);
+vmaFreeMemory(allocator, allocation);
+\endcode
+
+It is safe to pass null as buffer and/or allocation.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaDestroyBuffer(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VkBuffer VMA_NULLABLE_NON_DISPATCHABLE buffer,
+    VmaAllocation VMA_NULLABLE allocation);
+
+/// Function similar to vmaCreateBuffer().
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateImage(
+    VmaAllocator VMA_NOT_NULL allocator,
+    const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo,
+    const VmaAllocationCreateInfo* VMA_NOT_NULL pAllocationCreateInfo,
+    VkImage VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pImage,
+    VmaAllocation VMA_NULLABLE* VMA_NOT_NULL pAllocation,
+    VmaAllocationInfo* VMA_NULLABLE pAllocationInfo);
+
+/// Function similar to vmaCreateAliasingBuffer() but for images.
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingImage(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation,
+    const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo,
+    VkImage VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pImage);
+
+/// Function similar to vmaCreateAliasingBuffer2() but for images.
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingImage2(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation,
+    VkDeviceSize allocationLocalOffset,
+    const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo,
+    VkImage VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pImage);
+
+/** \brief Destroys Vulkan image and frees allocated memory.
+
+This is just a convenience function equivalent to:
+
+\code
+vkDestroyImage(device, image, allocationCallbacks);
+vmaFreeMemory(allocator, allocation);
+\endcode
+
+It is safe to pass null as image and/or allocation.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaDestroyImage(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VkImage VMA_NULLABLE_NON_DISPATCHABLE image,
+    VmaAllocation VMA_NULLABLE allocation);
+
+/** @} */
+
+/**
+\addtogroup group_virtual
+@{
+*/
+
+/** \brief Creates new #VmaVirtualBlock object.
+
+\param pCreateInfo Parameters for creation.
+\param[out] pVirtualBlock Returned virtual block object or `VMA_NULL` if creation failed.
+*/
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateVirtualBlock(
+    const VmaVirtualBlockCreateInfo* VMA_NOT_NULL pCreateInfo,
+    VmaVirtualBlock VMA_NULLABLE* VMA_NOT_NULL pVirtualBlock);
+
+/** \brief Destroys #VmaVirtualBlock object.
+
+Please note that you should consciously handle virtual allocations that could remain unfreed in the block.
+You should either free them individually using vmaVirtualFree() or call vmaClearVirtualBlock()
+if you are sure this is what you want. If you do neither, an assert is called.
+
+If you keep pointers to some additional metadata associated with your virtual allocations in their `pUserData`,
+don't forget to free them.
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaDestroyVirtualBlock(
+    VmaVirtualBlock VMA_NULLABLE virtualBlock);
+
+/** \brief Returns true if the #VmaVirtualBlock is empty - contains 0 virtual allocations and has all its space available for new allocations.
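+
+A minimal round trip over the virtual allocator API, assuming creation succeeds:
+
+\code
+VmaVirtualBlockCreateInfo blockCreateInfo = {};
+blockCreateInfo.size = 1048576; // 1 MB, in whatever units are used consistently
+
+VmaVirtualBlock block;
+VkResult res = vmaCreateVirtualBlock(&blockCreateInfo, &block);
+
+VmaVirtualAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.size = 4096;
+
+VmaVirtualAllocation alloc;
+VkDeviceSize offset;
+res = vmaVirtualAllocate(block, &allocCreateInfo, &alloc, &offset);
+
+vmaVirtualFree(block, alloc);
+// vmaIsVirtualBlockEmpty(block) returns VK_TRUE again at this point.
+vmaDestroyVirtualBlock(block);
+\endcode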
+*/ +VMA_CALL_PRE VkBool32 VMA_CALL_POST vmaIsVirtualBlockEmpty( + VmaVirtualBlock VMA_NOT_NULL virtualBlock); + +/** \brief Returns information about a specific virtual allocation within a virtual block, like its size and `pUserData` pointer. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetVirtualAllocationInfo( + VmaVirtualBlock VMA_NOT_NULL virtualBlock, + VmaVirtualAllocation VMA_NOT_NULL_NON_DISPATCHABLE allocation, VmaVirtualAllocationInfo* VMA_NOT_NULL pVirtualAllocInfo); + +/** \brief Allocates new virtual allocation inside given #VmaVirtualBlock. + +If the allocation fails due to not enough free space available, `VK_ERROR_OUT_OF_DEVICE_MEMORY` is returned +(despite the function doesn't ever allocate actual GPU memory). +`pAllocation` is then set to `VK_NULL_HANDLE` and `pOffset`, if not null, it set to `UINT64_MAX`. + +\param virtualBlock Virtual block +\param pCreateInfo Parameters for the allocation +\param[out] pAllocation Returned handle of the new allocation +\param[out] pOffset Returned offset of the new allocation. Optional, can be null. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaVirtualAllocate( + VmaVirtualBlock VMA_NOT_NULL virtualBlock, + const VmaVirtualAllocationCreateInfo* VMA_NOT_NULL pCreateInfo, + VmaVirtualAllocation VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pAllocation, + VkDeviceSize* VMA_NULLABLE pOffset); + +/** \brief Frees virtual allocation inside given #VmaVirtualBlock. + +It is correct to call this function with `allocation == VK_NULL_HANDLE` - it does nothing. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaVirtualFree( + VmaVirtualBlock VMA_NOT_NULL virtualBlock, + VmaVirtualAllocation VMA_NULLABLE_NON_DISPATCHABLE allocation); + +/** \brief Frees all virtual allocations inside given #VmaVirtualBlock. + +You must either call this function or free each virtual allocation individually with vmaVirtualFree() +before destroying a virtual block. Otherwise, an assert is called. + +If you keep pointer to some additional metadata associated with your virtual allocation in its `pUserData`, +don't forget to free it as well. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaClearVirtualBlock( + VmaVirtualBlock VMA_NOT_NULL virtualBlock); + +/** \brief Changes custom pointer associated with given virtual allocation. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaSetVirtualAllocationUserData( + VmaVirtualBlock VMA_NOT_NULL virtualBlock, + VmaVirtualAllocation VMA_NOT_NULL_NON_DISPATCHABLE allocation, + void* VMA_NULLABLE pUserData); + +/** \brief Calculates and returns statistics about virtual allocations and memory usage in given #VmaVirtualBlock. + +This function is fast to call. For more detailed statistics, see vmaCalculateVirtualBlockStatistics(). +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetVirtualBlockStatistics( + VmaVirtualBlock VMA_NOT_NULL virtualBlock, + VmaStatistics* VMA_NOT_NULL pStats); + +/** \brief Calculates and returns detailed statistics about virtual allocations and memory usage in given #VmaVirtualBlock. + +This function is slow to call. Use for debugging purposes. +For less detailed statistics, see vmaGetVirtualBlockStatistics(). +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaCalculateVirtualBlockStatistics( + VmaVirtualBlock VMA_NOT_NULL virtualBlock, + VmaDetailedStatistics* VMA_NOT_NULL pStats); + +/** @} */ + +#if VMA_STATS_STRING_ENABLED +/** +\addtogroup group_stats +@{ +*/ + +/** \brief Builds and returns a null-terminated string in JSON format with information about given #VmaVirtualBlock. +\param virtualBlock Virtual block. +\param[out] ppStatsString Returned string. 
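+/*
+Illustrative sketch of the typical virtual block lifecycle (the 1 MiB block size and
+4 KiB allocation size below are arbitrary example values, not defaults):
+
+    VmaVirtualBlockCreateInfo blockCreateInfo = {};
+    blockCreateInfo.size = 1048576; // 1 MiB of "virtual" space; no GPU memory is involved
+
+    VmaVirtualBlock block;
+    VkResult res = vmaCreateVirtualBlock(&blockCreateInfo, &block);
+
+    VmaVirtualAllocationCreateInfo allocCreateInfo = {};
+    allocCreateInfo.size = 4096;
+
+    VmaVirtualAllocation alloc;
+    VkDeviceSize offset;
+    res = vmaVirtualAllocate(block, &allocCreateInfo, &alloc, &offset);
+    // On success, [offset, offset + 4096) is reserved inside the block.
+
+    vmaVirtualFree(block, alloc);  // Free the allocation...
+    vmaDestroyVirtualBlock(block); // ...before destroying the block.
+*/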
+#if VMA_STATS_STRING_ENABLED
+/**
+\addtogroup group_stats
+@{
+*/
+
+/** \brief Builds and returns a null-terminated string in JSON format with information about given #VmaVirtualBlock.
+\param virtualBlock Virtual block.
+\param[out] ppStatsString Returned string.
+\param detailedMap Pass `VK_FALSE` to only obtain statistics as returned by vmaCalculateVirtualBlockStatistics(). Pass `VK_TRUE` to also obtain full list of allocations and free spaces.
+
+Returned string must be freed using vmaFreeVirtualBlockStatsString().
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaBuildVirtualBlockStatsString(
+    VmaVirtualBlock VMA_NOT_NULL virtualBlock,
+    char* VMA_NULLABLE* VMA_NOT_NULL ppStatsString,
+    VkBool32 detailedMap);
+
+/// Frees a string returned by vmaBuildVirtualBlockStatsString().
+VMA_CALL_PRE void VMA_CALL_POST vmaFreeVirtualBlockStatsString(
+    VmaVirtualBlock VMA_NOT_NULL virtualBlock,
+    char* VMA_NULLABLE pStatsString);
+
+/** \brief Builds and returns statistics as a null-terminated string in JSON format.
+\param allocator
+\param[out] ppStatsString Must be freed using vmaFreeStatsString() function.
+\param detailedMap
+*/
+VMA_CALL_PRE void VMA_CALL_POST vmaBuildStatsString(
+    VmaAllocator VMA_NOT_NULL allocator,
+    char* VMA_NULLABLE* VMA_NOT_NULL ppStatsString,
+    VkBool32 detailedMap);
+
+VMA_CALL_PRE void VMA_CALL_POST vmaFreeStatsString(
+    VmaAllocator VMA_NOT_NULL allocator,
+    char* VMA_NULLABLE pStatsString);
+
+/** @} */
+
+#endif // VMA_STATS_STRING_ENABLED
+
+#endif // _VMA_FUNCTION_HEADERS
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // AMD_VULKAN_MEMORY_ALLOCATOR_H
+
+////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+//
+// IMPLEMENTATION
+//
+////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+
+// For Visual Studio IntelliSense.
+#if defined(__cplusplus) && defined(__INTELLISENSE__)
+#define VMA_IMPLEMENTATION
+#endif
+
+#ifdef VMA_IMPLEMENTATION
+#undef VMA_IMPLEMENTATION
+
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <cinttypes>
+#include <utility>
+#include <type_traits>
+
+#if !defined(VMA_CPP20)
+    #if __cplusplus >= 202002L || _MSVC_LANG >= 202002L // C++20
+        #define VMA_CPP20 1
+    #else
+        #define VMA_CPP20 0
+    #endif
+#endif
+
+#ifdef _MSC_VER
+    #include <intrin.h> // For functions like __popcnt, _BitScanForward etc.
+#endif
+#if VMA_CPP20
+    #include <bit>
+#endif
+
+#if VMA_STATS_STRING_ENABLED
+    #include <cstdio> // For snprintf
+#endif
+
+/*******************************************************************************
+CONFIGURATION SECTION
+
+Define some of these macros before each #include of this header or change them
+here if you need behavior other than the default, depending on your environment.
+*/
+#ifndef _VMA_CONFIGURATION
+
+/*
+Define this macro to 1 to make the library fetch pointers to Vulkan functions
+internally, like:
+
+    vulkanFunctions.vkAllocateMemory = &vkAllocateMemory;
+*/
+#if !defined(VMA_STATIC_VULKAN_FUNCTIONS) && !defined(VK_NO_PROTOTYPES)
+    #define VMA_STATIC_VULKAN_FUNCTIONS 1
+#endif
+
+/*
+Define this macro to 1 to make the library fetch pointers to Vulkan functions
+internally, like:
+
+    vulkanFunctions.vkAllocateMemory = (PFN_vkAllocateMemory)vkGetDeviceProcAddr(device, "vkAllocateMemory");
+
+To use this feature in new versions of VMA you now have to pass
+VmaVulkanFunctions::vkGetInstanceProcAddr and vkGetDeviceProcAddr as
+VmaAllocatorCreateInfo::pVulkanFunctions. Other members can be null.
+*/
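+/*
+Illustrative sketch of the dynamic-fetching setup described above (assumes `instance`,
+`physicalDevice`, and `device` are valid handles created elsewhere):
+
+    VmaVulkanFunctions vulkanFunctions = {};
+    vulkanFunctions.vkGetInstanceProcAddr = &vkGetInstanceProcAddr;
+    vulkanFunctions.vkGetDeviceProcAddr = &vkGetDeviceProcAddr;
+
+    VmaAllocatorCreateInfo allocatorCreateInfo = {};
+    allocatorCreateInfo.instance = instance;
+    allocatorCreateInfo.physicalDevice = physicalDevice;
+    allocatorCreateInfo.device = device;
+    allocatorCreateInfo.pVulkanFunctions = &vulkanFunctions;
+
+    VmaAllocator allocator;
+    vmaCreateAllocator(&allocatorCreateInfo, &allocator);
+*/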
+#if !defined(VMA_DYNAMIC_VULKAN_FUNCTIONS)
+    #define VMA_DYNAMIC_VULKAN_FUNCTIONS 1
+#endif
+
+#ifndef VMA_USE_STL_SHARED_MUTEX
+    #if __cplusplus >= 201703L || _MSVC_LANG >= 201703L // C++17
+        #define VMA_USE_STL_SHARED_MUTEX 1
+    // Visual Studio defines __cplusplus properly only when passed additional parameter: /Zc:__cplusplus
+    // Otherwise it is always 199711L, even though shared_mutex has worked since Visual Studio 2015 Update 2.
+    #elif defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 190023918 && __cplusplus == 199711L && _MSVC_LANG >= 201703L
+        #define VMA_USE_STL_SHARED_MUTEX 1
+    #else
+        #define VMA_USE_STL_SHARED_MUTEX 0
+    #endif
+#endif
+
+/*
+Define this macro to include custom header files without having to edit this file directly, e.g.:
+
+    // Inside of "my_vma_configuration_user_includes.h":
+
+    #include "my_custom_assert.h" // for MY_CUSTOM_ASSERT
+    #include "my_custom_min.h" // for my_custom_min
+    #include <algorithm>
+    #include <mutex>
+
+    // Inside a different file, which includes "vk_mem_alloc.h":
+
+    #define VMA_CONFIGURATION_USER_INCLUDES_H "my_vma_configuration_user_includes.h"
+    #define VMA_ASSERT(expr) MY_CUSTOM_ASSERT(expr)
+    #define VMA_MIN(v1, v2) (my_custom_min(v1, v2))
+    #include "vk_mem_alloc.h"
+    ...
+
+The following headers are used in this CONFIGURATION section only, so feel free to
+remove them if not needed.
+*/
+#if !defined(VMA_CONFIGURATION_USER_INCLUDES_H)
+    #include <cassert> // for assert
+    #include <algorithm> // for min, max, swap
+    #include <mutex>
+#else
+    #include VMA_CONFIGURATION_USER_INCLUDES_H
+#endif
+
+#ifndef VMA_NULL
+    // Value used as null pointer. Define it to e.g.: nullptr, NULL, 0, (void*)0.
+    #define VMA_NULL nullptr
+#endif
+
+#ifndef VMA_FALLTHROUGH
+    #if __cplusplus >= 201703L || _MSVC_LANG >= 201703L // C++17
+        #define VMA_FALLTHROUGH [[fallthrough]]
+    #else
+        #define VMA_FALLTHROUGH
+    #endif
+#endif
+
+// Normal assert to check for programmer's errors, especially in Debug configuration.
+#ifndef VMA_ASSERT
+    #ifdef NDEBUG
+        #define VMA_ASSERT(expr)
+    #else
+        #define VMA_ASSERT(expr) assert(expr)
+    #endif
+#endif
+
+// Assert that will be called very often, like inside data structures e.g. operator[].
+// Making it non-empty can make program slow.
+#ifndef VMA_HEAVY_ASSERT
+    #ifdef NDEBUG
+        #define VMA_HEAVY_ASSERT(expr)
+    #else
+        #define VMA_HEAVY_ASSERT(expr) //VMA_ASSERT(expr)
+    #endif
+#endif
+
+// Assert used for reporting memory leaks - unfreed allocations.
+#ifndef VMA_ASSERT_LEAK
+    #define VMA_ASSERT_LEAK(expr) VMA_ASSERT(expr)
+#endif
+
+// If your compiler is not compatible with C++17 and definition of
+// aligned_alloc() function is missing, uncommenting following line may help:
+
+//#include <malloc.h>
+
+#if defined(__ANDROID_API__) && (__ANDROID_API__ < 16)
+#include <malloc.h>
+static void* vma_aligned_alloc(size_t alignment, size_t size)
+{
+    // alignment must be >= sizeof(void*)
+    if(alignment < sizeof(void*))
+    {
+        alignment = sizeof(void*);
+    }
+
+    return memalign(alignment, size);
+}
+#elif defined(__APPLE__) || defined(__ANDROID__) || (defined(__linux__) && defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC))
+#include <cstdlib>
+
+#if defined(__APPLE__)
+#include <AvailabilityMacros.h>
+#endif
+
+static void* vma_aligned_alloc(size_t alignment, size_t size)
+{
+    // Unfortunately, aligned_alloc causes VMA to crash due to it returning null pointers. (At least under 11.4)
+    // Therefore, for now disable this specific exception until a proper solution is found.
+ //#if defined(__APPLE__) && (defined(MAC_OS_X_VERSION_10_16) || defined(__IPHONE_14_0)) + //#if MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_16 || __IPHONE_OS_VERSION_MAX_ALLOWED >= __IPHONE_14_0 + // // For C++14, usr/include/malloc/_malloc.h declares aligned_alloc()) only + // // with the MacOSX11.0 SDK in Xcode 12 (which is what adds + // // MAC_OS_X_VERSION_10_16), even though the function is marked + // // available for 10.15. That is why the preprocessor checks for 10.16 but + // // the __builtin_available checks for 10.15. + // // People who use C++17 could call aligned_alloc with the 10.15 SDK already. + // if (__builtin_available(macOS 10.15, iOS 13, *)) + // return aligned_alloc(alignment, size); + //#endif + //#endif + + // alignment must be >= sizeof(void*) + if(alignment < sizeof(void*)) + { + alignment = sizeof(void*); + } + + void *pointer; + if(posix_memalign(&pointer, alignment, size) == 0) + return pointer; + return VMA_NULL; +} +#elif defined(_WIN32) +static void* vma_aligned_alloc(size_t alignment, size_t size) +{ + return _aligned_malloc(size, alignment); +} +#elif __cplusplus >= 201703L || _MSVC_LANG >= 201703L // C++17 +static void* vma_aligned_alloc(size_t alignment, size_t size) +{ + return aligned_alloc(alignment, size); +} +#else +static void* vma_aligned_alloc(size_t alignment, size_t size) +{ + VMA_ASSERT(0 && "Could not implement aligned_alloc automatically. Please enable C++17 or later in your compiler or provide custom implementation of macro VMA_SYSTEM_ALIGNED_MALLOC (and VMA_SYSTEM_ALIGNED_FREE if needed) using the API of your system."); + return VMA_NULL; +} +#endif + +#if defined(_WIN32) +static void vma_aligned_free(void* ptr) +{ + _aligned_free(ptr); +} +#else +static void vma_aligned_free(void* VMA_NULLABLE ptr) +{ + free(ptr); +} +#endif + +#ifndef VMA_ALIGN_OF + #define VMA_ALIGN_OF(type) (alignof(type)) +#endif + +#ifndef VMA_SYSTEM_ALIGNED_MALLOC + #define VMA_SYSTEM_ALIGNED_MALLOC(size, alignment) vma_aligned_alloc((alignment), (size)) +#endif + +#ifndef VMA_SYSTEM_ALIGNED_FREE + // VMA_SYSTEM_FREE is the old name, but might have been defined by the user + #if defined(VMA_SYSTEM_FREE) + #define VMA_SYSTEM_ALIGNED_FREE(ptr) VMA_SYSTEM_FREE(ptr) + #else + #define VMA_SYSTEM_ALIGNED_FREE(ptr) vma_aligned_free(ptr) + #endif +#endif + +#ifndef VMA_COUNT_BITS_SET + // Returns number of bits set to 1 in (v) + #define VMA_COUNT_BITS_SET(v) VmaCountBitsSet(v) +#endif + +#ifndef VMA_BITSCAN_LSB + // Scans integer for index of first nonzero value from the Least Significant Bit (LSB). If mask is 0 then returns UINT8_MAX + #define VMA_BITSCAN_LSB(mask) VmaBitScanLSB(mask) +#endif + +#ifndef VMA_BITSCAN_MSB + // Scans integer for index of first nonzero value from the Most Significant Bit (MSB). If mask is 0 then returns UINT8_MAX + #define VMA_BITSCAN_MSB(mask) VmaBitScanMSB(mask) +#endif + +#ifndef VMA_MIN + #define VMA_MIN(v1, v2) ((std::min)((v1), (v2))) +#endif + +#ifndef VMA_MAX + #define VMA_MAX(v1, v2) ((std::max)((v1), (v2))) +#endif + +#ifndef VMA_SORT + #define VMA_SORT(beg, end, cmp) std::sort(beg, end, cmp) +#endif + +#ifndef VMA_DEBUG_LOG_FORMAT + #define VMA_DEBUG_LOG_FORMAT(format, ...) + /* + #define VMA_DEBUG_LOG_FORMAT(format, ...) do { \ + printf((format), __VA_ARGS__); \ + printf("\n"); \ + } while(false) + */ +#endif + +#ifndef VMA_DEBUG_LOG + #define VMA_DEBUG_LOG(str) VMA_DEBUG_LOG_FORMAT("%s", (str)) +#endif + +#ifndef VMA_LEAK_LOG_FORMAT + #define VMA_LEAK_LOG_FORMAT(format, ...) 
VMA_DEBUG_LOG_FORMAT(format, __VA_ARGS__)
+#endif
+
+#ifndef VMA_CLASS_NO_COPY
+    #define VMA_CLASS_NO_COPY(className) \
+        private: \
+            className(const className&) = delete; \
+            className& operator=(const className&) = delete;
+#endif
+#ifndef VMA_CLASS_NO_COPY_NO_MOVE
+    #define VMA_CLASS_NO_COPY_NO_MOVE(className) \
+        private: \
+            className(const className&) = delete; \
+            className(className&&) = delete; \
+            className& operator=(const className&) = delete; \
+            className& operator=(className&&) = delete;
+#endif
+
+// Define this macro to 1 to enable functions: vmaBuildStatsString, vmaFreeStatsString.
+#if VMA_STATS_STRING_ENABLED
+    static inline void VmaUint32ToStr(char* VMA_NOT_NULL outStr, size_t strLen, uint32_t num)
+    {
+        snprintf(outStr, strLen, "%" PRIu32, num);
+    }
+    static inline void VmaUint64ToStr(char* VMA_NOT_NULL outStr, size_t strLen, uint64_t num)
+    {
+        snprintf(outStr, strLen, "%" PRIu64, num);
+    }
+    static inline void VmaPtrToStr(char* VMA_NOT_NULL outStr, size_t strLen, const void* ptr)
+    {
+        snprintf(outStr, strLen, "%p", ptr);
+    }
+#endif
+
+#ifndef VMA_MUTEX
+    class VmaMutex
+    {
+        VMA_CLASS_NO_COPY_NO_MOVE(VmaMutex)
+    public:
+        VmaMutex() { }
+        void Lock() { m_Mutex.lock(); }
+        void Unlock() { m_Mutex.unlock(); }
+        bool TryLock() { return m_Mutex.try_lock(); }
+    private:
+        std::mutex m_Mutex;
+    };
+    #define VMA_MUTEX VmaMutex
+#endif
+
+// Read-write mutex, where "read" is shared access, "write" is exclusive access.
+#ifndef VMA_RW_MUTEX
+    #if VMA_USE_STL_SHARED_MUTEX
+        // Use std::shared_mutex from C++17.
+        #include <shared_mutex>
+        class VmaRWMutex
+        {
+        public:
+            void LockRead() { m_Mutex.lock_shared(); }
+            void UnlockRead() { m_Mutex.unlock_shared(); }
+            bool TryLockRead() { return m_Mutex.try_lock_shared(); }
+            void LockWrite() { m_Mutex.lock(); }
+            void UnlockWrite() { m_Mutex.unlock(); }
+            bool TryLockWrite() { return m_Mutex.try_lock(); }
+        private:
+            std::shared_mutex m_Mutex;
+        };
+        #define VMA_RW_MUTEX VmaRWMutex
+    #elif defined(_WIN32) && defined(WINVER) && WINVER >= 0x0600
+        // Use SRWLOCK from WinAPI.
+        // Minimum supported client = Windows Vista, server = Windows Server 2008.
+        class VmaRWMutex
+        {
+        public:
+            VmaRWMutex() { InitializeSRWLock(&m_Lock); }
+            void LockRead() { AcquireSRWLockShared(&m_Lock); }
+            void UnlockRead() { ReleaseSRWLockShared(&m_Lock); }
+            bool TryLockRead() { return TryAcquireSRWLockShared(&m_Lock) != FALSE; }
+            void LockWrite() { AcquireSRWLockExclusive(&m_Lock); }
+            void UnlockWrite() { ReleaseSRWLockExclusive(&m_Lock); }
+            bool TryLockWrite() { return TryAcquireSRWLockExclusive(&m_Lock) != FALSE; }
+        private:
+            SRWLOCK m_Lock;
+        };
+        #define VMA_RW_MUTEX VmaRWMutex
+    #else
+        // Less efficient fallback: Use normal mutex.
+        class VmaRWMutex
+        {
+        public:
+            void LockRead() { m_Mutex.Lock(); }
+            void UnlockRead() { m_Mutex.Unlock(); }
+            bool TryLockRead() { return m_Mutex.TryLock(); }
+            void LockWrite() { m_Mutex.Lock(); }
+            void UnlockWrite() { m_Mutex.Unlock(); }
+            bool TryLockWrite() { return m_Mutex.TryLock(); }
+        private:
+            VMA_MUTEX m_Mutex;
+        };
+        #define VMA_RW_MUTEX VmaRWMutex
+    #endif // #if VMA_USE_STL_SHARED_MUTEX
+#endif // #ifndef VMA_RW_MUTEX
+
+/*
+If providing your own implementation, you need to implement a subset of std::atomic.
+*/
+#ifndef VMA_ATOMIC_UINT32
+    #include <atomic>
+    #define VMA_ATOMIC_UINT32 std::atomic<uint32_t>
+#endif
+
+#ifndef VMA_ATOMIC_UINT64
+    #include <atomic>
+    #define VMA_ATOMIC_UINT64 std::atomic<uint64_t>
+#endif
+
+#ifndef VMA_DEBUG_ALWAYS_DEDICATED_MEMORY
+    /**
+    Every allocation will have its own memory block.
+ Define to 1 for debugging purposes only. + */ + #define VMA_DEBUG_ALWAYS_DEDICATED_MEMORY (0) +#endif + +#ifndef VMA_MIN_ALIGNMENT + /** + Minimum alignment of all allocations, in bytes. + Set to more than 1 for debugging purposes. Must be power of two. + */ + #ifdef VMA_DEBUG_ALIGNMENT // Old name + #define VMA_MIN_ALIGNMENT VMA_DEBUG_ALIGNMENT + #else + #define VMA_MIN_ALIGNMENT (1) + #endif +#endif + +#ifndef VMA_DEBUG_MARGIN + /** + Minimum margin after every allocation, in bytes. + Set nonzero for debugging purposes only. + */ + #define VMA_DEBUG_MARGIN (0) +#endif + +#ifndef VMA_DEBUG_INITIALIZE_ALLOCATIONS + /** + Define this macro to 1 to automatically fill new allocations and destroyed + allocations with some bit pattern. + */ + #define VMA_DEBUG_INITIALIZE_ALLOCATIONS (0) +#endif + +#ifndef VMA_DEBUG_DETECT_CORRUPTION + /** + Define this macro to 1 together with non-zero value of VMA_DEBUG_MARGIN to + enable writing magic value to the margin after every allocation and + validating it, so that memory corruptions (out-of-bounds writes) are detected. + */ + #define VMA_DEBUG_DETECT_CORRUPTION (0) +#endif + +#ifndef VMA_DEBUG_GLOBAL_MUTEX + /** + Set this to 1 for debugging purposes only, to enable single mutex protecting all + entry calls to the library. Can be useful for debugging multithreading issues. + */ + #define VMA_DEBUG_GLOBAL_MUTEX (0) +#endif + +#ifndef VMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY + /** + Minimum value for VkPhysicalDeviceLimits::bufferImageGranularity. + Set to more than 1 for debugging purposes only. Must be power of two. + */ + #define VMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY (1) +#endif + +#ifndef VMA_DEBUG_DONT_EXCEED_MAX_MEMORY_ALLOCATION_COUNT + /* + Set this to 1 to make VMA never exceed VkPhysicalDeviceLimits::maxMemoryAllocationCount + and return error instead of leaving up to Vulkan implementation what to do in such cases. + */ + #define VMA_DEBUG_DONT_EXCEED_MAX_MEMORY_ALLOCATION_COUNT (0) +#endif + +#ifndef VMA_SMALL_HEAP_MAX_SIZE + /// Maximum size of a memory heap in Vulkan to consider it "small". + #define VMA_SMALL_HEAP_MAX_SIZE (1024ull * 1024 * 1024) +#endif + +#ifndef VMA_DEFAULT_LARGE_HEAP_BLOCK_SIZE + /// Default size of a block allocated as single VkDeviceMemory from a "large" heap. + #define VMA_DEFAULT_LARGE_HEAP_BLOCK_SIZE (256ull * 1024 * 1024) +#endif + +/* +Mapping hysteresis is a logic that launches when vmaMapMemory/vmaUnmapMemory is called +or a persistently mapped allocation is created and destroyed several times in a row. +It keeps additional +1 mapping of a device memory block to prevent calling actual +vkMapMemory/vkUnmapMemory too many times, which may improve performance and help +tools like RenderDoc. +*/ +#ifndef VMA_MAPPING_HYSTERESIS_ENABLED + #define VMA_MAPPING_HYSTERESIS_ENABLED 1 +#endif + +#define VMA_VALIDATE(cond) do { if(!(cond)) { \ + VMA_ASSERT(0 && "Validation failed: " #cond); \ + return false; \ + } } while(false) + +/******************************************************************************* +END OF CONFIGURATION +*/ +#endif // _VMA_CONFIGURATION + + +static const uint8_t VMA_ALLOCATION_FILL_PATTERN_CREATED = 0xDC; +static const uint8_t VMA_ALLOCATION_FILL_PATTERN_DESTROYED = 0xEF; +// Decimal 2139416166, float NaN, little-endian binary 66 E6 84 7F. +static const uint32_t VMA_CORRUPTION_DETECTION_MAGIC_VALUE = 0x7F84E666; + +// Copy of some Vulkan definitions so we don't need to check their existence just to handle few constants. 
+static const uint32_t VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY = 0x00000040;
+static const uint32_t VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY = 0x00000080;
+static const uint32_t VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_COPY = 0x00020000;
+static const uint32_t VK_IMAGE_CREATE_DISJOINT_BIT_COPY = 0x00000200;
+static const int32_t VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT_COPY = 1000158000;
+static const uint32_t VMA_ALLOCATION_INTERNAL_STRATEGY_MIN_OFFSET = 0x10000000u;
+static const uint32_t VMA_ALLOCATION_TRY_COUNT = 32;
+static const uint32_t VMA_VENDOR_ID_AMD = 4098;
+
+// This one is tricky. Vulkan specification defines this code as available since
+// Vulkan 1.0, but doesn't actually define it in Vulkan SDK earlier than 1.2.131.
+// See pull request #207.
+#define VK_ERROR_UNKNOWN_COPY ((VkResult)-13)
+
+
+#if VMA_STATS_STRING_ENABLED
+// Correspond to values of enum VmaSuballocationType.
+static const char* VMA_SUBALLOCATION_TYPE_NAMES[] =
+{
+    "FREE",
+    "UNKNOWN",
+    "BUFFER",
+    "IMAGE_UNKNOWN",
+    "IMAGE_LINEAR",
+    "IMAGE_OPTIMAL",
+};
+#endif
+
+static VkAllocationCallbacks VmaEmptyAllocationCallbacks =
+    { VMA_NULL, VMA_NULL, VMA_NULL, VMA_NULL, VMA_NULL, VMA_NULL };
+
+
+#ifndef _VMA_ENUM_DECLARATIONS
+
+enum VmaSuballocationType
+{
+    VMA_SUBALLOCATION_TYPE_FREE = 0,
+    VMA_SUBALLOCATION_TYPE_UNKNOWN = 1,
+    VMA_SUBALLOCATION_TYPE_BUFFER = 2,
+    VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN = 3,
+    VMA_SUBALLOCATION_TYPE_IMAGE_LINEAR = 4,
+    VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL = 5,
+    VMA_SUBALLOCATION_TYPE_MAX_ENUM = 0x7FFFFFFF
+};
+
+enum VMA_CACHE_OPERATION
+{
+    VMA_CACHE_FLUSH,
+    VMA_CACHE_INVALIDATE
+};
+
+enum class VmaAllocationRequestType
+{
+    Normal,
+    TLSF,
+    // Used by "Linear" algorithm.
+    UpperAddress,
+    EndOf1st,
+    EndOf2nd,
+};
+
+#endif // _VMA_ENUM_DECLARATIONS
+
+#ifndef _VMA_FORWARD_DECLARATIONS
+// Opaque handle used by allocation algorithms to identify single allocation in any conforming way.
+VK_DEFINE_NON_DISPATCHABLE_HANDLE(VmaAllocHandle);
+
+struct VmaMutexLock;
+struct VmaMutexLockRead;
+struct VmaMutexLockWrite;
+
+template<typename T>
+struct AtomicTransactionalIncrement;
+
+template<typename T>
+struct VmaStlAllocator;
+
+template<typename T, typename AllocatorT>
+class VmaVector;
+
+template<typename T, typename AllocatorT, size_t N>
+class VmaSmallVector;
+
+template<typename T>
+class VmaPoolAllocator;
+
+template<typename T>
+struct VmaListItem;
+
+template<typename T>
+class VmaRawList;
+
+template<typename T, typename AllocatorT>
+class VmaList;
+
+template<typename ItemTypeTraits>
+class VmaIntrusiveLinkedList;
+
+#if VMA_STATS_STRING_ENABLED
+class VmaStringBuilder;
+class VmaJsonWriter;
+#endif
+
+class VmaDeviceMemoryBlock;
+
+struct VmaDedicatedAllocationListItemTraits;
+class VmaDedicatedAllocationList;
+
+struct VmaSuballocation;
+struct VmaSuballocationOffsetLess;
+struct VmaSuballocationOffsetGreater;
+struct VmaSuballocationItemSizeLess;
+
+typedef VmaList<VmaSuballocation, VmaStlAllocator<VmaSuballocation>> VmaSuballocationList;
+
+struct VmaAllocationRequest;
+
+class VmaBlockMetadata;
+class VmaBlockMetadata_Linear;
+class VmaBlockMetadata_TLSF;
+
+class VmaBlockVector;
+
+struct VmaPoolListItemTraits;
+
+struct VmaCurrentBudgetData;
+
+class VmaAllocationObjectAllocator;
+
+#endif // _VMA_FORWARD_DECLARATIONS
+
+
+#ifndef _VMA_FUNCTIONS
+
+/*
+Returns number of bits set to 1 in (v).
+
+On specific platforms and compilers you can use intrinsics like:
+
+Visual Studio:
+    return __popcnt(v);
+GCC, Clang:
+    return static_cast<uint32_t>(__builtin_popcount(v));
+
+Define macro VMA_COUNT_BITS_SET to provide your optimized implementation.
+But you need to check in runtime whether user's CPU supports these, as some old processors don't.
+*/
+static inline uint32_t VmaCountBitsSet(uint32_t v)
+{
+#if VMA_CPP20
+    return std::popcount(v);
+#else
+    uint32_t c = v - ((v >> 1) & 0x55555555);
+    c = ((c >> 2) & 0x33333333) + (c & 0x33333333);
+    c = ((c >> 4) + c) & 0x0F0F0F0F;
+    c = ((c >> 8) + c) & 0x00FF00FF;
+    c = ((c >> 16) + c) & 0x0000FFFF;
+    return c;
+#endif
+}
+
+static inline uint8_t VmaBitScanLSB(uint64_t mask)
+{
+#if defined(_MSC_VER) && defined(_WIN64)
+    unsigned long pos;
+    if (_BitScanForward64(&pos, mask))
+        return static_cast<uint8_t>(pos);
+    return UINT8_MAX;
+#elif VMA_CPP20
+    if(mask)
+        return static_cast<uint8_t>(std::countr_zero(mask));
+    return UINT8_MAX;
+#elif defined __GNUC__ || defined __clang__
+    return static_cast<uint8_t>(__builtin_ffsll(mask)) - 1U;
+#else
+    uint8_t pos = 0;
+    uint64_t bit = 1;
+    do
+    {
+        if (mask & bit)
+            return pos;
+        bit <<= 1;
+    } while (pos++ < 63);
+    return UINT8_MAX;
+#endif
+}
+
+static inline uint8_t VmaBitScanLSB(uint32_t mask)
+{
+#ifdef _MSC_VER
+    unsigned long pos;
+    if (_BitScanForward(&pos, mask))
+        return static_cast<uint8_t>(pos);
+    return UINT8_MAX;
+#elif VMA_CPP20
+    if(mask)
+        return static_cast<uint8_t>(std::countr_zero(mask));
+    return UINT8_MAX;
+#elif defined __GNUC__ || defined __clang__
+    return static_cast<uint8_t>(__builtin_ffs(mask)) - 1U;
+#else
+    uint8_t pos = 0;
+    uint32_t bit = 1;
+    do
+    {
+        if (mask & bit)
+            return pos;
+        bit <<= 1;
+    } while (pos++ < 31);
+    return UINT8_MAX;
+#endif
+}
+
+static inline uint8_t VmaBitScanMSB(uint64_t mask)
+{
+#if defined(_MSC_VER) && defined(_WIN64)
+    unsigned long pos;
+    if (_BitScanReverse64(&pos, mask))
+        return static_cast<uint8_t>(pos);
+#elif VMA_CPP20
+    if(mask)
+        return 63 - static_cast<uint8_t>(std::countl_zero(mask));
+#elif defined __GNUC__ || defined __clang__
+    if (mask)
+        return 63 - static_cast<uint8_t>(__builtin_clzll(mask));
+#else
+    uint8_t pos = 63;
+    uint64_t bit = 1ULL << 63;
+    do
+    {
+        if (mask & bit)
+            return pos;
+        bit >>= 1;
+    } while (pos-- > 0);
+#endif
+    return UINT8_MAX;
+}
+
+static inline uint8_t VmaBitScanMSB(uint32_t mask)
+{
+#ifdef _MSC_VER
+    unsigned long pos;
+    if (_BitScanReverse(&pos, mask))
+        return static_cast<uint8_t>(pos);
+#elif VMA_CPP20
+    if(mask)
+        return 31 - static_cast<uint8_t>(std::countl_zero(mask));
+#elif defined __GNUC__ || defined __clang__
+    if (mask)
+        return 31 - static_cast<uint8_t>(__builtin_clz(mask));
+#else
+    uint8_t pos = 31;
+    uint32_t bit = 1UL << 31;
+    do
+    {
+        if (mask & bit)
+            return pos;
+        bit >>= 1;
+    } while (pos-- > 0);
+#endif
+    return UINT8_MAX;
+}
+
+/*
+Returns true if given number is a power of two.
+T must be unsigned integer number or signed integer but always nonnegative.
+For 0 returns true.
+*/
+template<typename T>
+inline bool VmaIsPow2(T x)
+{
+    return (x & (x - 1)) == 0;
+}
+
+// Aligns given value up to nearest multiple of align value. For example: VmaAlignUp(11, 8) = 16.
+// Use types like uint32_t, uint64_t as T.
+template<typename T>
+static inline T VmaAlignUp(T val, T alignment)
+{
+    VMA_HEAVY_ASSERT(VmaIsPow2(alignment));
+    return (val + alignment - 1) & ~(alignment - 1);
+}
+
+// Aligns given value down to nearest multiple of align value. For example: VmaAlignDown(11, 8) = 8.
+// Use types like uint32_t, uint64_t as T.
+template<typename T>
+static inline T VmaAlignDown(T val, T alignment)
+{
+    VMA_HEAVY_ASSERT(VmaIsPow2(alignment));
+    return val & ~(alignment - 1);
+}
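+/*
+Worked illustration of the bit trick above (alignment must be a power of two):
+
+    VmaAlignUp<uint32_t>(11, 8)   == 16  // (11 + 7) & ~7
+    VmaAlignUp<uint32_t>(16, 8)   == 16  // already aligned values are unchanged
+    VmaAlignDown<uint32_t>(11, 8) == 8   // 11 & ~7
+*/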
+// Division with mathematical rounding to nearest number.
+template<typename T>
+static inline T VmaRoundDiv(T x, T y)
+{
+    return (x + (y / (T)2)) / y;
+}
+
+// Divide by 'y' and round up to nearest integer.
+template<typename T>
+static inline T VmaDivideRoundingUp(T x, T y)
+{
+    return (x + y - (T)1) / y;
+}
+
+// Returns smallest power of 2 greater or equal to v.
+static inline uint32_t VmaNextPow2(uint32_t v)
+{
+    v--;
+    v |= v >> 1;
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    v++;
+    return v;
+}
+
+static inline uint64_t VmaNextPow2(uint64_t v)
+{
+    v--;
+    v |= v >> 1;
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    v |= v >> 32;
+    v++;
+    return v;
+}
+
+// Returns largest power of 2 less or equal to v.
+static inline uint32_t VmaPrevPow2(uint32_t v)
+{
+    v |= v >> 1;
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    v = v ^ (v >> 1);
+    return v;
+}
+
+static inline uint64_t VmaPrevPow2(uint64_t v)
+{
+    v |= v >> 1;
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    v |= v >> 32;
+    v = v ^ (v >> 1);
+    return v;
+}
+
+static inline bool VmaStrIsEmpty(const char* pStr)
+{
+    return pStr == VMA_NULL || *pStr == '\0';
+}
+
+/*
+Returns true if two memory blocks occupy overlapping pages.
+ResourceA must be at a lower memory offset than ResourceB.
+
+Algorithm is based on "Vulkan 1.0.39 - A Specification (with all registered Vulkan extensions)"
+chapter 11.6 "Resource Memory Association", paragraph "Buffer-Image Granularity".
+*/
+static inline bool VmaBlocksOnSamePage(
+    VkDeviceSize resourceAOffset,
+    VkDeviceSize resourceASize,
+    VkDeviceSize resourceBOffset,
+    VkDeviceSize pageSize)
+{
+    VMA_ASSERT(resourceAOffset + resourceASize <= resourceBOffset && resourceASize > 0 && pageSize > 0);
+    VkDeviceSize resourceAEnd = resourceAOffset + resourceASize - 1;
+    VkDeviceSize resourceAEndPage = resourceAEnd & ~(pageSize - 1);
+    VkDeviceSize resourceBStart = resourceBOffset;
+    VkDeviceSize resourceBStartPage = resourceBStart & ~(pageSize - 1);
+    return resourceAEndPage == resourceBStartPage;
+}
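+/*
+For instance, with pageSize = 4096 (a typical bufferImageGranularity):
+
+    VmaBlocksOnSamePage(0, 4096, 4096, 4096) == false // A ends on page 0, B starts on page 1
+    VmaBlocksOnSamePage(0, 4000, 4095, 4096) == true  // both touch page 0
+*/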
+/*
+Returns true if given suballocation types could conflict and must respect
+VkPhysicalDeviceLimits::bufferImageGranularity. They conflict if one is buffer
+or linear image and another one is optimal image. If type is unknown, behave
+conservatively.
+*/
+static inline bool VmaIsBufferImageGranularityConflict(
+    VmaSuballocationType suballocType1,
+    VmaSuballocationType suballocType2)
+{
+    if (suballocType1 > suballocType2)
+    {
+        std::swap(suballocType1, suballocType2);
+    }
+
+    switch (suballocType1)
+    {
+    case VMA_SUBALLOCATION_TYPE_FREE:
+        return false;
+    case VMA_SUBALLOCATION_TYPE_UNKNOWN:
+        return true;
+    case VMA_SUBALLOCATION_TYPE_BUFFER:
+        return
+            suballocType2 == VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN ||
+            suballocType2 == VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL;
+    case VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN:
+        return
+            suballocType2 == VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN ||
+            suballocType2 == VMA_SUBALLOCATION_TYPE_IMAGE_LINEAR ||
+            suballocType2 == VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL;
+    case VMA_SUBALLOCATION_TYPE_IMAGE_LINEAR:
+        return
+            suballocType2 == VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL;
+    case VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL:
+        return false;
+    default:
+        VMA_ASSERT(0);
+        return true;
+    }
+}
+
+static void VmaWriteMagicValue(void* pData, VkDeviceSize offset)
+{
+#if VMA_DEBUG_MARGIN > 0 && VMA_DEBUG_DETECT_CORRUPTION
+    uint32_t* pDst = (uint32_t*)((char*)pData + offset);
+    const size_t numberCount = VMA_DEBUG_MARGIN / sizeof(uint32_t);
+    for (size_t i = 0; i < numberCount; ++i, ++pDst)
+    {
+        *pDst = VMA_CORRUPTION_DETECTION_MAGIC_VALUE;
+    }
+#else
+    // no-op
+#endif
+}
+
+static bool VmaValidateMagicValue(const void* pData, VkDeviceSize offset)
+{
+#if VMA_DEBUG_MARGIN > 0 && VMA_DEBUG_DETECT_CORRUPTION
+    const uint32_t* pSrc = (const uint32_t*)((const char*)pData + offset);
+    const size_t numberCount = VMA_DEBUG_MARGIN / sizeof(uint32_t);
+    for (size_t i = 0; i < numberCount; ++i, ++pSrc)
+    {
+        if (*pSrc != VMA_CORRUPTION_DETECTION_MAGIC_VALUE)
+        {
+            return false;
+        }
+    }
+#endif
+    return true;
+}
+
+/*
+Fills structure with parameters of an example buffer to be used for transfers
+during GPU memory defragmentation.
+*/
+static void VmaFillGpuDefragmentationBufferCreateInfo(VkBufferCreateInfo& outBufCreateInfo)
+{
+    memset(&outBufCreateInfo, 0, sizeof(outBufCreateInfo));
+    outBufCreateInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
+    outBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+    outBufCreateInfo.size = (VkDeviceSize)VMA_DEFAULT_LARGE_HEAP_BLOCK_SIZE; // Example size.
+}
+
+
+/*
+Performs binary search and returns iterator to the first element that is greater or
+equal to (key), according to comparison (cmp).
+
+Cmp should return true if the first argument is less than the second argument.
+
+Returned value is the found element, if present in the collection, or the place where
+a new element with value (key) should be inserted.
+*/
+template<typename CmpLess, typename IterT, typename KeyT>
+static IterT VmaBinaryFindFirstNotLess(IterT beg, IterT end, const KeyT& key, const CmpLess& cmp)
+{
+    size_t down = 0, up = size_t(end - beg);
+    while (down < up)
+    {
+        const size_t mid = down + (up - down) / 2; // Overflow-safe midpoint calculation
+        if (cmp(*(beg + mid), key))
+        {
+            down = mid + 1;
+        }
+        else
+        {
+            up = mid;
+        }
+    }
+    return beg + down;
+}
+
+template<typename CmpLess, typename IterT, typename KeyT>
+IterT VmaBinaryFindSorted(const IterT& beg, const IterT& end, const KeyT& value, const CmpLess& cmp)
+{
+    IterT it = VmaBinaryFindFirstNotLess(
+        beg, end, value, cmp);
+    if (it == end ||
+        (!cmp(*it, value) && !cmp(value, *it)))
+    {
+        return it;
+    }
+    return end;
+}
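+/*
+Usage sketch with hypothetical data (cmp returns true when the first argument is less):
+
+    uint32_t arr[] = { 1, 3, 3, 7 };
+    const auto less = [](uint32_t a, uint32_t b) { return a < b; };
+    // Points at the first 3 - the position that keeps the array sorted on insertion:
+    uint32_t* it = VmaBinaryFindFirstNotLess(arr, arr + 4, 3u, less);
+    // Returns arr + 4 ("end", i.e. not found), because 5 is absent:
+    uint32_t* it2 = VmaBinaryFindSorted(arr, arr + 4, 5u, less);
+*/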
+/*
+Returns true if all pointers in the array are not-null and unique.
+Warning! O(n^2) complexity. Use only inside VMA_HEAVY_ASSERT.
+T must be pointer type, e.g. VmaAllocation, VmaPool.
+*/
+template<typename T>
+static bool VmaValidatePointerArray(uint32_t count, const T* arr)
+{
+    for (uint32_t i = 0; i < count; ++i)
+    {
+        const T iPtr = arr[i];
+        if (iPtr == VMA_NULL)
+        {
+            return false;
+        }
+        for (uint32_t j = i + 1; j < count; ++j)
+        {
+            if (iPtr == arr[j])
+            {
+                return false;
+            }
+        }
+    }
+    return true;
+}
+
+template<typename MainT, typename NewT>
+static inline void VmaPnextChainPushFront(MainT* mainStruct, NewT* newStruct)
+{
+    newStruct->pNext = mainStruct->pNext;
+    mainStruct->pNext = newStruct;
+}
+// Finds structure with s->sType == sType in mainStruct->pNext chain.
+// Returns pointer to it. If not found, returns null.
+template<typename FindT, typename MainT>
+static inline const FindT* VmaPnextChainFind(const MainT* mainStruct, VkStructureType sType)
+{
+    for(const VkBaseInStructure* s = (const VkBaseInStructure*)mainStruct->pNext;
+        s != VMA_NULL; s = s->pNext)
+    {
+        if(s->sType == sType)
+        {
+            return (const FindT*)s;
+        }
+    }
+    return VMA_NULL;
+}
+
+// An abstraction over buffer or image `usage` flags, depending on available extensions.
+struct VmaBufferImageUsage
+{
+#if VMA_KHR_MAINTENANCE5
+    typedef uint64_t BaseType; // VkFlags64
+#else
+    typedef uint32_t BaseType; // VkFlags32
+#endif
+
+    static const VmaBufferImageUsage UNKNOWN;
+
+    BaseType Value;
+
+    VmaBufferImageUsage() { *this = UNKNOWN; }
+    explicit VmaBufferImageUsage(BaseType usage) : Value(usage) { }
+    VmaBufferImageUsage(const VkBufferCreateInfo &createInfo, bool useKhrMaintenance5);
+    explicit VmaBufferImageUsage(const VkImageCreateInfo &createInfo);
+
+    bool operator==(const VmaBufferImageUsage& rhs) const { return Value == rhs.Value; }
+    bool operator!=(const VmaBufferImageUsage& rhs) const { return Value != rhs.Value; }
+
+    bool Contains(BaseType flag) const { return (Value & flag) != 0; }
+    bool ContainsDeviceAccess() const
+    {
+        // This relies on values of VK_IMAGE_USAGE_TRANSFER* being the same as VK_BUFFER_IMAGE_TRANSFER*.
+        return (Value & ~BaseType(VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT)) != 0;
+    }
+};
+
+const VmaBufferImageUsage VmaBufferImageUsage::UNKNOWN = VmaBufferImageUsage(0);
+
+VmaBufferImageUsage::VmaBufferImageUsage(const VkBufferCreateInfo &createInfo,
+    bool useKhrMaintenance5)
+{
+#if VMA_KHR_MAINTENANCE5
+    if(useKhrMaintenance5)
+    {
+        // If VkBufferCreateInfo::pNext chain contains VkBufferUsageFlags2CreateInfoKHR,
+        // take usage from it and ignore VkBufferCreateInfo::usage, per specification
+        // of the VK_KHR_maintenance5 extension.
+        const VkBufferUsageFlags2CreateInfoKHR* const usageFlags2 =
+            VmaPnextChainFind<VkBufferUsageFlags2CreateInfoKHR>(&createInfo, VK_STRUCTURE_TYPE_BUFFER_USAGE_FLAGS_2_CREATE_INFO_KHR);
+        if(usageFlags2)
+        {
+            this->Value = usageFlags2->usage;
+            return;
+        }
+    }
+#endif
+
+    this->Value = (BaseType)createInfo.usage;
+}
+
+VmaBufferImageUsage::VmaBufferImageUsage(const VkImageCreateInfo &createInfo)
+{
+    // Maybe in the future there will be VK_KHR_maintenanceN extension with structure
+    // VkImageUsageFlags2CreateInfoKHR, like the one for buffers...
+
+    this->Value = (BaseType)createInfo.usage;
+}
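+/*
+For instance, a pure staging buffer reports no device access, because the transfer
+bits are masked out in ContainsDeviceAccess() (illustrative sketch):
+
+    VkBufferCreateInfo ci = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+    ci.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
+    VmaBufferImageUsage u(ci, false);
+    // u.ContainsDeviceAccess() == false; adding e.g. VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT
+    // would make it true.
+*/
+
+// This is the main algorithm that guides the selection of a memory type best for an allocation -
+// converts usage to required/preferred/not preferred flags.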
+static bool FindMemoryPreferences( + bool isIntegratedGPU, + const VmaAllocationCreateInfo& allocCreateInfo, + VmaBufferImageUsage bufImgUsage, + VkMemoryPropertyFlags& outRequiredFlags, + VkMemoryPropertyFlags& outPreferredFlags, + VkMemoryPropertyFlags& outNotPreferredFlags) +{ + outRequiredFlags = allocCreateInfo.requiredFlags; + outPreferredFlags = allocCreateInfo.preferredFlags; + outNotPreferredFlags = 0; + + switch(allocCreateInfo.usage) + { + case VMA_MEMORY_USAGE_UNKNOWN: + break; + case VMA_MEMORY_USAGE_GPU_ONLY: + if(!isIntegratedGPU || (outPreferredFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == 0) + { + outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + break; + case VMA_MEMORY_USAGE_CPU_ONLY: + outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + break; + case VMA_MEMORY_USAGE_CPU_TO_GPU: + outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; + if(!isIntegratedGPU || (outPreferredFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == 0) + { + outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + break; + case VMA_MEMORY_USAGE_GPU_TO_CPU: + outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; + outPreferredFlags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + break; + case VMA_MEMORY_USAGE_CPU_COPY: + outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + break; + case VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED: + outRequiredFlags |= VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT; + break; + case VMA_MEMORY_USAGE_AUTO: + case VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE: + case VMA_MEMORY_USAGE_AUTO_PREFER_HOST: + { + if(bufImgUsage == VmaBufferImageUsage::UNKNOWN) + { + VMA_ASSERT(0 && "VMA_MEMORY_USAGE_AUTO* values can only be used with functions like vmaCreateBuffer, vmaCreateImage so that the details of the created resource are known." + " Maybe you use VkBufferUsageFlags2CreateInfoKHR but forgot to use VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT?" ); + return false; + } + + const bool deviceAccess = bufImgUsage.ContainsDeviceAccess(); + const bool hostAccessSequentialWrite = (allocCreateInfo.flags & VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT) != 0; + const bool hostAccessRandom = (allocCreateInfo.flags & VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT) != 0; + const bool hostAccessAllowTransferInstead = (allocCreateInfo.flags & VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT) != 0; + const bool preferDevice = allocCreateInfo.usage == VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; + const bool preferHost = allocCreateInfo.usage == VMA_MEMORY_USAGE_AUTO_PREFER_HOST; + + // CPU random access - e.g. a buffer written to or transferred from GPU to read back on CPU. + if(hostAccessRandom) + { + // Prefer cached. Cannot require it, because some platforms don't have it (e.g. Raspberry Pi - see #362)! + outPreferredFlags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + + if (!isIntegratedGPU && deviceAccess && hostAccessAllowTransferInstead && !preferHost) + { + // Nice if it will end up in HOST_VISIBLE, but more importantly prefer DEVICE_LOCAL. + // Omitting HOST_VISIBLE here is intentional. + // In case there is DEVICE_LOCAL | HOST_VISIBLE | HOST_CACHED, it will pick that one. + // Otherwise, this will give same weight to DEVICE_LOCAL as HOST_VISIBLE | HOST_CACHED and select the former if occurs first on the list. + outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + else + { + // Always CPU memory. 
+                outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+            }
+        }
+        // CPU sequential write - may be CPU or host-visible GPU memory, uncached and write-combined.
+        else if(hostAccessSequentialWrite)
+        {
+            // Want uncached and write-combined.
+            outNotPreferredFlags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
+
+            if(!isIntegratedGPU && deviceAccess && hostAccessAllowTransferInstead && !preferHost)
+            {
+                outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+            }
+            else
+            {
+                outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+                // Direct GPU access, CPU sequential write (e.g. a dynamic uniform buffer updated every frame)
+                if(deviceAccess)
+                {
+                    // Could go to CPU memory or GPU BAR/unified. Up to the user to decide. If no preference, choose GPU memory.
+                    if(preferHost)
+                        outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+                    else
+                        outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+                }
+                // GPU no direct access, CPU sequential write (e.g. an upload buffer to be transferred to the GPU)
+                else
+                {
+                    // Could go to CPU memory or GPU BAR/unified. Up to the user to decide. If no preference, choose CPU memory.
+                    if(preferDevice)
+                        outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+                    else
+                        outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+                }
+            }
+        }
+        // No CPU access
+        else
+        {
+            // if(deviceAccess)
+            //
+            // GPU access, no CPU access (e.g. a color attachment image) - prefer GPU memory,
+            // unless there is a clear preference from the user not to do so.
+            //
+            // else:
+            //
+            // No direct GPU access, no CPU access, just transfers.
+            // It may be staging copy intended for e.g. preserving image for next frame (then better GPU memory) or
+            // a "swap file" copy to free some GPU memory (then better CPU memory).
+            // Up to the user to decide. If no preference, assume the former and choose GPU memory.
+
+            if(preferHost)
+                outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+            else
+                outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+        }
+        break;
+    }
+    default:
+        VMA_ASSERT(0);
+    }
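+    // To illustrate the logic above on a discrete GPU: VMA_MEMORY_USAGE_AUTO with
+    // VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT, a device-accessed buffer, and
+    // VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT ends up preferring
+    // HOST_CACHED | DEVICE_LOCAL while requiring nothing extra, so a
+    // DEVICE_LOCAL | HOST_VISIBLE | HOST_CACHED (BAR) memory type wins if present.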
+    // Avoid DEVICE_COHERENT unless explicitly requested.
+    if(((allocCreateInfo.requiredFlags | allocCreateInfo.preferredFlags) &
+        (VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY | VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY)) == 0)
+    {
+        outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY;
+    }
+
+    return true;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Memory allocation
+
+static void* VmaMalloc(const VkAllocationCallbacks* pAllocationCallbacks, size_t size, size_t alignment)
+{
+    void* result = VMA_NULL;
+    if ((pAllocationCallbacks != VMA_NULL) &&
+        (pAllocationCallbacks->pfnAllocation != VMA_NULL))
+    {
+        result = (*pAllocationCallbacks->pfnAllocation)(
+            pAllocationCallbacks->pUserData,
+            size,
+            alignment,
+            VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+    }
+    else
+    {
+        result = VMA_SYSTEM_ALIGNED_MALLOC(size, alignment);
+    }
+    VMA_ASSERT(result != VMA_NULL && "CPU memory allocation failed.");
+    return result;
+}
+
+static void VmaFree(const VkAllocationCallbacks* pAllocationCallbacks, void* ptr)
+{
+    if ((pAllocationCallbacks != VMA_NULL) &&
+        (pAllocationCallbacks->pfnFree != VMA_NULL))
+    {
+        (*pAllocationCallbacks->pfnFree)(pAllocationCallbacks->pUserData, ptr);
+    }
+    else
+    {
+        VMA_SYSTEM_ALIGNED_FREE(ptr);
+    }
+}
+
+template<typename T>
+static T* VmaAllocate(const VkAllocationCallbacks* pAllocationCallbacks)
+{
+    return (T*)VmaMalloc(pAllocationCallbacks, sizeof(T), VMA_ALIGN_OF(T));
+}
+
+template<typename T>
+static T* VmaAllocateArray(const VkAllocationCallbacks* pAllocationCallbacks, size_t count)
+{
+    return (T*)VmaMalloc(pAllocationCallbacks, sizeof(T) * count, VMA_ALIGN_OF(T));
+}
+
+#define vma_new(allocator, type)   new(VmaAllocate<type>(allocator))(type)
+
+#define vma_new_array(allocator, type, count)   new(VmaAllocateArray<type>((allocator), (count)))(type)
+
+template<typename T>
+static void vma_delete(const VkAllocationCallbacks* pAllocationCallbacks, T* ptr)
+{
+    ptr->~T();
+    VmaFree(pAllocationCallbacks, ptr);
+}
+
+template<typename T>
+static void vma_delete_array(const VkAllocationCallbacks* pAllocationCallbacks, T* ptr, size_t count)
+{
+    if (ptr != VMA_NULL)
+    {
+        for (size_t i = count; i--; )
+        {
+            ptr[i].~T();
+        }
+        VmaFree(pAllocationCallbacks, ptr);
+    }
+}
+
+static char* VmaCreateStringCopy(const VkAllocationCallbacks* allocs, const char* srcStr)
+{
+    if (srcStr != VMA_NULL)
+    {
+        const size_t len = strlen(srcStr);
+        char* const result = vma_new_array(allocs, char, len + 1);
+        memcpy(result, srcStr, len + 1);
+        return result;
+    }
+    return VMA_NULL;
+}
+
+#if VMA_STATS_STRING_ENABLED
+static char* VmaCreateStringCopy(const VkAllocationCallbacks* allocs, const char* srcStr, size_t strLen)
+{
+    if (srcStr != VMA_NULL)
+    {
+        char* const result = vma_new_array(allocs, char, strLen + 1);
+        memcpy(result, srcStr, strLen);
+        result[strLen] = '\0';
+        return result;
+    }
+    return VMA_NULL;
+}
+#endif // VMA_STATS_STRING_ENABLED
+
+static void VmaFreeString(const VkAllocationCallbacks* allocs, char* str)
+{
+    if (str != VMA_NULL)
+    {
+        const size_t len = strlen(str);
+        vma_delete_array(allocs, str, len + 1);
+    }
+}
+
+template<typename CmpLess, typename VectorT>
+size_t VmaVectorInsertSorted(VectorT& vector, const typename VectorT::value_type& value)
+{
+    const size_t indexToInsert = VmaBinaryFindFirstNotLess(
+        vector.data(),
+        vector.data() + vector.size(),
+        value,
+        CmpLess()) - vector.data();
+    VmaVectorInsert(vector, indexToInsert, value);
+    return indexToInsert;
+}
+
+template<typename CmpLess, typename VectorT>
+bool VmaVectorRemoveSorted(VectorT& vector, const typename VectorT::value_type& value)
+{
+    CmpLess comparator;
+    typename VectorT::iterator it = VmaBinaryFindFirstNotLess(
+ 
vector.begin(), + vector.end(), + value, + comparator); + if ((it != vector.end()) && !comparator(*it, value) && !comparator(value, *it)) + { + size_t indexToRemove = it - vector.begin(); + VmaVectorRemove(vector, indexToRemove); + return true; + } + return false; +} +#endif // _VMA_FUNCTIONS + +#ifndef _VMA_STATISTICS_FUNCTIONS + +static void VmaClearStatistics(VmaStatistics& outStats) +{ + outStats.blockCount = 0; + outStats.allocationCount = 0; + outStats.blockBytes = 0; + outStats.allocationBytes = 0; +} + +static void VmaAddStatistics(VmaStatistics& inoutStats, const VmaStatistics& src) +{ + inoutStats.blockCount += src.blockCount; + inoutStats.allocationCount += src.allocationCount; + inoutStats.blockBytes += src.blockBytes; + inoutStats.allocationBytes += src.allocationBytes; +} + +static void VmaClearDetailedStatistics(VmaDetailedStatistics& outStats) +{ + VmaClearStatistics(outStats.statistics); + outStats.unusedRangeCount = 0; + outStats.allocationSizeMin = VK_WHOLE_SIZE; + outStats.allocationSizeMax = 0; + outStats.unusedRangeSizeMin = VK_WHOLE_SIZE; + outStats.unusedRangeSizeMax = 0; +} + +static void VmaAddDetailedStatisticsAllocation(VmaDetailedStatistics& inoutStats, VkDeviceSize size) +{ + inoutStats.statistics.allocationCount++; + inoutStats.statistics.allocationBytes += size; + inoutStats.allocationSizeMin = VMA_MIN(inoutStats.allocationSizeMin, size); + inoutStats.allocationSizeMax = VMA_MAX(inoutStats.allocationSizeMax, size); +} + +static void VmaAddDetailedStatisticsUnusedRange(VmaDetailedStatistics& inoutStats, VkDeviceSize size) +{ + inoutStats.unusedRangeCount++; + inoutStats.unusedRangeSizeMin = VMA_MIN(inoutStats.unusedRangeSizeMin, size); + inoutStats.unusedRangeSizeMax = VMA_MAX(inoutStats.unusedRangeSizeMax, size); +} + +static void VmaAddDetailedStatistics(VmaDetailedStatistics& inoutStats, const VmaDetailedStatistics& src) +{ + VmaAddStatistics(inoutStats.statistics, src.statistics); + inoutStats.unusedRangeCount += src.unusedRangeCount; + inoutStats.allocationSizeMin = VMA_MIN(inoutStats.allocationSizeMin, src.allocationSizeMin); + inoutStats.allocationSizeMax = VMA_MAX(inoutStats.allocationSizeMax, src.allocationSizeMax); + inoutStats.unusedRangeSizeMin = VMA_MIN(inoutStats.unusedRangeSizeMin, src.unusedRangeSizeMin); + inoutStats.unusedRangeSizeMax = VMA_MAX(inoutStats.unusedRangeSizeMax, src.unusedRangeSizeMax); +} + +#endif // _VMA_STATISTICS_FUNCTIONS + +#ifndef _VMA_MUTEX_LOCK +// Helper RAII class to lock a mutex in constructor and unlock it in destructor (at the end of scope). +struct VmaMutexLock +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaMutexLock) +public: + VmaMutexLock(VMA_MUTEX& mutex, bool useMutex = true) : + m_pMutex(useMutex ? &mutex : VMA_NULL) + { + if (m_pMutex) { m_pMutex->Lock(); } + } + ~VmaMutexLock() { if (m_pMutex) { m_pMutex->Unlock(); } } + +private: + VMA_MUTEX* m_pMutex; +}; + +// Helper RAII class to lock a RW mutex in constructor and unlock it in destructor (at the end of scope), for reading. +struct VmaMutexLockRead +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaMutexLockRead) +public: + VmaMutexLockRead(VMA_RW_MUTEX& mutex, bool useMutex) : + m_pMutex(useMutex ? &mutex : VMA_NULL) + { + if (m_pMutex) { m_pMutex->LockRead(); } + } + ~VmaMutexLockRead() { if (m_pMutex) { m_pMutex->UnlockRead(); } } + +private: + VMA_RW_MUTEX* m_pMutex; +}; + +// Helper RAII class to lock a RW mutex in constructor and unlock it in destructor (at the end of scope), for writing. 
+struct VmaMutexLockWrite
+{
+    VMA_CLASS_NO_COPY_NO_MOVE(VmaMutexLockWrite)
+public:
+    VmaMutexLockWrite(VMA_RW_MUTEX& mutex, bool useMutex)
+        : m_pMutex(useMutex ? &mutex : VMA_NULL)
+    {
+        if (m_pMutex) { m_pMutex->LockWrite(); }
+    }
+    ~VmaMutexLockWrite() { if (m_pMutex) { m_pMutex->UnlockWrite(); } }
+
+private:
+    VMA_RW_MUTEX* m_pMutex;
+};
+
+#if VMA_DEBUG_GLOBAL_MUTEX
+    static VMA_MUTEX gDebugGlobalMutex;
+    #define VMA_DEBUG_GLOBAL_MUTEX_LOCK VmaMutexLock debugGlobalMutexLock(gDebugGlobalMutex, true);
+#else
+    #define VMA_DEBUG_GLOBAL_MUTEX_LOCK
+#endif
+#endif // _VMA_MUTEX_LOCK
+
+#ifndef _VMA_ATOMIC_TRANSACTIONAL_INCREMENT
+// An object that increments given atomic but decrements it back in the destructor unless Commit() is called.
+template<typename AtomicT>
+struct AtomicTransactionalIncrement
+{
+public:
+    using T = decltype(AtomicT().load());
+
+    ~AtomicTransactionalIncrement()
+    {
+        if(m_Atomic)
+            --(*m_Atomic);
+    }
+
+    void Commit() { m_Atomic = VMA_NULL; }
+    T Increment(AtomicT* atomic)
+    {
+        m_Atomic = atomic;
+        return m_Atomic->fetch_add(1);
+    }
+
+private:
+    AtomicT* m_Atomic = VMA_NULL;
+};
+#endif // _VMA_ATOMIC_TRANSACTIONAL_INCREMENT
+
+#ifndef _VMA_STL_ALLOCATOR
+// STL-compatible allocator.
+template<typename T>
+struct VmaStlAllocator
+{
+    const VkAllocationCallbacks* const m_pCallbacks;
+    typedef T value_type;
+
+    VmaStlAllocator(const VkAllocationCallbacks* pCallbacks) : m_pCallbacks(pCallbacks) {}
+    template<typename U>
+    VmaStlAllocator(const VmaStlAllocator<U>& src) : m_pCallbacks(src.m_pCallbacks) {}
+    VmaStlAllocator(const VmaStlAllocator&) = default;
+    VmaStlAllocator& operator=(const VmaStlAllocator&) = delete;
+
+    T* allocate(size_t n) { return VmaAllocateArray<T>(m_pCallbacks, n); }
+    void deallocate(T* p, size_t n) { VmaFree(m_pCallbacks, p); }
+
+    template<typename U>
+    bool operator==(const VmaStlAllocator<U>& rhs) const
+    {
+        return m_pCallbacks == rhs.m_pCallbacks;
+    }
+    template<typename U>
+    bool operator!=(const VmaStlAllocator<U>& rhs) const
+    {
+        return m_pCallbacks != rhs.m_pCallbacks;
+    }
+};
+#endif // _VMA_STL_ALLOCATOR
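+/*
+VmaStlAllocator usage sketch (callbacks may be VMA_NULL, in which case VmaMalloc/VmaFree
+fall back to the aligned system allocator):
+
+    VmaStlAllocator<uint32_t> alloc(VMA_NULL);
+    uint32_t* p = alloc.allocate(16);
+    alloc.deallocate(p, 16);
+*/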
+#ifndef _VMA_VECTOR
+/* Class with interface compatible with subset of std::vector.
+T must be POD because constructors and destructors are not called and memcpy is
+used for these objects. */
+template<typename T, typename AllocatorT>
+class VmaVector
+{
+public:
+    typedef T value_type;
+    typedef T* iterator;
+    typedef const T* const_iterator;
+
+    VmaVector(const AllocatorT& allocator);
+    VmaVector(size_t count, const AllocatorT& allocator);
+    // This version of the constructor is here for compatibility with pre-C++14 std::vector.
+    // value is unused.
+    VmaVector(size_t count, const T& value, const AllocatorT& allocator) : VmaVector(count, allocator) {}
+    VmaVector(const VmaVector& src);
+    VmaVector& operator=(const VmaVector& rhs);
+    ~VmaVector() { VmaFree(m_Allocator.m_pCallbacks, m_pArray); }
+
+    bool empty() const { return m_Count == 0; }
+    size_t size() const { return m_Count; }
+    T* data() { return m_pArray; }
+    T& front() { VMA_HEAVY_ASSERT(m_Count > 0); return m_pArray[0]; }
+    T& back() { VMA_HEAVY_ASSERT(m_Count > 0); return m_pArray[m_Count - 1]; }
+    const T* data() const { return m_pArray; }
+    const T& front() const { VMA_HEAVY_ASSERT(m_Count > 0); return m_pArray[0]; }
+    const T& back() const { VMA_HEAVY_ASSERT(m_Count > 0); return m_pArray[m_Count - 1]; }
+
+    iterator begin() { return m_pArray; }
+    iterator end() { return m_pArray + m_Count; }
+    const_iterator cbegin() const { return m_pArray; }
+    const_iterator cend() const { return m_pArray + m_Count; }
+    const_iterator begin() const { return cbegin(); }
+    const_iterator end() const { return cend(); }
+
+    void pop_front() { VMA_HEAVY_ASSERT(m_Count > 0); remove(0); }
+    void pop_back() { VMA_HEAVY_ASSERT(m_Count > 0); resize(size() - 1); }
+    void push_front(const T& src) { insert(0, src); }
+
+    void push_back(const T& src);
+    void reserve(size_t newCapacity, bool freeMemory = false);
+    void resize(size_t newCount);
+    void clear() { resize(0); }
+    void shrink_to_fit();
+    void insert(size_t index, const T& src);
+    void remove(size_t index);
+
+    T& operator[](size_t index) { VMA_HEAVY_ASSERT(index < m_Count); return m_pArray[index]; }
+    const T& operator[](size_t index) const { VMA_HEAVY_ASSERT(index < m_Count); return m_pArray[index]; }
+
+private:
+    AllocatorT m_Allocator;
+    T* m_pArray;
+    size_t m_Count;
+    size_t m_Capacity;
+};
+
+#ifndef _VMA_VECTOR_FUNCTIONS
+template<typename T, typename AllocatorT>
+VmaVector<T, AllocatorT>::VmaVector(const AllocatorT& allocator)
+    : m_Allocator(allocator),
+    m_pArray(VMA_NULL),
+    m_Count(0),
+    m_Capacity(0) {}
+
+template<typename T, typename AllocatorT>
+VmaVector<T, AllocatorT>::VmaVector(size_t count, const AllocatorT& allocator)
+    : m_Allocator(allocator),
+    m_pArray(count ? (T*)VmaAllocateArray<T>(allocator.m_pCallbacks, count) : VMA_NULL),
+    m_Count(count),
+    m_Capacity(count) {}
+
+template<typename T, typename AllocatorT>
+VmaVector<T, AllocatorT>::VmaVector(const VmaVector& src)
+    : m_Allocator(src.m_Allocator),
+    m_pArray(src.m_Count ? (T*)VmaAllocateArray<T>(src.m_Allocator.m_pCallbacks, src.m_Count) : VMA_NULL),
+    m_Count(src.m_Count),
+    m_Capacity(src.m_Count)
+{
+    if (m_Count != 0)
+    {
+        memcpy(m_pArray, src.m_pArray, m_Count * sizeof(T));
+    }
+}
+
+template<typename T, typename AllocatorT>
+VmaVector<T, AllocatorT>& VmaVector<T, AllocatorT>::operator=(const VmaVector& rhs)
+{
+    if (&rhs != this)
+    {
+        resize(rhs.m_Count);
+        if (m_Count != 0)
+        {
+            memcpy(m_pArray, rhs.m_pArray, m_Count * sizeof(T));
+        }
+    }
+    return *this;
+}
+
+template<typename T, typename AllocatorT>
+void VmaVector<T, AllocatorT>::push_back(const T& src)
+{
+    const size_t newIndex = size();
+    resize(newIndex + 1);
+    m_pArray[newIndex] = src;
+}
+
+template<typename T, typename AllocatorT>
+void VmaVector<T, AllocatorT>::reserve(size_t newCapacity, bool freeMemory)
+{
+    newCapacity = VMA_MAX(newCapacity, m_Count);
+
+    if ((newCapacity < m_Capacity) && !freeMemory)
+    {
+        newCapacity = m_Capacity;
+    }
+
+    if (newCapacity != m_Capacity)
+    {
+        T* const newArray = newCapacity ? VmaAllocateArray<T>(m_Allocator.m_pCallbacks, newCapacity) : VMA_NULL;
+        if (m_Count != 0)
+        {
+            memcpy(newArray, m_pArray, m_Count * sizeof(T));
+        }
+        VmaFree(m_Allocator.m_pCallbacks, m_pArray);
+        m_Capacity = newCapacity;
+        m_pArray = newArray;
+    }
+}
+
+template<typename T, typename AllocatorT>
+void VmaVector<T, AllocatorT>::resize(size_t newCount)
+{
+    size_t newCapacity = m_Capacity;
+    if (newCount > m_Capacity)
+    {
+        newCapacity = VMA_MAX(newCount, VMA_MAX(m_Capacity * 3 / 2, (size_t)8));
+    }
+
+    if (newCapacity != m_Capacity)
+    {
+        T* const newArray = newCapacity ? VmaAllocateArray<T>(m_Allocator.m_pCallbacks, newCapacity) : VMA_NULL;
+        const size_t elementsToCopy = VMA_MIN(m_Count, newCount);
+        if (elementsToCopy != 0)
+        {
+            memcpy(newArray, m_pArray, elementsToCopy * sizeof(T));
+        }
+        VmaFree(m_Allocator.m_pCallbacks, m_pArray);
+        m_Capacity = newCapacity;
+        m_pArray = newArray;
+    }
+
+    m_Count = newCount;
+}
+
+template<typename T, typename AllocatorT>
+void VmaVector<T, AllocatorT>::shrink_to_fit()
+{
+    if (m_Capacity > m_Count)
+    {
+        T* newArray = VMA_NULL;
+        if (m_Count > 0)
+        {
+            newArray = VmaAllocateArray<T>(m_Allocator.m_pCallbacks, m_Count);
+            memcpy(newArray, m_pArray, m_Count * sizeof(T));
+        }
+        VmaFree(m_Allocator.m_pCallbacks, m_pArray);
+        m_Capacity = m_Count;
+        m_pArray = newArray;
+    }
+}
+
+template<typename T, typename AllocatorT>
+void VmaVector<T, AllocatorT>::insert(size_t index, const T& src)
+{
+    VMA_HEAVY_ASSERT(index <= m_Count);
+    const size_t oldCount = size();
+    resize(oldCount + 1);
+    if (index < oldCount)
+    {
+        memmove(m_pArray + (index + 1), m_pArray + index, (oldCount - index) * sizeof(T));
+    }
+    m_pArray[index] = src;
+}
+
+template<typename T, typename AllocatorT>
+void VmaVector<T, AllocatorT>::remove(size_t index)
+{
+    VMA_HEAVY_ASSERT(index < m_Count);
+    const size_t oldCount = size();
+    if (index < oldCount - 1)
+    {
+        memmove(m_pArray + index, m_pArray + (index + 1), (oldCount - index - 1) * sizeof(T));
+    }
+    resize(oldCount - 1);
+}
+#endif // _VMA_VECTOR_FUNCTIONS
+
+template<typename T, typename allocatorT>
+static void VmaVectorInsert(VmaVector<T, allocatorT>& vec, size_t index, const T& item)
+{
+    vec.insert(index, item);
+}
+
+template<typename T, typename allocatorT>
+static void VmaVectorRemove(VmaVector<T, allocatorT>& vec, size_t index)
+{
+    vec.remove(index);
+}
+#endif // _VMA_VECTOR
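+/*
+VmaVector usage sketch (POD element types only - constructors/destructors are never
+run; the allocator wraps optional VkAllocationCallbacks):
+
+    VmaVector<uint32_t, VmaStlAllocator<uint32_t>> v{ VmaStlAllocator<uint32_t>(VMA_NULL) };
+    v.push_back(7);
+    v.insert(0, 3); // { 3, 7 }
+    v.remove(1);    // { 3 }
+*/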
m_DynamicArray.data() : m_StaticArray; } + const T& front() const { VMA_HEAVY_ASSERT(m_Count > 0); return data()[0]; } + const T& back() const { VMA_HEAVY_ASSERT(m_Count > 0); return data()[m_Count - 1]; } + + iterator begin() { return data(); } + iterator end() { return data() + m_Count; } + + void pop_front() { VMA_HEAVY_ASSERT(m_Count > 0); remove(0); } + void pop_back() { VMA_HEAVY_ASSERT(m_Count > 0); resize(size() - 1); } + void push_front(const T& src) { insert(0, src); } + + void push_back(const T& src); + void resize(size_t newCount, bool freeMemory = false); + void clear(bool freeMemory = false); + void insert(size_t index, const T& src); + void remove(size_t index); + + T& operator[](size_t index) { VMA_HEAVY_ASSERT(index < m_Count); return data()[index]; } + const T& operator[](size_t index) const { VMA_HEAVY_ASSERT(index < m_Count); return data()[index]; } + +private: + size_t m_Count; + T m_StaticArray[N]; // Used when m_Size <= N + VmaVector m_DynamicArray; // Used when m_Size > N +}; + +#ifndef _VMA_SMALL_VECTOR_FUNCTIONS +template +VmaSmallVector::VmaSmallVector(const AllocatorT& allocator) + : m_Count(0), + m_DynamicArray(allocator) {} + +template +VmaSmallVector::VmaSmallVector(size_t count, const AllocatorT& allocator) + : m_Count(count), + m_DynamicArray(count > N ? count : 0, allocator) {} + +template +void VmaSmallVector::push_back(const T& src) +{ + const size_t newIndex = size(); + resize(newIndex + 1); + data()[newIndex] = src; +} + +template +void VmaSmallVector::resize(size_t newCount, bool freeMemory) +{ + if (newCount > N && m_Count > N) + { + // Any direction, staying in m_DynamicArray + m_DynamicArray.resize(newCount); + if (freeMemory) + { + m_DynamicArray.shrink_to_fit(); + } + } + else if (newCount > N && m_Count <= N) + { + // Growing, moving from m_StaticArray to m_DynamicArray + m_DynamicArray.resize(newCount); + if (m_Count > 0) + { + memcpy(m_DynamicArray.data(), m_StaticArray, m_Count * sizeof(T)); + } + } + else if (newCount <= N && m_Count > N) + { + // Shrinking, moving from m_DynamicArray to m_StaticArray + if (newCount > 0) + { + memcpy(m_StaticArray, m_DynamicArray.data(), newCount * sizeof(T)); + } + m_DynamicArray.resize(0); + if (freeMemory) + { + m_DynamicArray.shrink_to_fit(); + } + } + else + { + // Any direction, staying in m_StaticArray - nothing to do here + } + m_Count = newCount; +} + +template +void VmaSmallVector::clear(bool freeMemory) +{ + m_DynamicArray.clear(); + if (freeMemory) + { + m_DynamicArray.shrink_to_fit(); + } + m_Count = 0; +} + +template +void VmaSmallVector::insert(size_t index, const T& src) +{ + VMA_HEAVY_ASSERT(index <= m_Count); + const size_t oldCount = size(); + resize(oldCount + 1); + T* const dataPtr = data(); + if (index < oldCount) + { + // I know, this could be more optimal for case where memmove can be memcpy directly from m_StaticArray to m_DynamicArray. + memmove(dataPtr + (index + 1), dataPtr + index, (oldCount - index) * sizeof(T)); + } + dataPtr[index] = src; +} + +template +void VmaSmallVector::remove(size_t index) +{ + VMA_HEAVY_ASSERT(index < m_Count); + const size_t oldCount = size(); + if (index < oldCount - 1) + { + // I know, this could be more optimal for case where memmove can be memcpy directly from m_DynamicArray to m_StaticArray. 
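
The resize() branches above are the heart of the small-vector optimization: elements live in m_StaticArray until the count crosses N, spill into the heap-backed m_DynamicArray beyond that, and are copied back when the container shrinks again. A minimal standalone sketch of that policy follows; the SmallBuffer name is illustrative, and, like VMA's containers, it assumes trivially copyable T (everything moves via memcpy):

    #include <cstddef>
    #include <cstring>
    #include <vector>

    // Sketch: up to N elements live in an inline array; crossing the threshold
    // spills them to the heap, and shrinking back copies the surviving prefix
    // into the inline storage again.
    template<typename T, size_t N>
    class SmallBuffer
    {
    public:
        size_t size() const { return m_Count; }
        T* data() { return m_Count > N ? m_Dynamic.data() : m_Static; }

        void resize(size_t newCount)
        {
            if (newCount > N && m_Count <= N)      // Inline -> heap.
            {
                m_Dynamic.resize(newCount);
                std::memcpy(m_Dynamic.data(), m_Static, m_Count * sizeof(T));
            }
            else if (newCount <= N && m_Count > N) // Heap -> inline.
            {
                std::memcpy(m_Static, m_Dynamic.data(), newCount * sizeof(T));
                m_Dynamic.clear();
            }
            else if (newCount > N)                 // Staying on the heap.
            {
                m_Dynamic.resize(newCount);
            }
            m_Count = newCount;                    // Inline <-> inline: count only.
        }

    private:
        size_t m_Count = 0;
        T m_Static[N];            // Valid while m_Count <= N.
        std::vector<T> m_Dynamic; // Valid while m_Count > N.
    };

Paying the copy inside resize() keeps data() down to one comparison and one pointer pick, which is the same trade-off VmaSmallVector makes for its hot accessors.
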
+        T* const dataPtr = data();
+        memmove(dataPtr + index, dataPtr + (index + 1), (oldCount - index - 1) * sizeof(T));
+    }
+    resize(oldCount - 1);
+}
+#endif // _VMA_SMALL_VECTOR_FUNCTIONS
+#endif // _VMA_SMALL_VECTOR
+
+#ifndef _VMA_POOL_ALLOCATOR
+/*
+Allocator for objects of type T using a list of arrays (pools) to speed up
+allocation. Number of elements that can be allocated is not bounded because
+allocator can create multiple blocks.
+*/
+template<typename T>
+class VmaPoolAllocator
+{
+    VMA_CLASS_NO_COPY_NO_MOVE(VmaPoolAllocator)
+public:
+    VmaPoolAllocator(const VkAllocationCallbacks* pAllocationCallbacks, uint32_t firstBlockCapacity);
+    ~VmaPoolAllocator();
+    template<typename... Types> T* Alloc(Types&&... args);
+    void Free(T* ptr);
+
+private:
+    union Item
+    {
+        uint32_t NextFreeIndex;
+        alignas(T) char Value[sizeof(T)];
+    };
+    struct ItemBlock
+    {
+        Item* pItems;
+        uint32_t Capacity;
+        uint32_t FirstFreeIndex;
+    };
+
+    const VkAllocationCallbacks* m_pAllocationCallbacks;
+    const uint32_t m_FirstBlockCapacity;
+    VmaVector<ItemBlock, VmaStlAllocator<ItemBlock>> m_ItemBlocks;
+
+    ItemBlock& CreateNewBlock();
+};
+
+#ifndef _VMA_POOL_ALLOCATOR_FUNCTIONS
+template<typename T>
+VmaPoolAllocator<T>::VmaPoolAllocator(const VkAllocationCallbacks* pAllocationCallbacks, uint32_t firstBlockCapacity)
+    : m_pAllocationCallbacks(pAllocationCallbacks),
+    m_FirstBlockCapacity(firstBlockCapacity),
+    m_ItemBlocks(VmaStlAllocator<ItemBlock>(pAllocationCallbacks))
+{
+    VMA_ASSERT(m_FirstBlockCapacity > 1);
+}
+
+template<typename T>
+VmaPoolAllocator<T>::~VmaPoolAllocator()
+{
+    for (size_t i = m_ItemBlocks.size(); i--;)
+        vma_delete_array(m_pAllocationCallbacks, m_ItemBlocks[i].pItems, m_ItemBlocks[i].Capacity);
+    m_ItemBlocks.clear();
+}
+
+template<typename T>
+template<typename... Types> T* VmaPoolAllocator<T>::Alloc(Types&&... args)
+{
+    for (size_t i = m_ItemBlocks.size(); i--; )
+    {
+        ItemBlock& block = m_ItemBlocks[i];
+        // This block has some free items: Use first one.
+        if (block.FirstFreeIndex != UINT32_MAX)
+        {
+            Item* const pItem = &block.pItems[block.FirstFreeIndex];
+            block.FirstFreeIndex = pItem->NextFreeIndex;
+            T* result = (T*)&pItem->Value;
+            new(result) T(std::forward<Types>(args)...); // Explicit constructor call.
+            return result;
+        }
+    }
+
+    // No block has free item: Create new one and use it.
+    ItemBlock& newBlock = CreateNewBlock();
+    Item* const pItem = &newBlock.pItems[0];
+    newBlock.FirstFreeIndex = pItem->NextFreeIndex;
+    T* result = (T*)&pItem->Value;
+    new(result) T(std::forward<Types>(args)...); // Explicit constructor call.
+    return result;
+}
+
+template<typename T>
+void VmaPoolAllocator<T>::Free(T* ptr)
+{
+    // Search all memory blocks to find ptr.
+    for (size_t i = m_ItemBlocks.size(); i--; )
+    {
+        ItemBlock& block = m_ItemBlocks[i];
+
+        // Casting to union.
+        Item* pItemPtr;
+        memcpy(&pItemPtr, &ptr, sizeof(pItemPtr));
+
+        // Check if pItemPtr is in address range of this block.
+        if ((pItemPtr >= block.pItems) && (pItemPtr < block.pItems + block.Capacity))
+        {
+            ptr->~T(); // Explicit destructor call.
+            const uint32_t index = static_cast<uint32_t>(pItemPtr - block.pItems);
+            pItemPtr->NextFreeIndex = block.FirstFreeIndex;
+            block.FirstFreeIndex = index;
+            return;
+        }
+    }
+    VMA_ASSERT(0 && "Pointer doesn't belong to this memory pool.");
+}
+
+template<typename T>
+typename VmaPoolAllocator<T>::ItemBlock& VmaPoolAllocator<T>::CreateNewBlock()
+{
+    const uint32_t newBlockCapacity = m_ItemBlocks.empty() ? 
+ m_FirstBlockCapacity : m_ItemBlocks.back().Capacity * 3 / 2; + + const ItemBlock newBlock = + { + vma_new_array(m_pAllocationCallbacks, Item, newBlockCapacity), + newBlockCapacity, + 0 + }; + + m_ItemBlocks.push_back(newBlock); + + // Setup singly-linked list of all free items in this block. + for (uint32_t i = 0; i < newBlockCapacity - 1; ++i) + newBlock.pItems[i].NextFreeIndex = i + 1; + newBlock.pItems[newBlockCapacity - 1].NextFreeIndex = UINT32_MAX; + return m_ItemBlocks.back(); +} +#endif // _VMA_POOL_ALLOCATOR_FUNCTIONS +#endif // _VMA_POOL_ALLOCATOR + +#ifndef _VMA_RAW_LIST +template +struct VmaListItem +{ + VmaListItem* pPrev; + VmaListItem* pNext; + T Value; +}; + +// Doubly linked list. +template +class VmaRawList +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaRawList) +public: + typedef VmaListItem ItemType; + + VmaRawList(const VkAllocationCallbacks* pAllocationCallbacks); + // Intentionally not calling Clear, because that would be unnecessary + // computations to return all items to m_ItemAllocator as free. + ~VmaRawList() = default; + + size_t GetCount() const { return m_Count; } + bool IsEmpty() const { return m_Count == 0; } + + ItemType* Front() { return m_pFront; } + ItemType* Back() { return m_pBack; } + const ItemType* Front() const { return m_pFront; } + const ItemType* Back() const { return m_pBack; } + + ItemType* PushFront(); + ItemType* PushBack(); + ItemType* PushFront(const T& value); + ItemType* PushBack(const T& value); + void PopFront(); + void PopBack(); + + // Item can be null - it means PushBack. + ItemType* InsertBefore(ItemType* pItem); + // Item can be null - it means PushFront. + ItemType* InsertAfter(ItemType* pItem); + ItemType* InsertBefore(ItemType* pItem, const T& value); + ItemType* InsertAfter(ItemType* pItem, const T& value); + + void Clear(); + void Remove(ItemType* pItem); + +private: + const VkAllocationCallbacks* const m_pAllocationCallbacks; + VmaPoolAllocator m_ItemAllocator; + ItemType* m_pFront; + ItemType* m_pBack; + size_t m_Count; +}; + +#ifndef _VMA_RAW_LIST_FUNCTIONS +template +VmaRawList::VmaRawList(const VkAllocationCallbacks* pAllocationCallbacks) + : m_pAllocationCallbacks(pAllocationCallbacks), + m_ItemAllocator(pAllocationCallbacks, 128), + m_pFront(VMA_NULL), + m_pBack(VMA_NULL), + m_Count(0) {} + +template +VmaListItem* VmaRawList::PushFront() +{ + ItemType* const pNewItem = m_ItemAllocator.Alloc(); + pNewItem->pPrev = VMA_NULL; + if (IsEmpty()) + { + pNewItem->pNext = VMA_NULL; + m_pFront = pNewItem; + m_pBack = pNewItem; + m_Count = 1; + } + else + { + pNewItem->pNext = m_pFront; + m_pFront->pPrev = pNewItem; + m_pFront = pNewItem; + ++m_Count; + } + return pNewItem; +} + +template +VmaListItem* VmaRawList::PushBack() +{ + ItemType* const pNewItem = m_ItemAllocator.Alloc(); + pNewItem->pNext = VMA_NULL; + if(IsEmpty()) + { + pNewItem->pPrev = VMA_NULL; + m_pFront = pNewItem; + m_pBack = pNewItem; + m_Count = 1; + } + else + { + pNewItem->pPrev = m_pBack; + m_pBack->pNext = pNewItem; + m_pBack = pNewItem; + ++m_Count; + } + return pNewItem; +} + +template +VmaListItem* VmaRawList::PushFront(const T& value) +{ + ItemType* const pNewItem = PushFront(); + pNewItem->Value = value; + return pNewItem; +} + +template +VmaListItem* VmaRawList::PushBack(const T& value) +{ + ItemType* const pNewItem = PushBack(); + pNewItem->Value = value; + return pNewItem; +} + +template +void VmaRawList::PopFront() +{ + VMA_HEAVY_ASSERT(m_Count > 0); + ItemType* const pFrontItem = m_pFront; + ItemType* const pNextItem = pFrontItem->pNext; + if (pNextItem != 
VMA_NULL) + { + pNextItem->pPrev = VMA_NULL; + } + m_pFront = pNextItem; + m_ItemAllocator.Free(pFrontItem); + --m_Count; +} + +template +void VmaRawList::PopBack() +{ + VMA_HEAVY_ASSERT(m_Count > 0); + ItemType* const pBackItem = m_pBack; + ItemType* const pPrevItem = pBackItem->pPrev; + if(pPrevItem != VMA_NULL) + { + pPrevItem->pNext = VMA_NULL; + } + m_pBack = pPrevItem; + m_ItemAllocator.Free(pBackItem); + --m_Count; +} + +template +void VmaRawList::Clear() +{ + if (IsEmpty() == false) + { + ItemType* pItem = m_pBack; + while (pItem != VMA_NULL) + { + ItemType* const pPrevItem = pItem->pPrev; + m_ItemAllocator.Free(pItem); + pItem = pPrevItem; + } + m_pFront = VMA_NULL; + m_pBack = VMA_NULL; + m_Count = 0; + } +} + +template +void VmaRawList::Remove(ItemType* pItem) +{ + VMA_HEAVY_ASSERT(pItem != VMA_NULL); + VMA_HEAVY_ASSERT(m_Count > 0); + + if(pItem->pPrev != VMA_NULL) + { + pItem->pPrev->pNext = pItem->pNext; + } + else + { + VMA_HEAVY_ASSERT(m_pFront == pItem); + m_pFront = pItem->pNext; + } + + if(pItem->pNext != VMA_NULL) + { + pItem->pNext->pPrev = pItem->pPrev; + } + else + { + VMA_HEAVY_ASSERT(m_pBack == pItem); + m_pBack = pItem->pPrev; + } + + m_ItemAllocator.Free(pItem); + --m_Count; +} + +template +VmaListItem* VmaRawList::InsertBefore(ItemType* pItem) +{ + if(pItem != VMA_NULL) + { + ItemType* const prevItem = pItem->pPrev; + ItemType* const newItem = m_ItemAllocator.Alloc(); + newItem->pPrev = prevItem; + newItem->pNext = pItem; + pItem->pPrev = newItem; + if(prevItem != VMA_NULL) + { + prevItem->pNext = newItem; + } + else + { + VMA_HEAVY_ASSERT(m_pFront == pItem); + m_pFront = newItem; + } + ++m_Count; + return newItem; + } + else + return PushBack(); +} + +template +VmaListItem* VmaRawList::InsertAfter(ItemType* pItem) +{ + if(pItem != VMA_NULL) + { + ItemType* const nextItem = pItem->pNext; + ItemType* const newItem = m_ItemAllocator.Alloc(); + newItem->pNext = nextItem; + newItem->pPrev = pItem; + pItem->pNext = newItem; + if(nextItem != VMA_NULL) + { + nextItem->pPrev = newItem; + } + else + { + VMA_HEAVY_ASSERT(m_pBack == pItem); + m_pBack = newItem; + } + ++m_Count; + return newItem; + } + else + return PushFront(); +} + +template +VmaListItem* VmaRawList::InsertBefore(ItemType* pItem, const T& value) +{ + ItemType* const newItem = InsertBefore(pItem); + newItem->Value = value; + return newItem; +} + +template +VmaListItem* VmaRawList::InsertAfter(ItemType* pItem, const T& value) +{ + ItemType* const newItem = InsertAfter(pItem); + newItem->Value = value; + return newItem; +} +#endif // _VMA_RAW_LIST_FUNCTIONS +#endif // _VMA_RAW_LIST + +#ifndef _VMA_LIST +template +class VmaList +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaList) +public: + class reverse_iterator; + class const_iterator; + class const_reverse_iterator; + + class iterator + { + friend class const_iterator; + friend class VmaList; + public: + iterator() : m_pList(VMA_NULL), m_pItem(VMA_NULL) {} + iterator(const reverse_iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + + T& operator*() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return m_pItem->Value; } + T* operator->() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return &m_pItem->Value; } + + bool operator==(const iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem == rhs.m_pItem; } + bool operator!=(const iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem != rhs.m_pItem; } + + iterator operator++(int) { iterator result = *this; ++*this; return result; } + iterator operator--(int) 
{ iterator result = *this; --*this; return result; } + + iterator& operator++() { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); m_pItem = m_pItem->pNext; return *this; } + iterator& operator--(); + + private: + VmaRawList* m_pList; + VmaListItem* m_pItem; + + iterator(VmaRawList* pList, VmaListItem* pItem) : m_pList(pList), m_pItem(pItem) {} + }; + class reverse_iterator + { + friend class const_reverse_iterator; + friend class VmaList; + public: + reverse_iterator() : m_pList(VMA_NULL), m_pItem(VMA_NULL) {} + reverse_iterator(const iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + + T& operator*() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return m_pItem->Value; } + T* operator->() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return &m_pItem->Value; } + + bool operator==(const reverse_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem == rhs.m_pItem; } + bool operator!=(const reverse_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem != rhs.m_pItem; } + + reverse_iterator operator++(int) { reverse_iterator result = *this; ++* this; return result; } + reverse_iterator operator--(int) { reverse_iterator result = *this; --* this; return result; } + + reverse_iterator& operator++() { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); m_pItem = m_pItem->pPrev; return *this; } + reverse_iterator& operator--(); + + private: + VmaRawList* m_pList; + VmaListItem* m_pItem; + + reverse_iterator(VmaRawList* pList, VmaListItem* pItem) : m_pList(pList), m_pItem(pItem) {} + }; + class const_iterator + { + friend class VmaList; + public: + const_iterator() : m_pList(VMA_NULL), m_pItem(VMA_NULL) {} + const_iterator(const iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + const_iterator(const reverse_iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + + iterator drop_const() { return { const_cast*>(m_pList), const_cast*>(m_pItem) }; } + + const T& operator*() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return m_pItem->Value; } + const T* operator->() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return &m_pItem->Value; } + + bool operator==(const const_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem == rhs.m_pItem; } + bool operator!=(const const_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem != rhs.m_pItem; } + + const_iterator operator++(int) { const_iterator result = *this; ++* this; return result; } + const_iterator operator--(int) { const_iterator result = *this; --* this; return result; } + + const_iterator& operator++() { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); m_pItem = m_pItem->pNext; return *this; } + const_iterator& operator--(); + + private: + const VmaRawList* m_pList; + const VmaListItem* m_pItem; + + const_iterator(const VmaRawList* pList, const VmaListItem* pItem) : m_pList(pList), m_pItem(pItem) {} + }; + class const_reverse_iterator + { + friend class VmaList; + public: + const_reverse_iterator() : m_pList(VMA_NULL), m_pItem(VMA_NULL) {} + const_reverse_iterator(const reverse_iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + const_reverse_iterator(const iterator& src) : m_pList(src.m_pList), m_pItem(src.m_pItem) {} + + reverse_iterator drop_const() { return { const_cast*>(m_pList), const_cast*>(m_pItem) }; } + + const T& operator*() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return m_pItem->Value; } + const T* operator->() const { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); return &m_pItem->Value; } + + bool 
operator==(const const_reverse_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem == rhs.m_pItem; } + bool operator!=(const const_reverse_iterator& rhs) const { VMA_HEAVY_ASSERT(m_pList == rhs.m_pList); return m_pItem != rhs.m_pItem; } + + const_reverse_iterator operator++(int) { const_reverse_iterator result = *this; ++* this; return result; } + const_reverse_iterator operator--(int) { const_reverse_iterator result = *this; --* this; return result; } + + const_reverse_iterator& operator++() { VMA_HEAVY_ASSERT(m_pItem != VMA_NULL); m_pItem = m_pItem->pPrev; return *this; } + const_reverse_iterator& operator--(); + + private: + const VmaRawList* m_pList; + const VmaListItem* m_pItem; + + const_reverse_iterator(const VmaRawList* pList, const VmaListItem* pItem) : m_pList(pList), m_pItem(pItem) {} + }; + + VmaList(const AllocatorT& allocator) : m_RawList(allocator.m_pCallbacks) {} + + bool empty() const { return m_RawList.IsEmpty(); } + size_t size() const { return m_RawList.GetCount(); } + + iterator begin() { return iterator(&m_RawList, m_RawList.Front()); } + iterator end() { return iterator(&m_RawList, VMA_NULL); } + + const_iterator cbegin() const { return const_iterator(&m_RawList, m_RawList.Front()); } + const_iterator cend() const { return const_iterator(&m_RawList, VMA_NULL); } + + const_iterator begin() const { return cbegin(); } + const_iterator end() const { return cend(); } + + reverse_iterator rbegin() { return reverse_iterator(&m_RawList, m_RawList.Back()); } + reverse_iterator rend() { return reverse_iterator(&m_RawList, VMA_NULL); } + + const_reverse_iterator crbegin() const { return const_reverse_iterator(&m_RawList, m_RawList.Back()); } + const_reverse_iterator crend() const { return const_reverse_iterator(&m_RawList, VMA_NULL); } + + const_reverse_iterator rbegin() const { return crbegin(); } + const_reverse_iterator rend() const { return crend(); } + + void push_back(const T& value) { m_RawList.PushBack(value); } + iterator insert(iterator it, const T& value) { return iterator(&m_RawList, m_RawList.InsertBefore(it.m_pItem, value)); } + + void clear() { m_RawList.Clear(); } + void erase(iterator it) { m_RawList.Remove(it.m_pItem); } + +private: + VmaRawList m_RawList; +}; + +#ifndef _VMA_LIST_FUNCTIONS +template +typename VmaList::iterator& VmaList::iterator::operator--() +{ + if (m_pItem != VMA_NULL) + { + m_pItem = m_pItem->pPrev; + } + else + { + VMA_HEAVY_ASSERT(!m_pList->IsEmpty()); + m_pItem = m_pList->Back(); + } + return *this; +} + +template +typename VmaList::reverse_iterator& VmaList::reverse_iterator::operator--() +{ + if (m_pItem != VMA_NULL) + { + m_pItem = m_pItem->pNext; + } + else + { + VMA_HEAVY_ASSERT(!m_pList->IsEmpty()); + m_pItem = m_pList->Front(); + } + return *this; +} + +template +typename VmaList::const_iterator& VmaList::const_iterator::operator--() +{ + if (m_pItem != VMA_NULL) + { + m_pItem = m_pItem->pPrev; + } + else + { + VMA_HEAVY_ASSERT(!m_pList->IsEmpty()); + m_pItem = m_pList->Back(); + } + return *this; +} + +template +typename VmaList::const_reverse_iterator& VmaList::const_reverse_iterator::operator--() +{ + if (m_pItem != VMA_NULL) + { + m_pItem = m_pItem->pNext; + } + else + { + VMA_HEAVY_ASSERT(!m_pList->IsEmpty()); + m_pItem = m_pList->Back(); + } + return *this; +} +#endif // _VMA_LIST_FUNCTIONS +#endif // _VMA_LIST + +#ifndef _VMA_INTRUSIVE_LINKED_LIST +/* +Expected interface of ItemTypeTraits: +struct MyItemTypeTraits +{ + typedef MyItem ItemType; + static ItemType* GetPrev(const ItemType* item) { 
return item->myPrevPtr; } + static ItemType* GetNext(const ItemType* item) { return item->myNextPtr; } + static ItemType*& AccessPrev(ItemType* item) { return item->myPrevPtr; } + static ItemType*& AccessNext(ItemType* item) { return item->myNextPtr; } +}; +*/ +template +class VmaIntrusiveLinkedList +{ +public: + typedef typename ItemTypeTraits::ItemType ItemType; + static ItemType* GetPrev(const ItemType* item) { return ItemTypeTraits::GetPrev(item); } + static ItemType* GetNext(const ItemType* item) { return ItemTypeTraits::GetNext(item); } + + // Movable, not copyable. + VmaIntrusiveLinkedList() = default; + VmaIntrusiveLinkedList(VmaIntrusiveLinkedList && src); + VmaIntrusiveLinkedList(const VmaIntrusiveLinkedList&) = delete; + VmaIntrusiveLinkedList& operator=(VmaIntrusiveLinkedList&& src); + VmaIntrusiveLinkedList& operator=(const VmaIntrusiveLinkedList&) = delete; + ~VmaIntrusiveLinkedList() { VMA_HEAVY_ASSERT(IsEmpty()); } + + size_t GetCount() const { return m_Count; } + bool IsEmpty() const { return m_Count == 0; } + ItemType* Front() { return m_Front; } + ItemType* Back() { return m_Back; } + const ItemType* Front() const { return m_Front; } + const ItemType* Back() const { return m_Back; } + + void PushBack(ItemType* item); + void PushFront(ItemType* item); + ItemType* PopBack(); + ItemType* PopFront(); + + // MyItem can be null - it means PushBack. + void InsertBefore(ItemType* existingItem, ItemType* newItem); + // MyItem can be null - it means PushFront. + void InsertAfter(ItemType* existingItem, ItemType* newItem); + void Remove(ItemType* item); + void RemoveAll(); + +private: + ItemType* m_Front = VMA_NULL; + ItemType* m_Back = VMA_NULL; + size_t m_Count = 0; +}; + +#ifndef _VMA_INTRUSIVE_LINKED_LIST_FUNCTIONS +template +VmaIntrusiveLinkedList::VmaIntrusiveLinkedList(VmaIntrusiveLinkedList&& src) + : m_Front(src.m_Front), m_Back(src.m_Back), m_Count(src.m_Count) +{ + src.m_Front = src.m_Back = VMA_NULL; + src.m_Count = 0; +} + +template +VmaIntrusiveLinkedList& VmaIntrusiveLinkedList::operator=(VmaIntrusiveLinkedList&& src) +{ + if (&src != this) + { + VMA_HEAVY_ASSERT(IsEmpty()); + m_Front = src.m_Front; + m_Back = src.m_Back; + m_Count = src.m_Count; + src.m_Front = src.m_Back = VMA_NULL; + src.m_Count = 0; + } + return *this; +} + +template +void VmaIntrusiveLinkedList::PushBack(ItemType* item) +{ + VMA_HEAVY_ASSERT(ItemTypeTraits::GetPrev(item) == VMA_NULL && ItemTypeTraits::GetNext(item) == VMA_NULL); + if (IsEmpty()) + { + m_Front = item; + m_Back = item; + m_Count = 1; + } + else + { + ItemTypeTraits::AccessPrev(item) = m_Back; + ItemTypeTraits::AccessNext(m_Back) = item; + m_Back = item; + ++m_Count; + } +} + +template +void VmaIntrusiveLinkedList::PushFront(ItemType* item) +{ + VMA_HEAVY_ASSERT(ItemTypeTraits::GetPrev(item) == VMA_NULL && ItemTypeTraits::GetNext(item) == VMA_NULL); + if (IsEmpty()) + { + m_Front = item; + m_Back = item; + m_Count = 1; + } + else + { + ItemTypeTraits::AccessNext(item) = m_Front; + ItemTypeTraits::AccessPrev(m_Front) = item; + m_Front = item; + ++m_Count; + } +} + +template +typename VmaIntrusiveLinkedList::ItemType* VmaIntrusiveLinkedList::PopBack() +{ + VMA_HEAVY_ASSERT(m_Count > 0); + ItemType* const backItem = m_Back; + ItemType* const prevItem = ItemTypeTraits::GetPrev(backItem); + if (prevItem != VMA_NULL) + { + ItemTypeTraits::AccessNext(prevItem) = VMA_NULL; + } + m_Back = prevItem; + --m_Count; + ItemTypeTraits::AccessPrev(backItem) = VMA_NULL; + ItemTypeTraits::AccessNext(backItem) = VMA_NULL; + return backItem; +} + 
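
Because the links are stored inside the items themselves and reached through the traits type, this list never allocates nodes of its own; pushing and popping only rewire pointers. A short usage sketch against the expected interface documented above; Job and JobTraits are hypothetical names, and the template parameter list <typename ItemTypeTraits> is assumed from the upstream VMA source:

    // Hypothetical item type with embedded hooks, matching the expected interface.
    struct Job
    {
        int id = 0;
        Job* pPrev = nullptr;
        Job* pNext = nullptr;
    };

    struct JobTraits
    {
        typedef Job ItemType;
        static Job* GetPrev(const Job* item) { return item->pPrev; }
        static Job* GetNext(const Job* item) { return item->pNext; }
        static Job*& AccessPrev(Job* item) { return item->pPrev; }
        static Job*& AccessNext(Job* item) { return item->pNext; }
    };

    void IntrusiveListExample()
    {
        VmaIntrusiveLinkedList<JobTraits> list;
        Job a{1}, b{2};
        list.PushBack(&a);            // No heap allocation: only pointer rewiring.
        list.PushBack(&b);
        Job* first = list.PopFront(); // Returns &a with both hooks reset to null.
        list.Remove(&b);              // The list destructor asserts it is empty.
        (void)first;
    }
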
+template +typename VmaIntrusiveLinkedList::ItemType* VmaIntrusiveLinkedList::PopFront() +{ + VMA_HEAVY_ASSERT(m_Count > 0); + ItemType* const frontItem = m_Front; + ItemType* const nextItem = ItemTypeTraits::GetNext(frontItem); + if (nextItem != VMA_NULL) + { + ItemTypeTraits::AccessPrev(nextItem) = VMA_NULL; + } + m_Front = nextItem; + --m_Count; + ItemTypeTraits::AccessPrev(frontItem) = VMA_NULL; + ItemTypeTraits::AccessNext(frontItem) = VMA_NULL; + return frontItem; +} + +template +void VmaIntrusiveLinkedList::InsertBefore(ItemType* existingItem, ItemType* newItem) +{ + VMA_HEAVY_ASSERT(newItem != VMA_NULL && ItemTypeTraits::GetPrev(newItem) == VMA_NULL && ItemTypeTraits::GetNext(newItem) == VMA_NULL); + if (existingItem != VMA_NULL) + { + ItemType* const prevItem = ItemTypeTraits::GetPrev(existingItem); + ItemTypeTraits::AccessPrev(newItem) = prevItem; + ItemTypeTraits::AccessNext(newItem) = existingItem; + ItemTypeTraits::AccessPrev(existingItem) = newItem; + if (prevItem != VMA_NULL) + { + ItemTypeTraits::AccessNext(prevItem) = newItem; + } + else + { + VMA_HEAVY_ASSERT(m_Front == existingItem); + m_Front = newItem; + } + ++m_Count; + } + else + PushBack(newItem); +} + +template +void VmaIntrusiveLinkedList::InsertAfter(ItemType* existingItem, ItemType* newItem) +{ + VMA_HEAVY_ASSERT(newItem != VMA_NULL && ItemTypeTraits::GetPrev(newItem) == VMA_NULL && ItemTypeTraits::GetNext(newItem) == VMA_NULL); + if (existingItem != VMA_NULL) + { + ItemType* const nextItem = ItemTypeTraits::GetNext(existingItem); + ItemTypeTraits::AccessNext(newItem) = nextItem; + ItemTypeTraits::AccessPrev(newItem) = existingItem; + ItemTypeTraits::AccessNext(existingItem) = newItem; + if (nextItem != VMA_NULL) + { + ItemTypeTraits::AccessPrev(nextItem) = newItem; + } + else + { + VMA_HEAVY_ASSERT(m_Back == existingItem); + m_Back = newItem; + } + ++m_Count; + } + else + return PushFront(newItem); +} + +template +void VmaIntrusiveLinkedList::Remove(ItemType* item) +{ + VMA_HEAVY_ASSERT(item != VMA_NULL && m_Count > 0); + if (ItemTypeTraits::GetPrev(item) != VMA_NULL) + { + ItemTypeTraits::AccessNext(ItemTypeTraits::AccessPrev(item)) = ItemTypeTraits::GetNext(item); + } + else + { + VMA_HEAVY_ASSERT(m_Front == item); + m_Front = ItemTypeTraits::GetNext(item); + } + + if (ItemTypeTraits::GetNext(item) != VMA_NULL) + { + ItemTypeTraits::AccessPrev(ItemTypeTraits::AccessNext(item)) = ItemTypeTraits::GetPrev(item); + } + else + { + VMA_HEAVY_ASSERT(m_Back == item); + m_Back = ItemTypeTraits::GetPrev(item); + } + ItemTypeTraits::AccessPrev(item) = VMA_NULL; + ItemTypeTraits::AccessNext(item) = VMA_NULL; + --m_Count; +} + +template +void VmaIntrusiveLinkedList::RemoveAll() +{ + if (!IsEmpty()) + { + ItemType* item = m_Back; + while (item != VMA_NULL) + { + ItemType* const prevItem = ItemTypeTraits::AccessPrev(item); + ItemTypeTraits::AccessPrev(item) = VMA_NULL; + ItemTypeTraits::AccessNext(item) = VMA_NULL; + item = prevItem; + } + m_Front = VMA_NULL; + m_Back = VMA_NULL; + m_Count = 0; + } +} +#endif // _VMA_INTRUSIVE_LINKED_LIST_FUNCTIONS +#endif // _VMA_INTRUSIVE_LINKED_LIST + +#if !defined(_VMA_STRING_BUILDER) && VMA_STATS_STRING_ENABLED +class VmaStringBuilder +{ +public: + VmaStringBuilder(const VkAllocationCallbacks* allocationCallbacks) : m_Data(VmaStlAllocator(allocationCallbacks)) {} + ~VmaStringBuilder() = default; + + size_t GetLength() const { return m_Data.size(); } + const char* GetData() const { return m_Data.data(); } + void AddNewLine() { Add('\n'); } + void Add(char ch) { m_Data.push_back(ch); } + 
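
The AddNumber overloads declared just below avoid snprintf by writing decimal digits into a small stack buffer from the end, so no digit-reversal pass is needed. A self-contained sketch of the same technique (the helper name is hypothetical):

    #include <cstdint>
    #include <cstdio>

    // Writes num's decimal digits into buf back-to-front; a uint64_t has at
    // most 20 digits, so 21 bytes including the terminator always suffice.
    // Returns a pointer to the first digit, somewhere inside buf.
    static const char* U64ToString(uint64_t num, char (&buf)[21])
    {
        char* p = &buf[20];
        *p = '\0';
        do
        {
            *--p = '0' + (char)(num % 10);
            num /= 10;
        } while (num != 0);
        return p;
    }

    int main()
    {
        char buf[21];
        std::printf("%s\n", U64ToString(18446744073709551615ull, buf)); // UINT64_MAX
    }
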
+ void Add(const char* pStr); + void AddNumber(uint32_t num); + void AddNumber(uint64_t num); + void AddPointer(const void* ptr); + +private: + VmaVector> m_Data; +}; + +#ifndef _VMA_STRING_BUILDER_FUNCTIONS +void VmaStringBuilder::Add(const char* pStr) +{ + const size_t strLen = strlen(pStr); + if (strLen > 0) + { + const size_t oldCount = m_Data.size(); + m_Data.resize(oldCount + strLen); + memcpy(m_Data.data() + oldCount, pStr, strLen); + } +} + +void VmaStringBuilder::AddNumber(uint32_t num) +{ + char buf[11]; + buf[10] = '\0'; + char* p = &buf[10]; + do + { + *--p = '0' + (char)(num % 10); + num /= 10; + } while (num); + Add(p); +} + +void VmaStringBuilder::AddNumber(uint64_t num) +{ + char buf[21]; + buf[20] = '\0'; + char* p = &buf[20]; + do + { + *--p = '0' + (char)(num % 10); + num /= 10; + } while (num); + Add(p); +} + +void VmaStringBuilder::AddPointer(const void* ptr) +{ + char buf[21]; + VmaPtrToStr(buf, sizeof(buf), ptr); + Add(buf); +} +#endif //_VMA_STRING_BUILDER_FUNCTIONS +#endif // _VMA_STRING_BUILDER + +#if !defined(_VMA_JSON_WRITER) && VMA_STATS_STRING_ENABLED +/* +Allows to conveniently build a correct JSON document to be written to the +VmaStringBuilder passed to the constructor. +*/ +class VmaJsonWriter +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaJsonWriter) +public: + // sb - string builder to write the document to. Must remain alive for the whole lifetime of this object. + VmaJsonWriter(const VkAllocationCallbacks* pAllocationCallbacks, VmaStringBuilder& sb); + ~VmaJsonWriter(); + + // Begins object by writing "{". + // Inside an object, you must call pairs of WriteString and a value, e.g.: + // j.BeginObject(true); j.WriteString("A"); j.WriteNumber(1); j.WriteString("B"); j.WriteNumber(2); j.EndObject(); + // Will write: { "A": 1, "B": 2 } + void BeginObject(bool singleLine = false); + // Ends object by writing "}". + void EndObject(); + + // Begins array by writing "[". + // Inside an array, you can write a sequence of any values. + void BeginArray(bool singleLine = false); + // Ends array by writing "[". + void EndArray(); + + // Writes a string value inside "". + // pStr can contain any ANSI characters, including '"', new line etc. - they will be properly escaped. + void WriteString(const char* pStr); + + // Begins writing a string value. + // Call BeginString, ContinueString, ContinueString, ..., EndString instead of + // WriteString to conveniently build the string content incrementally, made of + // parts including numbers. + void BeginString(const char* pStr = VMA_NULL); + // Posts next part of an open string. + void ContinueString(const char* pStr); + // Posts next part of an open string. The number is converted to decimal characters. + void ContinueString(uint32_t n); + void ContinueString(uint64_t n); + // Posts next part of an open string. Pointer value is converted to characters + // using "%p" formatting - shown as hexadecimal number, e.g.: 000000081276Ad00 + void ContinueString_Pointer(const void* ptr); + // Ends writing a string value by writing '"'. + void EndString(const char* pStr = VMA_NULL); + + // Writes a number value. + void WriteNumber(uint32_t n); + void WriteNumber(uint64_t n); + // Writes a boolean value - false or true. + void WriteBool(bool b); + // Writes a null value. 
+ void WriteNull(); + +private: + enum COLLECTION_TYPE + { + COLLECTION_TYPE_OBJECT, + COLLECTION_TYPE_ARRAY, + }; + struct StackItem + { + COLLECTION_TYPE type; + uint32_t valueCount; + bool singleLineMode; + }; + + static const char* const INDENT; + + VmaStringBuilder& m_SB; + VmaVector< StackItem, VmaStlAllocator > m_Stack; + bool m_InsideString; + + void BeginValue(bool isString); + void WriteIndent(bool oneLess = false); +}; +const char* const VmaJsonWriter::INDENT = " "; + +#ifndef _VMA_JSON_WRITER_FUNCTIONS +VmaJsonWriter::VmaJsonWriter(const VkAllocationCallbacks* pAllocationCallbacks, VmaStringBuilder& sb) + : m_SB(sb), + m_Stack(VmaStlAllocator(pAllocationCallbacks)), + m_InsideString(false) {} + +VmaJsonWriter::~VmaJsonWriter() +{ + VMA_ASSERT(!m_InsideString); + VMA_ASSERT(m_Stack.empty()); +} + +void VmaJsonWriter::BeginObject(bool singleLine) +{ + VMA_ASSERT(!m_InsideString); + + BeginValue(false); + m_SB.Add('{'); + + StackItem item; + item.type = COLLECTION_TYPE_OBJECT; + item.valueCount = 0; + item.singleLineMode = singleLine; + m_Stack.push_back(item); +} + +void VmaJsonWriter::EndObject() +{ + VMA_ASSERT(!m_InsideString); + + WriteIndent(true); + m_SB.Add('}'); + + VMA_ASSERT(!m_Stack.empty() && m_Stack.back().type == COLLECTION_TYPE_OBJECT); + m_Stack.pop_back(); +} + +void VmaJsonWriter::BeginArray(bool singleLine) +{ + VMA_ASSERT(!m_InsideString); + + BeginValue(false); + m_SB.Add('['); + + StackItem item; + item.type = COLLECTION_TYPE_ARRAY; + item.valueCount = 0; + item.singleLineMode = singleLine; + m_Stack.push_back(item); +} + +void VmaJsonWriter::EndArray() +{ + VMA_ASSERT(!m_InsideString); + + WriteIndent(true); + m_SB.Add(']'); + + VMA_ASSERT(!m_Stack.empty() && m_Stack.back().type == COLLECTION_TYPE_ARRAY); + m_Stack.pop_back(); +} + +void VmaJsonWriter::WriteString(const char* pStr) +{ + BeginString(pStr); + EndString(); +} + +void VmaJsonWriter::BeginString(const char* pStr) +{ + VMA_ASSERT(!m_InsideString); + + BeginValue(true); + m_SB.Add('"'); + m_InsideString = true; + if (pStr != VMA_NULL && pStr[0] != '\0') + { + ContinueString(pStr); + } +} + +void VmaJsonWriter::ContinueString(const char* pStr) +{ + VMA_ASSERT(m_InsideString); + + const size_t strLen = strlen(pStr); + for (size_t i = 0; i < strLen; ++i) + { + char ch = pStr[i]; + if (ch == '\\') + { + m_SB.Add("\\\\"); + } + else if (ch == '"') + { + m_SB.Add("\\\""); + } + else if ((uint8_t)ch >= 32) + { + m_SB.Add(ch); + } + else switch (ch) + { + case '\b': + m_SB.Add("\\b"); + break; + case '\f': + m_SB.Add("\\f"); + break; + case '\n': + m_SB.Add("\\n"); + break; + case '\r': + m_SB.Add("\\r"); + break; + case '\t': + m_SB.Add("\\t"); + break; + default: + VMA_ASSERT(0 && "Character not currently supported."); + } + } +} + +void VmaJsonWriter::ContinueString(uint32_t n) +{ + VMA_ASSERT(m_InsideString); + m_SB.AddNumber(n); +} + +void VmaJsonWriter::ContinueString(uint64_t n) +{ + VMA_ASSERT(m_InsideString); + m_SB.AddNumber(n); +} + +void VmaJsonWriter::ContinueString_Pointer(const void* ptr) +{ + VMA_ASSERT(m_InsideString); + m_SB.AddPointer(ptr); +} + +void VmaJsonWriter::EndString(const char* pStr) +{ + VMA_ASSERT(m_InsideString); + if (pStr != VMA_NULL && pStr[0] != '\0') + { + ContinueString(pStr); + } + m_SB.Add('"'); + m_InsideString = false; +} + +void VmaJsonWriter::WriteNumber(uint32_t n) +{ + VMA_ASSERT(!m_InsideString); + BeginValue(false); + m_SB.AddNumber(n); +} + +void VmaJsonWriter::WriteNumber(uint64_t n) +{ + VMA_ASSERT(!m_InsideString); + BeginValue(false); + 
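
For reference, a sketch of how these writer calls compose; BeginValue(), defined later in this section, injects the ": " and ", " separators and the indentation automatically. Passing VMA_NULL for the allocation callbacks assumes, as elsewhere in VMA, that null falls back to the system allocator:

    VmaStringBuilder sb(VMA_NULL);
    {
        VmaJsonWriter json(VMA_NULL, sb);
        json.BeginObject();
        json.WriteString("Size");        // Key: must be a string inside an object.
        json.WriteNumber((uint64_t)256); // Value: BeginValue() prepends ": " here.
        json.EndObject();
    }
    // sb.GetData()/GetLength() now describe the text { "Size": 256 }, spread
    // over lines with two-space indentation (the buffer is not null-terminated).
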
m_SB.AddNumber(n); +} + +void VmaJsonWriter::WriteBool(bool b) +{ + VMA_ASSERT(!m_InsideString); + BeginValue(false); + m_SB.Add(b ? "true" : "false"); +} + +void VmaJsonWriter::WriteNull() +{ + VMA_ASSERT(!m_InsideString); + BeginValue(false); + m_SB.Add("null"); +} + +void VmaJsonWriter::BeginValue(bool isString) +{ + if (!m_Stack.empty()) + { + StackItem& currItem = m_Stack.back(); + if (currItem.type == COLLECTION_TYPE_OBJECT && + currItem.valueCount % 2 == 0) + { + VMA_ASSERT(isString); + } + + if (currItem.type == COLLECTION_TYPE_OBJECT && + currItem.valueCount % 2 != 0) + { + m_SB.Add(": "); + } + else if (currItem.valueCount > 0) + { + m_SB.Add(", "); + WriteIndent(); + } + else + { + WriteIndent(); + } + ++currItem.valueCount; + } +} + +void VmaJsonWriter::WriteIndent(bool oneLess) +{ + if (!m_Stack.empty() && !m_Stack.back().singleLineMode) + { + m_SB.AddNewLine(); + + size_t count = m_Stack.size(); + if (count > 0 && oneLess) + { + --count; + } + for (size_t i = 0; i < count; ++i) + { + m_SB.Add(INDENT); + } + } +} +#endif // _VMA_JSON_WRITER_FUNCTIONS + +static void VmaPrintDetailedStatistics(VmaJsonWriter& json, const VmaDetailedStatistics& stat) +{ + json.BeginObject(); + + json.WriteString("BlockCount"); + json.WriteNumber(stat.statistics.blockCount); + json.WriteString("BlockBytes"); + json.WriteNumber(stat.statistics.blockBytes); + json.WriteString("AllocationCount"); + json.WriteNumber(stat.statistics.allocationCount); + json.WriteString("AllocationBytes"); + json.WriteNumber(stat.statistics.allocationBytes); + json.WriteString("UnusedRangeCount"); + json.WriteNumber(stat.unusedRangeCount); + + if (stat.statistics.allocationCount > 1) + { + json.WriteString("AllocationSizeMin"); + json.WriteNumber(stat.allocationSizeMin); + json.WriteString("AllocationSizeMax"); + json.WriteNumber(stat.allocationSizeMax); + } + if (stat.unusedRangeCount > 1) + { + json.WriteString("UnusedRangeSizeMin"); + json.WriteNumber(stat.unusedRangeSizeMin); + json.WriteString("UnusedRangeSizeMax"); + json.WriteNumber(stat.unusedRangeSizeMax); + } + json.EndObject(); +} +#endif // _VMA_JSON_WRITER + +#ifndef _VMA_MAPPING_HYSTERESIS + +class VmaMappingHysteresis +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaMappingHysteresis) +public: + VmaMappingHysteresis() = default; + + uint32_t GetExtraMapping() const { return m_ExtraMapping; } + + // Call when Map was called. + // Returns true if switched to extra +1 mapping reference count. + bool PostMap() + { +#if VMA_MAPPING_HYSTERESIS_ENABLED + if(m_ExtraMapping == 0) + { + ++m_MajorCounter; + if(m_MajorCounter >= COUNTER_MIN_EXTRA_MAPPING) + { + m_ExtraMapping = 1; + m_MajorCounter = 0; + m_MinorCounter = 0; + return true; + } + } + else // m_ExtraMapping == 1 + PostMinorCounter(); +#endif // #if VMA_MAPPING_HYSTERESIS_ENABLED + return false; + } + + // Call when Unmap was called. + void PostUnmap() + { +#if VMA_MAPPING_HYSTERESIS_ENABLED + if(m_ExtraMapping == 0) + ++m_MajorCounter; + else // m_ExtraMapping == 1 + PostMinorCounter(); +#endif // #if VMA_MAPPING_HYSTERESIS_ENABLED + } + + // Call when allocation was made from the memory block. + void PostAlloc() + { +#if VMA_MAPPING_HYSTERESIS_ENABLED + if(m_ExtraMapping == 1) + ++m_MajorCounter; + else // m_ExtraMapping == 0 + PostMinorCounter(); +#endif // #if VMA_MAPPING_HYSTERESIS_ENABLED + } + + // Call when allocation was freed from the memory block. + // Returns true if switched to extra -1 mapping reference count. 
+ bool PostFree() + { +#if VMA_MAPPING_HYSTERESIS_ENABLED + if(m_ExtraMapping == 1) + { + ++m_MajorCounter; + if(m_MajorCounter >= COUNTER_MIN_EXTRA_MAPPING && + m_MajorCounter > m_MinorCounter + 1) + { + m_ExtraMapping = 0; + m_MajorCounter = 0; + m_MinorCounter = 0; + return true; + } + } + else // m_ExtraMapping == 0 + PostMinorCounter(); +#endif // #if VMA_MAPPING_HYSTERESIS_ENABLED + return false; + } + +private: + static const int32_t COUNTER_MIN_EXTRA_MAPPING = 7; + + uint32_t m_MinorCounter = 0; + uint32_t m_MajorCounter = 0; + uint32_t m_ExtraMapping = 0; // 0 or 1. + + void PostMinorCounter() + { + if(m_MinorCounter < m_MajorCounter) + { + ++m_MinorCounter; + } + else if(m_MajorCounter > 0) + { + --m_MajorCounter; + --m_MinorCounter; + } + } +}; + +#endif // _VMA_MAPPING_HYSTERESIS + +#ifndef _VMA_DEVICE_MEMORY_BLOCK +/* +Represents a single block of device memory (`VkDeviceMemory`) with all the +data about its regions (aka suballocations, #VmaAllocation), assigned and free. + +Thread-safety: +- Access to m_pMetadata must be externally synchronized. +- Map, Unmap, Bind* are synchronized internally. +*/ +class VmaDeviceMemoryBlock +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaDeviceMemoryBlock) +public: + VmaBlockMetadata* m_pMetadata; + + VmaDeviceMemoryBlock(VmaAllocator hAllocator); + ~VmaDeviceMemoryBlock(); + + // Always call after construction. + void Init( + VmaAllocator hAllocator, + VmaPool hParentPool, + uint32_t newMemoryTypeIndex, + VkDeviceMemory newMemory, + VkDeviceSize newSize, + uint32_t id, + uint32_t algorithm, + VkDeviceSize bufferImageGranularity); + // Always call before destruction. + void Destroy(VmaAllocator allocator); + + VmaPool GetParentPool() const { return m_hParentPool; } + VkDeviceMemory GetDeviceMemory() const { return m_hMemory; } + uint32_t GetMemoryTypeIndex() const { return m_MemoryTypeIndex; } + uint32_t GetId() const { return m_Id; } + void* GetMappedData() const { return m_pMappedData; } + uint32_t GetMapRefCount() const { return m_MapCount; } + + // Call when allocation/free was made from m_pMetadata. + // Used for m_MappingHysteresis. + void PostAlloc(VmaAllocator hAllocator); + void PostFree(VmaAllocator hAllocator); + + // Validates all data structures inside this object. If not valid, returns false. + bool Validate() const; + VkResult CheckCorruption(VmaAllocator hAllocator); + + // ppData can be null. + VkResult Map(VmaAllocator hAllocator, uint32_t count, void** ppData); + void Unmap(VmaAllocator hAllocator, uint32_t count); + + VkResult WriteMagicValueAfterAllocation(VmaAllocator hAllocator, VkDeviceSize allocOffset, VkDeviceSize allocSize); + VkResult ValidateMagicValueAfterAllocation(VmaAllocator hAllocator, VkDeviceSize allocOffset, VkDeviceSize allocSize); + + VkResult BindBufferMemory( + const VmaAllocator hAllocator, + const VmaAllocation hAllocation, + VkDeviceSize allocationLocalOffset, + VkBuffer hBuffer, + const void* pNext); + VkResult BindImageMemory( + const VmaAllocator hAllocator, + const VmaAllocation hAllocation, + VkDeviceSize allocationLocalOffset, + VkImage hImage, + const void* pNext); + +private: + VmaPool m_hParentPool; // VK_NULL_HANDLE if not belongs to custom pool. + uint32_t m_MemoryTypeIndex; + uint32_t m_Id; + VkDeviceMemory m_hMemory; + + /* + Protects access to m_hMemory so it is not used by multiple threads simultaneously, e.g. vkMapMemory, vkBindBufferMemory. + Also protects m_MapCount, m_pMappedData. + Allocations, deallocations, any change in m_pMetadata is protected by parent's VmaBlockVector::m_Mutex. 
+ */ + VMA_MUTEX m_MapAndBindMutex; + VmaMappingHysteresis m_MappingHysteresis; + uint32_t m_MapCount; + void* m_pMappedData; +}; +#endif // _VMA_DEVICE_MEMORY_BLOCK + +#ifndef _VMA_ALLOCATION_T +struct VmaAllocation_T +{ + friend struct VmaDedicatedAllocationListItemTraits; + + enum FLAGS + { + FLAG_PERSISTENT_MAP = 0x01, + FLAG_MAPPING_ALLOWED = 0x02, + }; + +public: + enum ALLOCATION_TYPE + { + ALLOCATION_TYPE_NONE, + ALLOCATION_TYPE_BLOCK, + ALLOCATION_TYPE_DEDICATED, + }; + + // This struct is allocated using VmaPoolAllocator. + VmaAllocation_T(bool mappingAllowed); + ~VmaAllocation_T(); + + void InitBlockAllocation( + VmaDeviceMemoryBlock* block, + VmaAllocHandle allocHandle, + VkDeviceSize alignment, + VkDeviceSize size, + uint32_t memoryTypeIndex, + VmaSuballocationType suballocationType, + bool mapped); + // pMappedData not null means allocation is created with MAPPED flag. + void InitDedicatedAllocation( + VmaPool hParentPool, + uint32_t memoryTypeIndex, + VkDeviceMemory hMemory, + VmaSuballocationType suballocationType, + void* pMappedData, + VkDeviceSize size); + + ALLOCATION_TYPE GetType() const { return (ALLOCATION_TYPE)m_Type; } + VkDeviceSize GetAlignment() const { return m_Alignment; } + VkDeviceSize GetSize() const { return m_Size; } + void* GetUserData() const { return m_pUserData; } + const char* GetName() const { return m_pName; } + VmaSuballocationType GetSuballocationType() const { return (VmaSuballocationType)m_SuballocationType; } + + VmaDeviceMemoryBlock* GetBlock() const { VMA_ASSERT(m_Type == ALLOCATION_TYPE_BLOCK); return m_BlockAllocation.m_Block; } + uint32_t GetMemoryTypeIndex() const { return m_MemoryTypeIndex; } + bool IsPersistentMap() const { return (m_Flags & FLAG_PERSISTENT_MAP) != 0; } + bool IsMappingAllowed() const { return (m_Flags & FLAG_MAPPING_ALLOWED) != 0; } + + void SetUserData(VmaAllocator hAllocator, void* pUserData) { m_pUserData = pUserData; } + void SetName(VmaAllocator hAllocator, const char* pName); + void FreeName(VmaAllocator hAllocator); + uint8_t SwapBlockAllocation(VmaAllocator hAllocator, VmaAllocation allocation); + VmaAllocHandle GetAllocHandle() const; + VkDeviceSize GetOffset() const; + VmaPool GetParentPool() const; + VkDeviceMemory GetMemory() const; + void* GetMappedData() const; + + void BlockAllocMap(); + void BlockAllocUnmap(); + VkResult DedicatedAllocMap(VmaAllocator hAllocator, void** ppData); + void DedicatedAllocUnmap(VmaAllocator hAllocator); + +#if VMA_STATS_STRING_ENABLED + VmaBufferImageUsage GetBufferImageUsage() const { return m_BufferImageUsage; } + void InitBufferUsage(const VkBufferCreateInfo &createInfo, bool useKhrMaintenance5) + { + VMA_ASSERT(m_BufferImageUsage == VmaBufferImageUsage::UNKNOWN); + m_BufferImageUsage = VmaBufferImageUsage(createInfo, useKhrMaintenance5); + } + void InitImageUsage(const VkImageCreateInfo &createInfo) + { + VMA_ASSERT(m_BufferImageUsage == VmaBufferImageUsage::UNKNOWN); + m_BufferImageUsage = VmaBufferImageUsage(createInfo); + } + void PrintParameters(class VmaJsonWriter& json) const; +#endif + +private: + // Allocation out of VmaDeviceMemoryBlock. + struct BlockAllocation + { + VmaDeviceMemoryBlock* m_Block; + VmaAllocHandle m_AllocHandle; + }; + // Allocation for an object that has its own private VkDeviceMemory. + struct DedicatedAllocation + { + VmaPool m_hParentPool; // VK_NULL_HANDLE if not belongs to custom pool. + VkDeviceMemory m_hMemory; + void* m_pMappedData; // Not null means memory is mapped. 
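
The m_Prev/m_Next members completing this struct just below are the embedded hooks that let VmaDedicatedAllocationListItemTraits, defined further down, thread dedicated allocations into a VmaIntrusiveLinkedList. The surrounding union plus the m_Type byte form a classic tagged union; a compact sketch of that layout, with illustrative names only:

    #include <cstdint>

    // One union holds the per-kind payload; a one-byte tag selects the member.
    struct AllocationSketch
    {
        enum Kind : uint8_t { KIND_BLOCK, KIND_DEDICATED };

        struct BlockPart { void* block; uint64_t allocHandle; };
        struct DedicatedPart { void* deviceMemory; void* mappedData; };

        union // Anonymous union: reading the inactive member is undefined behavior.
        {
            BlockPart blockAlloc;
            DedicatedPart dedicatedAlloc;
        };

        uint64_t size = 0;
        uint8_t kind = KIND_BLOCK; // Tag that says which union member is valid.
    };

    // This is why VmaAllocation_T's accessors, e.g. GetBlock(), assert on
    // m_Type before touching the corresponding union member.
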
+ VmaAllocation_T* m_Prev; + VmaAllocation_T* m_Next; + }; + union + { + // Allocation out of VmaDeviceMemoryBlock. + BlockAllocation m_BlockAllocation; + // Allocation for an object that has its own private VkDeviceMemory. + DedicatedAllocation m_DedicatedAllocation; + }; + + VkDeviceSize m_Alignment; + VkDeviceSize m_Size; + void* m_pUserData; + char* m_pName; + uint32_t m_MemoryTypeIndex; + uint8_t m_Type; // ALLOCATION_TYPE + uint8_t m_SuballocationType; // VmaSuballocationType + // Reference counter for vmaMapMemory()/vmaUnmapMemory(). + uint8_t m_MapCount; + uint8_t m_Flags; // enum FLAGS +#if VMA_STATS_STRING_ENABLED + VmaBufferImageUsage m_BufferImageUsage; // 0 if unknown. +#endif +}; +#endif // _VMA_ALLOCATION_T + +#ifndef _VMA_DEDICATED_ALLOCATION_LIST_ITEM_TRAITS +struct VmaDedicatedAllocationListItemTraits +{ + typedef VmaAllocation_T ItemType; + + static ItemType* GetPrev(const ItemType* item) + { + VMA_HEAVY_ASSERT(item->GetType() == VmaAllocation_T::ALLOCATION_TYPE_DEDICATED); + return item->m_DedicatedAllocation.m_Prev; + } + static ItemType* GetNext(const ItemType* item) + { + VMA_HEAVY_ASSERT(item->GetType() == VmaAllocation_T::ALLOCATION_TYPE_DEDICATED); + return item->m_DedicatedAllocation.m_Next; + } + static ItemType*& AccessPrev(ItemType* item) + { + VMA_HEAVY_ASSERT(item->GetType() == VmaAllocation_T::ALLOCATION_TYPE_DEDICATED); + return item->m_DedicatedAllocation.m_Prev; + } + static ItemType*& AccessNext(ItemType* item) + { + VMA_HEAVY_ASSERT(item->GetType() == VmaAllocation_T::ALLOCATION_TYPE_DEDICATED); + return item->m_DedicatedAllocation.m_Next; + } +}; +#endif // _VMA_DEDICATED_ALLOCATION_LIST_ITEM_TRAITS + +#ifndef _VMA_DEDICATED_ALLOCATION_LIST +/* +Stores linked list of VmaAllocation_T objects. +Thread-safe, synchronized internally. +*/ +class VmaDedicatedAllocationList +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaDedicatedAllocationList) +public: + VmaDedicatedAllocationList() {} + ~VmaDedicatedAllocationList(); + + void Init(bool useMutex) { m_UseMutex = useMutex; } + bool Validate(); + + void AddDetailedStatistics(VmaDetailedStatistics& inoutStats); + void AddStatistics(VmaStatistics& inoutStats); +#if VMA_STATS_STRING_ENABLED + // Writes JSON array with the list of allocations. 
+ void BuildStatsString(VmaJsonWriter& json); +#endif + + bool IsEmpty(); + void Register(VmaAllocation alloc); + void Unregister(VmaAllocation alloc); + +private: + typedef VmaIntrusiveLinkedList DedicatedAllocationLinkedList; + + bool m_UseMutex = true; + VMA_RW_MUTEX m_Mutex; + DedicatedAllocationLinkedList m_AllocationList; +}; + +#ifndef _VMA_DEDICATED_ALLOCATION_LIST_FUNCTIONS + +VmaDedicatedAllocationList::~VmaDedicatedAllocationList() +{ + VMA_HEAVY_ASSERT(Validate()); + + if (!m_AllocationList.IsEmpty()) + { + VMA_ASSERT_LEAK(false && "Unfreed dedicated allocations found!"); + } +} + +bool VmaDedicatedAllocationList::Validate() +{ + const size_t declaredCount = m_AllocationList.GetCount(); + size_t actualCount = 0; + VmaMutexLockRead lock(m_Mutex, m_UseMutex); + for (VmaAllocation alloc = m_AllocationList.Front(); + alloc != VMA_NULL; alloc = m_AllocationList.GetNext(alloc)) + { + ++actualCount; + } + VMA_VALIDATE(actualCount == declaredCount); + + return true; +} + +void VmaDedicatedAllocationList::AddDetailedStatistics(VmaDetailedStatistics& inoutStats) +{ + for(auto* item = m_AllocationList.Front(); item != VMA_NULL; item = DedicatedAllocationLinkedList::GetNext(item)) + { + const VkDeviceSize size = item->GetSize(); + inoutStats.statistics.blockCount++; + inoutStats.statistics.blockBytes += size; + VmaAddDetailedStatisticsAllocation(inoutStats, item->GetSize()); + } +} + +void VmaDedicatedAllocationList::AddStatistics(VmaStatistics& inoutStats) +{ + VmaMutexLockRead lock(m_Mutex, m_UseMutex); + + const uint32_t allocCount = (uint32_t)m_AllocationList.GetCount(); + inoutStats.blockCount += allocCount; + inoutStats.allocationCount += allocCount; + + for(auto* item = m_AllocationList.Front(); item != VMA_NULL; item = DedicatedAllocationLinkedList::GetNext(item)) + { + const VkDeviceSize size = item->GetSize(); + inoutStats.blockBytes += size; + inoutStats.allocationBytes += size; + } +} + +#if VMA_STATS_STRING_ENABLED +void VmaDedicatedAllocationList::BuildStatsString(VmaJsonWriter& json) +{ + VmaMutexLockRead lock(m_Mutex, m_UseMutex); + json.BeginArray(); + for (VmaAllocation alloc = m_AllocationList.Front(); + alloc != VMA_NULL; alloc = m_AllocationList.GetNext(alloc)) + { + json.BeginObject(true); + alloc->PrintParameters(json); + json.EndObject(); + } + json.EndArray(); +} +#endif // VMA_STATS_STRING_ENABLED + +bool VmaDedicatedAllocationList::IsEmpty() +{ + VmaMutexLockRead lock(m_Mutex, m_UseMutex); + return m_AllocationList.IsEmpty(); +} + +void VmaDedicatedAllocationList::Register(VmaAllocation alloc) +{ + VmaMutexLockWrite lock(m_Mutex, m_UseMutex); + m_AllocationList.PushBack(alloc); +} + +void VmaDedicatedAllocationList::Unregister(VmaAllocation alloc) +{ + VmaMutexLockWrite lock(m_Mutex, m_UseMutex); + m_AllocationList.Remove(alloc); +} +#endif // _VMA_DEDICATED_ALLOCATION_LIST_FUNCTIONS +#endif // _VMA_DEDICATED_ALLOCATION_LIST + +#ifndef _VMA_SUBALLOCATION +/* +Represents a region of VmaDeviceMemoryBlock that is either assigned and returned as +allocated memory block or free. +*/ +struct VmaSuballocation +{ + VkDeviceSize offset; + VkDeviceSize size; + void* userData; + VmaSuballocationType type; +}; + +// Comparator for offsets. 
+struct VmaSuballocationOffsetLess +{ + bool operator()(const VmaSuballocation& lhs, const VmaSuballocation& rhs) const + { + return lhs.offset < rhs.offset; + } +}; + +struct VmaSuballocationOffsetGreater +{ + bool operator()(const VmaSuballocation& lhs, const VmaSuballocation& rhs) const + { + return lhs.offset > rhs.offset; + } +}; + +struct VmaSuballocationItemSizeLess +{ + bool operator()(const VmaSuballocationList::iterator lhs, + const VmaSuballocationList::iterator rhs) const + { + return lhs->size < rhs->size; + } + + bool operator()(const VmaSuballocationList::iterator lhs, + VkDeviceSize rhsSize) const + { + return lhs->size < rhsSize; + } +}; +#endif // _VMA_SUBALLOCATION + +#ifndef _VMA_ALLOCATION_REQUEST +/* +Parameters of planned allocation inside a VmaDeviceMemoryBlock. +item points to a FREE suballocation. +*/ +struct VmaAllocationRequest +{ + VmaAllocHandle allocHandle; + VkDeviceSize size; + VmaSuballocationList::iterator item; + void* customData; + uint64_t algorithmData; + VmaAllocationRequestType type; +}; +#endif // _VMA_ALLOCATION_REQUEST + +#ifndef _VMA_BLOCK_METADATA +/* +Data structure used for bookkeeping of allocations and unused ranges of memory +in a single VkDeviceMemory block. +*/ +class VmaBlockMetadata +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaBlockMetadata) +public: + // pAllocationCallbacks, if not null, must be owned externally - alive and unchanged for the whole lifetime of this object. + VmaBlockMetadata(const VkAllocationCallbacks* pAllocationCallbacks, + VkDeviceSize bufferImageGranularity, bool isVirtual); + virtual ~VmaBlockMetadata() = default; + + virtual void Init(VkDeviceSize size) { m_Size = size; } + bool IsVirtual() const { return m_IsVirtual; } + VkDeviceSize GetSize() const { return m_Size; } + + // Validates all data structures inside this object. If not valid, returns false. + virtual bool Validate() const = 0; + virtual size_t GetAllocationCount() const = 0; + virtual size_t GetFreeRegionsCount() const = 0; + virtual VkDeviceSize GetSumFreeSize() const = 0; + // Returns true if this block is empty - contains only single free suballocation. + virtual bool IsEmpty() const = 0; + virtual void GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) = 0; + virtual VkDeviceSize GetAllocationOffset(VmaAllocHandle allocHandle) const = 0; + virtual void* GetAllocationUserData(VmaAllocHandle allocHandle) const = 0; + + virtual VmaAllocHandle GetAllocationListBegin() const = 0; + virtual VmaAllocHandle GetNextAllocation(VmaAllocHandle prevAlloc) const = 0; + virtual VkDeviceSize GetNextFreeRegionSize(VmaAllocHandle alloc) const = 0; + + // Shouldn't modify blockCount. + virtual void AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const = 0; + virtual void AddStatistics(VmaStatistics& inoutStats) const = 0; + +#if VMA_STATS_STRING_ENABLED + virtual void PrintDetailedMap(class VmaJsonWriter& json) const = 0; +#endif + + // Tries to find a place for suballocation with given parameters inside this block. + // If succeeded, fills pAllocationRequest and returns true. + // If failed, returns false. + virtual bool CreateAllocationRequest( + VkDeviceSize allocSize, + VkDeviceSize allocAlignment, + bool upperAddress, + VmaSuballocationType allocType, + // Always one of VMA_ALLOCATION_CREATE_STRATEGY_* or VMA_ALLOCATION_INTERNAL_STRATEGY_* flags. + uint32_t strategy, + VmaAllocationRequest* pAllocationRequest) = 0; + + virtual VkResult CheckCorruption(const void* pBlockData) = 0; + + // Makes actual allocation based on request. 
Request must already be checked and valid. + virtual void Alloc( + const VmaAllocationRequest& request, + VmaSuballocationType type, + void* userData) = 0; + + // Frees suballocation assigned to given memory region. + virtual void Free(VmaAllocHandle allocHandle) = 0; + + // Frees all allocations. + // Careful! Don't call it if there are VmaAllocation objects owned by userData of cleared allocations! + virtual void Clear() = 0; + + virtual void SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) = 0; + virtual void DebugLogAllAllocations() const = 0; + +protected: + const VkAllocationCallbacks* GetAllocationCallbacks() const { return m_pAllocationCallbacks; } + VkDeviceSize GetBufferImageGranularity() const { return m_BufferImageGranularity; } + VkDeviceSize GetDebugMargin() const { return VkDeviceSize(IsVirtual() ? 0 : VMA_DEBUG_MARGIN); } + + void DebugLogAllocation(VkDeviceSize offset, VkDeviceSize size, void* userData) const; +#if VMA_STATS_STRING_ENABLED + // mapRefCount == UINT32_MAX means unspecified. + void PrintDetailedMap_Begin(class VmaJsonWriter& json, + VkDeviceSize unusedBytes, + size_t allocationCount, + size_t unusedRangeCount) const; + void PrintDetailedMap_Allocation(class VmaJsonWriter& json, + VkDeviceSize offset, VkDeviceSize size, void* userData) const; + void PrintDetailedMap_UnusedRange(class VmaJsonWriter& json, + VkDeviceSize offset, + VkDeviceSize size) const; + void PrintDetailedMap_End(class VmaJsonWriter& json) const; +#endif + +private: + VkDeviceSize m_Size; + const VkAllocationCallbacks* m_pAllocationCallbacks; + const VkDeviceSize m_BufferImageGranularity; + const bool m_IsVirtual; +}; + +#ifndef _VMA_BLOCK_METADATA_FUNCTIONS +VmaBlockMetadata::VmaBlockMetadata(const VkAllocationCallbacks* pAllocationCallbacks, + VkDeviceSize bufferImageGranularity, bool isVirtual) + : m_Size(0), + m_pAllocationCallbacks(pAllocationCallbacks), + m_BufferImageGranularity(bufferImageGranularity), + m_IsVirtual(isVirtual) {} + +void VmaBlockMetadata::DebugLogAllocation(VkDeviceSize offset, VkDeviceSize size, void* userData) const +{ + if (IsVirtual()) + { + VMA_LEAK_LOG_FORMAT("UNFREED VIRTUAL ALLOCATION; Offset: %" PRIu64 "; Size: %" PRIu64 "; UserData: %p", offset, size, userData); + } + else + { + VMA_ASSERT(userData != VMA_NULL); + VmaAllocation allocation = reinterpret_cast(userData); + + userData = allocation->GetUserData(); + const char* name = allocation->GetName(); + +#if VMA_STATS_STRING_ENABLED + VMA_LEAK_LOG_FORMAT("UNFREED ALLOCATION; Offset: %" PRIu64 "; Size: %" PRIu64 "; UserData: %p; Name: %s; Type: %s; Usage: %" PRIu64, + offset, size, userData, name ? name : "vma_empty", + VMA_SUBALLOCATION_TYPE_NAMES[allocation->GetSuballocationType()], + (uint64_t)allocation->GetBufferImageUsage().Value); +#else + VMA_LEAK_LOG_FORMAT("UNFREED ALLOCATION; Offset: %" PRIu64 "; Size: %" PRIu64 "; UserData: %p; Name: %s; Type: %u", + offset, size, userData, name ? 
name : "vma_empty", + (unsigned)allocation->GetSuballocationType()); +#endif // VMA_STATS_STRING_ENABLED + } + +} + +#if VMA_STATS_STRING_ENABLED +void VmaBlockMetadata::PrintDetailedMap_Begin(class VmaJsonWriter& json, + VkDeviceSize unusedBytes, size_t allocationCount, size_t unusedRangeCount) const +{ + json.WriteString("TotalBytes"); + json.WriteNumber(GetSize()); + + json.WriteString("UnusedBytes"); + json.WriteNumber(unusedBytes); + + json.WriteString("Allocations"); + json.WriteNumber((uint64_t)allocationCount); + + json.WriteString("UnusedRanges"); + json.WriteNumber((uint64_t)unusedRangeCount); + + json.WriteString("Suballocations"); + json.BeginArray(); +} + +void VmaBlockMetadata::PrintDetailedMap_Allocation(class VmaJsonWriter& json, + VkDeviceSize offset, VkDeviceSize size, void* userData) const +{ + json.BeginObject(true); + + json.WriteString("Offset"); + json.WriteNumber(offset); + + if (IsVirtual()) + { + json.WriteString("Size"); + json.WriteNumber(size); + if (userData) + { + json.WriteString("CustomData"); + json.BeginString(); + json.ContinueString_Pointer(userData); + json.EndString(); + } + } + else + { + ((VmaAllocation)userData)->PrintParameters(json); + } + + json.EndObject(); +} + +void VmaBlockMetadata::PrintDetailedMap_UnusedRange(class VmaJsonWriter& json, + VkDeviceSize offset, VkDeviceSize size) const +{ + json.BeginObject(true); + + json.WriteString("Offset"); + json.WriteNumber(offset); + + json.WriteString("Type"); + json.WriteString(VMA_SUBALLOCATION_TYPE_NAMES[VMA_SUBALLOCATION_TYPE_FREE]); + + json.WriteString("Size"); + json.WriteNumber(size); + + json.EndObject(); +} + +void VmaBlockMetadata::PrintDetailedMap_End(class VmaJsonWriter& json) const +{ + json.EndArray(); +} +#endif // VMA_STATS_STRING_ENABLED +#endif // _VMA_BLOCK_METADATA_FUNCTIONS +#endif // _VMA_BLOCK_METADATA + +#ifndef _VMA_BLOCK_BUFFER_IMAGE_GRANULARITY +// Before deleting object of this class remember to call 'Destroy()' +class VmaBlockBufferImageGranularity final +{ +public: + struct ValidationContext + { + const VkAllocationCallbacks* allocCallbacks; + uint16_t* pageAllocs; + }; + + VmaBlockBufferImageGranularity(VkDeviceSize bufferImageGranularity); + ~VmaBlockBufferImageGranularity(); + + bool IsEnabled() const { return m_BufferImageGranularity > MAX_LOW_BUFFER_IMAGE_GRANULARITY; } + + void Init(const VkAllocationCallbacks* pAllocationCallbacks, VkDeviceSize size); + // Before destroying object you must call free it's memory + void Destroy(const VkAllocationCallbacks* pAllocationCallbacks); + + void RoundupAllocRequest(VmaSuballocationType allocType, + VkDeviceSize& inOutAllocSize, + VkDeviceSize& inOutAllocAlignment) const; + + bool CheckConflictAndAlignUp(VkDeviceSize& inOutAllocOffset, + VkDeviceSize allocSize, + VkDeviceSize blockOffset, + VkDeviceSize blockSize, + VmaSuballocationType allocType) const; + + void AllocPages(uint8_t allocType, VkDeviceSize offset, VkDeviceSize size); + void FreePages(VkDeviceSize offset, VkDeviceSize size); + void Clear(); + + ValidationContext StartValidation(const VkAllocationCallbacks* pAllocationCallbacks, + bool isVirutal) const; + bool Validate(ValidationContext& ctx, VkDeviceSize offset, VkDeviceSize size) const; + bool FinishValidation(ValidationContext& ctx) const; + +private: + static const uint16_t MAX_LOW_BUFFER_IMAGE_GRANULARITY = 256; + + struct RegionInfo + { + uint8_t allocType; + uint16_t allocCount; + }; + + VkDeviceSize m_BufferImageGranularity; + uint32_t m_RegionCount; + RegionInfo* m_RegionInfo; + + uint32_t 
GetStartPage(VkDeviceSize offset) const { return OffsetToPageIndex(offset & ~(m_BufferImageGranularity - 1)); }
+    uint32_t GetEndPage(VkDeviceSize offset, VkDeviceSize size) const { return OffsetToPageIndex((offset + size - 1) & ~(m_BufferImageGranularity - 1)); }
+
+    uint32_t OffsetToPageIndex(VkDeviceSize offset) const;
+    void AllocPage(RegionInfo& page, uint8_t allocType);
+};
+
+#ifndef _VMA_BLOCK_BUFFER_IMAGE_GRANULARITY_FUNCTIONS
+VmaBlockBufferImageGranularity::VmaBlockBufferImageGranularity(VkDeviceSize bufferImageGranularity)
+    : m_BufferImageGranularity(bufferImageGranularity),
+    m_RegionCount(0),
+    m_RegionInfo(VMA_NULL) {}
+
+VmaBlockBufferImageGranularity::~VmaBlockBufferImageGranularity()
+{
+    VMA_ASSERT(m_RegionInfo == VMA_NULL && "Free not called before destroying object!");
+}
+
+void VmaBlockBufferImageGranularity::Init(const VkAllocationCallbacks* pAllocationCallbacks, VkDeviceSize size)
+{
+    if (IsEnabled())
+    {
+        m_RegionCount = static_cast<uint32_t>(VmaDivideRoundingUp(size, m_BufferImageGranularity));
+        m_RegionInfo = vma_new_array(pAllocationCallbacks, RegionInfo, m_RegionCount);
+        memset(m_RegionInfo, 0, m_RegionCount * sizeof(RegionInfo));
+    }
+}
+
+void VmaBlockBufferImageGranularity::Destroy(const VkAllocationCallbacks* pAllocationCallbacks)
+{
+    if (m_RegionInfo)
+    {
+        vma_delete_array(pAllocationCallbacks, m_RegionInfo, m_RegionCount);
+        m_RegionInfo = VMA_NULL;
+    }
+}
+
+void VmaBlockBufferImageGranularity::RoundupAllocRequest(VmaSuballocationType allocType,
+    VkDeviceSize& inOutAllocSize,
+    VkDeviceSize& inOutAllocAlignment) const
+{
+    if (m_BufferImageGranularity > 1 &&
+        m_BufferImageGranularity <= MAX_LOW_BUFFER_IMAGE_GRANULARITY)
+    {
+        if (allocType == VMA_SUBALLOCATION_TYPE_UNKNOWN ||
+            allocType == VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN ||
+            allocType == VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL)
+        {
+            inOutAllocAlignment = VMA_MAX(inOutAllocAlignment, m_BufferImageGranularity);
+            inOutAllocSize = VmaAlignUp(inOutAllocSize, m_BufferImageGranularity);
+        }
+    }
+}
+
+bool VmaBlockBufferImageGranularity::CheckConflictAndAlignUp(VkDeviceSize& inOutAllocOffset,
+    VkDeviceSize allocSize,
+    VkDeviceSize blockOffset,
+    VkDeviceSize blockSize,
+    VmaSuballocationType allocType) const
+{
+    if (IsEnabled())
+    {
+        uint32_t startPage = GetStartPage(inOutAllocOffset);
+        if (m_RegionInfo[startPage].allocCount > 0 &&
+            VmaIsBufferImageGranularityConflict(static_cast<VmaSuballocationType>(m_RegionInfo[startPage].allocType), allocType))
+        {
+            inOutAllocOffset = VmaAlignUp(inOutAllocOffset, m_BufferImageGranularity);
+            if (blockSize < allocSize + inOutAllocOffset - blockOffset)
+                return true;
+            ++startPage;
+        }
+        uint32_t endPage = GetEndPage(inOutAllocOffset, allocSize);
+        if (endPage != startPage &&
+            m_RegionInfo[endPage].allocCount > 0 &&
+            VmaIsBufferImageGranularityConflict(static_cast<VmaSuballocationType>(m_RegionInfo[endPage].allocType), allocType))
+        {
+            return true;
+        }
+    }
+    return false;
+}
+
+void VmaBlockBufferImageGranularity::AllocPages(uint8_t allocType, VkDeviceSize offset, VkDeviceSize size)
+{
+    if (IsEnabled())
+    {
+        uint32_t startPage = GetStartPage(offset);
+        AllocPage(m_RegionInfo[startPage], allocType);
+
+        uint32_t endPage = GetEndPage(offset, size);
+        if (startPage != endPage)
+            AllocPage(m_RegionInfo[endPage], allocType);
+    }
+}
+
+void VmaBlockBufferImageGranularity::FreePages(VkDeviceSize offset, VkDeviceSize size)
+{
+    if (IsEnabled())
+    {
+        uint32_t startPage = GetStartPage(offset);
+        --m_RegionInfo[startPage].allocCount;
+        if (m_RegionInfo[startPage].allocCount == 0)
+            m_RegionInfo[startPage].allocType = VMA_SUBALLOCATION_TYPE_FREE;
+        uint32_t endPage = GetEndPage(offset, size);
+        if (startPage != endPage)
+        {
+            --m_RegionInfo[endPage].allocCount;
+            if (m_RegionInfo[endPage].allocCount == 0)
+                m_RegionInfo[endPage].allocType = VMA_SUBALLOCATION_TYPE_FREE;
+        }
+    }
+}
+
+void VmaBlockBufferImageGranularity::Clear()
+{
+    if (m_RegionInfo)
+        memset(m_RegionInfo, 0, m_RegionCount * sizeof(RegionInfo));
+}
+
+VmaBlockBufferImageGranularity::ValidationContext VmaBlockBufferImageGranularity::StartValidation(
+    const VkAllocationCallbacks* pAllocationCallbacks, bool isVirutal) const
+{
+    ValidationContext ctx{ pAllocationCallbacks, VMA_NULL };
+    if (!isVirutal && IsEnabled())
+    {
+        ctx.pageAllocs = vma_new_array(pAllocationCallbacks, uint16_t, m_RegionCount);
+        memset(ctx.pageAllocs, 0, m_RegionCount * sizeof(uint16_t));
+    }
+    return ctx;
+}
+
+bool VmaBlockBufferImageGranularity::Validate(ValidationContext& ctx,
+    VkDeviceSize offset, VkDeviceSize size) const
+{
+    if (IsEnabled())
+    {
+        uint32_t start = GetStartPage(offset);
+        ++ctx.pageAllocs[start];
+        VMA_VALIDATE(m_RegionInfo[start].allocCount > 0);
+
+        uint32_t end = GetEndPage(offset, size);
+        if (start != end)
+        {
+            ++ctx.pageAllocs[end];
+            VMA_VALIDATE(m_RegionInfo[end].allocCount > 0);
+        }
+    }
+    return true;
+}
+
+bool VmaBlockBufferImageGranularity::FinishValidation(ValidationContext& ctx) const
+{
+    // Check proper page structure
+    if (IsEnabled())
+    {
+        VMA_ASSERT(ctx.pageAllocs != VMA_NULL && "Validation context not initialized!");
+
+        for (uint32_t page = 0; page < m_RegionCount; ++page)
+        {
+            VMA_VALIDATE(ctx.pageAllocs[page] == m_RegionInfo[page].allocCount);
+        }
+        vma_delete_array(ctx.allocCallbacks, ctx.pageAllocs, m_RegionCount);
+        ctx.pageAllocs = VMA_NULL;
+    }
+    return true;
+}
+
+uint32_t VmaBlockBufferImageGranularity::OffsetToPageIndex(VkDeviceSize offset) const
+{
+    return static_cast<uint32_t>(offset >> VMA_BITSCAN_MSB(m_BufferImageGranularity));
+}
+
+void VmaBlockBufferImageGranularity::AllocPage(RegionInfo& page, uint8_t allocType)
+{
+    // When current alloc type is free then it can be overridden by new type
+    if (page.allocCount == 0 || (page.allocCount > 0 && page.allocType == VMA_SUBALLOCATION_TYPE_FREE))
+        page.allocType = allocType;
+
+    ++page.allocCount;
+}
+#endif // _VMA_BLOCK_BUFFER_IMAGE_GRANULARITY_FUNCTIONS
+#endif // _VMA_BLOCK_BUFFER_IMAGE_GRANULARITY
+
+#ifndef _VMA_BLOCK_METADATA_LINEAR
+/*
+Allocations and their references in internal data structure look like this:
+
+if(m_2ndVectorMode == SECOND_VECTOR_EMPTY):
+
+        0 +-------+
+          |       |
+          |       |
+          |       |
+          +-------+
+          | Alloc |  1st[m_1stNullItemsBeginCount]
+          +-------+
+          | Alloc |  1st[m_1stNullItemsBeginCount + 1]
+          +-------+
+          |  ...  |
+          +-------+
+          | Alloc |  1st[1st.size() - 1]
+          +-------+
+          |       |
+          |       |
+          |       |
+GetSize() +-------+
+
+if(m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER):
+
+        0 +-------+
+          | Alloc |  2nd[0]
+          +-------+
+          | Alloc |  2nd[1]
+          +-------+
+          |  ...  |
+          +-------+
+          | Alloc |  2nd[2nd.size() - 1]
+          +-------+
+          |       |
+          |       |
+          |       |
+          +-------+
+          | Alloc |  1st[m_1stNullItemsBeginCount]
+          +-------+
+          | Alloc |  1st[m_1stNullItemsBeginCount + 1]
+          +-------+
+          |  ...  |
+          +-------+
+          | Alloc |  1st[1st.size() - 1]
+          +-------+
+          |       |
+GetSize() +-------+
+
+if(m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK):
+
+        0 +-------+
+          |       |
+          |       |
+          |       |
+          +-------+
+          | Alloc |  1st[m_1stNullItemsBeginCount]
+          +-------+
+          | Alloc |  1st[m_1stNullItemsBeginCount + 1]
+          +-------+
+          |  ...  |
+          +-------+
+          | Alloc |  1st[1st.size() - 1]
+          +-------+
+          |       |
+          |       |
+          |       |
+          +-------+
+          | Alloc |  2nd[2nd.size() - 1]
+          +-------+
+          |  ...  |
+          +-------+
+          | Alloc |  2nd[1]
+          +-------+
+          | Alloc |  2nd[0]
+GetSize() +-------+
+
+*/
+class VmaBlockMetadata_Linear : public VmaBlockMetadata
+{
+    VMA_CLASS_NO_COPY_NO_MOVE(VmaBlockMetadata_Linear)
+public:
+    VmaBlockMetadata_Linear(const VkAllocationCallbacks* pAllocationCallbacks,
+        VkDeviceSize bufferImageGranularity, bool isVirtual);
+    virtual ~VmaBlockMetadata_Linear() = default;
+
+    VkDeviceSize GetSumFreeSize() const override { return m_SumFreeSize; }
+    bool IsEmpty() const override { return GetAllocationCount() == 0; }
+    VkDeviceSize GetAllocationOffset(VmaAllocHandle allocHandle) const override { return (VkDeviceSize)allocHandle - 1; }
+
+    void Init(VkDeviceSize size) override;
+    bool Validate() const override;
+    size_t GetAllocationCount() const override;
+    size_t GetFreeRegionsCount() const override;
+
+    void AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const override;
+    void AddStatistics(VmaStatistics& inoutStats) const override;
+
+#if VMA_STATS_STRING_ENABLED
+    void PrintDetailedMap(class VmaJsonWriter& json) const override;
+#endif
+
+    bool CreateAllocationRequest(
+        VkDeviceSize allocSize,
+        VkDeviceSize allocAlignment,
+        bool upperAddress,
+        VmaSuballocationType allocType,
+        uint32_t strategy,
+        VmaAllocationRequest* pAllocationRequest) override;
+
+    VkResult CheckCorruption(const void* pBlockData) override;
+
+    void Alloc(
+        const VmaAllocationRequest& request,
+        VmaSuballocationType type,
+        void* userData) override;
+
+    void Free(VmaAllocHandle allocHandle) override;
+    void GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) override;
+    void* GetAllocationUserData(VmaAllocHandle allocHandle) const override;
+    VmaAllocHandle GetAllocationListBegin() const override;
+    VmaAllocHandle GetNextAllocation(VmaAllocHandle prevAlloc) const override;
+    VkDeviceSize GetNextFreeRegionSize(VmaAllocHandle alloc) const override;
+    void Clear() override;
+    void SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) override;
+    void DebugLogAllAllocations() const override;
+
+private:
+    /*
+    There are two suballocation vectors, used in ping-pong way.
+    The one with index m_1stVectorIndex is called 1st.
+    The one with index (m_1stVectorIndex ^ 1) is called 2nd.
+    2nd can be non-empty only when 1st is not empty.
+    When 2nd is not empty, m_2ndVectorMode indicates its mode of operation.
+    */
+    typedef VmaVector<VmaSuballocation, VmaStlAllocator<VmaSuballocation>> SuballocationVectorType;
+
+    enum SECOND_VECTOR_MODE
+    {
+        SECOND_VECTOR_EMPTY,
+        /*
+        Suballocations in 2nd vector are created later than the ones in 1st, but they
+        all have smaller offset.
+        */
+        SECOND_VECTOR_RING_BUFFER,
+        /*
+        Suballocations in 2nd vector are upper side of double stack.
+        They all have offsets higher than those in 1st vector.
+        Top of this stack means smaller offsets, but higher indices in this vector.
+        */
+        SECOND_VECTOR_DOUBLE_STACK,
+    };
+
+    VkDeviceSize m_SumFreeSize;
+    SuballocationVectorType m_Suballocations0, m_Suballocations1;
+    uint32_t m_1stVectorIndex;
+    SECOND_VECTOR_MODE m_2ndVectorMode;
+    // Number of items in 1st vector with hAllocation = null at the beginning.
+    size_t m_1stNullItemsBeginCount;
+    // Number of other items in 1st vector with hAllocation = null somewhere in the middle.
+    size_t m_1stNullItemsMiddleCount;
+    // Number of items in 2nd vector with hAllocation = null.
+    size_t m_2ndNullItemsCount;
+
+    SuballocationVectorType& AccessSuballocations1st() { return m_1stVectorIndex ? m_Suballocations1 : m_Suballocations0; }
+    SuballocationVectorType& AccessSuballocations2nd() { return m_1stVectorIndex ? m_Suballocations0 : m_Suballocations1; }
+    const SuballocationVectorType& AccessSuballocations1st() const { return m_1stVectorIndex ? m_Suballocations1 : m_Suballocations0; }
+    const SuballocationVectorType& AccessSuballocations2nd() const { return m_1stVectorIndex ? m_Suballocations0 : m_Suballocations1; }
+
+    VmaSuballocation& FindSuballocation(VkDeviceSize offset) const;
+    bool ShouldCompact1st() const;
+    void CleanupAfterFree();
+
+    bool CreateAllocationRequest_LowerAddress(
+        VkDeviceSize allocSize,
+        VkDeviceSize allocAlignment,
+        VmaSuballocationType allocType,
+        uint32_t strategy,
+        VmaAllocationRequest* pAllocationRequest);
+    bool CreateAllocationRequest_UpperAddress(
+        VkDeviceSize allocSize,
+        VkDeviceSize allocAlignment,
+        VmaSuballocationType allocType,
+        uint32_t strategy,
+        VmaAllocationRequest* pAllocationRequest);
+};
+
+#ifndef _VMA_BLOCK_METADATA_LINEAR_FUNCTIONS
+VmaBlockMetadata_Linear::VmaBlockMetadata_Linear(const VkAllocationCallbacks* pAllocationCallbacks,
+    VkDeviceSize bufferImageGranularity, bool isVirtual)
+    : VmaBlockMetadata(pAllocationCallbacks, bufferImageGranularity, isVirtual),
+    m_SumFreeSize(0),
+    m_Suballocations0(VmaStlAllocator<VmaSuballocation>(pAllocationCallbacks)),
+    m_Suballocations1(VmaStlAllocator<VmaSuballocation>(pAllocationCallbacks)),
+    m_1stVectorIndex(0),
+    m_2ndVectorMode(SECOND_VECTOR_EMPTY),
+    m_1stNullItemsBeginCount(0),
+    m_1stNullItemsMiddleCount(0),
+    m_2ndNullItemsCount(0) {}
+
+void VmaBlockMetadata_Linear::Init(VkDeviceSize size)
+{
+    VmaBlockMetadata::Init(size);
+    m_SumFreeSize = size;
+}
+
+bool VmaBlockMetadata_Linear::Validate() const
+{
+    const SuballocationVectorType& suballocations1st = AccessSuballocations1st();
+    const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd();
+
+    VMA_VALIDATE(suballocations2nd.empty() == (m_2ndVectorMode == SECOND_VECTOR_EMPTY));
+    VMA_VALIDATE(!suballocations1st.empty() ||
+        suballocations2nd.empty() ||
+        m_2ndVectorMode != SECOND_VECTOR_RING_BUFFER);
+
+    if (!suballocations1st.empty())
+    {
+        // Null item at the beginning should be accounted into m_1stNullItemsBeginCount.
+        VMA_VALIDATE(suballocations1st[m_1stNullItemsBeginCount].type != VMA_SUBALLOCATION_TYPE_FREE);
+        // Null item at the end should be just pop_back().
+        VMA_VALIDATE(suballocations1st.back().type != VMA_SUBALLOCATION_TYPE_FREE);
+    }
+    if (!suballocations2nd.empty())
+    {
+        // Null item at the end should be just pop_back().
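+        // (CleanupAfterFree() pops trailing free items from both vectors, so a free tail here would mean the bookkeeping is broken.)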
+ VMA_VALIDATE(suballocations2nd.back().type != VMA_SUBALLOCATION_TYPE_FREE); + } + + VMA_VALIDATE(m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount <= suballocations1st.size()); + VMA_VALIDATE(m_2ndNullItemsCount <= suballocations2nd.size()); + + VkDeviceSize sumUsedSize = 0; + const size_t suballoc1stCount = suballocations1st.size(); + const VkDeviceSize debugMargin = GetDebugMargin(); + VkDeviceSize offset = 0; + + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + const size_t suballoc2ndCount = suballocations2nd.size(); + size_t nullItem2ndCount = 0; + for (size_t i = 0; i < suballoc2ndCount; ++i) + { + const VmaSuballocation& suballoc = suballocations2nd[i]; + const bool currFree = (suballoc.type == VMA_SUBALLOCATION_TYPE_FREE); + + VmaAllocation const alloc = (VmaAllocation)suballoc.userData; + if (!IsVirtual()) + { + VMA_VALIDATE(currFree == (alloc == VK_NULL_HANDLE)); + } + VMA_VALIDATE(suballoc.offset >= offset); + + if (!currFree) + { + if (!IsVirtual()) + { + VMA_VALIDATE((VkDeviceSize)alloc->GetAllocHandle() == suballoc.offset + 1); + VMA_VALIDATE(alloc->GetSize() == suballoc.size); + } + sumUsedSize += suballoc.size; + } + else + { + ++nullItem2ndCount; + } + + offset = suballoc.offset + suballoc.size + debugMargin; + } + + VMA_VALIDATE(nullItem2ndCount == m_2ndNullItemsCount); + } + + for (size_t i = 0; i < m_1stNullItemsBeginCount; ++i) + { + const VmaSuballocation& suballoc = suballocations1st[i]; + VMA_VALIDATE(suballoc.type == VMA_SUBALLOCATION_TYPE_FREE && + suballoc.userData == VMA_NULL); + } + + size_t nullItem1stCount = m_1stNullItemsBeginCount; + + for (size_t i = m_1stNullItemsBeginCount; i < suballoc1stCount; ++i) + { + const VmaSuballocation& suballoc = suballocations1st[i]; + const bool currFree = (suballoc.type == VMA_SUBALLOCATION_TYPE_FREE); + + VmaAllocation const alloc = (VmaAllocation)suballoc.userData; + if (!IsVirtual()) + { + VMA_VALIDATE(currFree == (alloc == VK_NULL_HANDLE)); + } + VMA_VALIDATE(suballoc.offset >= offset); + VMA_VALIDATE(i >= m_1stNullItemsBeginCount || currFree); + + if (!currFree) + { + if (!IsVirtual()) + { + VMA_VALIDATE((VkDeviceSize)alloc->GetAllocHandle() == suballoc.offset + 1); + VMA_VALIDATE(alloc->GetSize() == suballoc.size); + } + sumUsedSize += suballoc.size; + } + else + { + ++nullItem1stCount; + } + + offset = suballoc.offset + suballoc.size + debugMargin; + } + VMA_VALIDATE(nullItem1stCount == m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount); + + if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + const size_t suballoc2ndCount = suballocations2nd.size(); + size_t nullItem2ndCount = 0; + for (size_t i = suballoc2ndCount; i--; ) + { + const VmaSuballocation& suballoc = suballocations2nd[i]; + const bool currFree = (suballoc.type == VMA_SUBALLOCATION_TYPE_FREE); + + VmaAllocation const alloc = (VmaAllocation)suballoc.userData; + if (!IsVirtual()) + { + VMA_VALIDATE(currFree == (alloc == VK_NULL_HANDLE)); + } + VMA_VALIDATE(suballoc.offset >= offset); + + if (!currFree) + { + if (!IsVirtual()) + { + VMA_VALIDATE((VkDeviceSize)alloc->GetAllocHandle() == suballoc.offset + 1); + VMA_VALIDATE(alloc->GetSize() == suballoc.size); + } + sumUsedSize += suballoc.size; + } + else + { + ++nullItem2ndCount; + } + + offset = suballoc.offset + suballoc.size + debugMargin; + } + + VMA_VALIDATE(nullItem2ndCount == m_2ndNullItemsCount); + } + + VMA_VALIDATE(offset <= GetSize()); + VMA_VALIDATE(m_SumFreeSize == GetSize() - sumUsedSize); + + return true; +} + +size_t VmaBlockMetadata_Linear::GetAllocationCount() const +{ + 
return AccessSuballocations1st().size() - m_1stNullItemsBeginCount - m_1stNullItemsMiddleCount + + AccessSuballocations2nd().size() - m_2ndNullItemsCount; +} + +size_t VmaBlockMetadata_Linear::GetFreeRegionsCount() const +{ + // Function only used for defragmentation, which is disabled for this algorithm + VMA_ASSERT(0); + return SIZE_MAX; +} + +void VmaBlockMetadata_Linear::AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const +{ + const VkDeviceSize size = GetSize(); + const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + const size_t suballoc1stCount = suballocations1st.size(); + const size_t suballoc2ndCount = suballocations2nd.size(); + + inoutStats.statistics.blockCount++; + inoutStats.statistics.blockBytes += size; + + VkDeviceSize lastOffset = 0; + + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + const VkDeviceSize freeSpace2ndTo1stEnd = suballocations1st[m_1stNullItemsBeginCount].offset; + size_t nextAlloc2ndIndex = 0; + while (lastOffset < freeSpace2ndTo1stEnd) + { + // Find next non-null allocation or move nextAllocIndex to the end. + while (nextAlloc2ndIndex < suballoc2ndCount && + suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) + { + ++nextAlloc2ndIndex; + } + + // Found non-null allocation. + if (nextAlloc2ndIndex < suballoc2ndCount) + { + const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; + VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + VmaAddDetailedStatisticsAllocation(inoutStats, suballoc.size); + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc2ndIndex; + } + // We are at the end. + else + { + // There is free space from lastOffset to freeSpace2ndTo1stEnd. + if (lastOffset < freeSpace2ndTo1stEnd) + { + const VkDeviceSize unusedRangeSize = freeSpace2ndTo1stEnd - lastOffset; + VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); + } + + // End of loop. + lastOffset = freeSpace2ndTo1stEnd; + } + } + } + + size_t nextAlloc1stIndex = m_1stNullItemsBeginCount; + const VkDeviceSize freeSpace1stTo2ndEnd = + m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK ? suballocations2nd.back().offset : size; + while (lastOffset < freeSpace1stTo2ndEnd) + { + // Find next non-null allocation or move nextAllocIndex to the end. + while (nextAlloc1stIndex < suballoc1stCount && + suballocations1st[nextAlloc1stIndex].userData == VMA_NULL) + { + ++nextAlloc1stIndex; + } + + // Found non-null allocation. + if (nextAlloc1stIndex < suballoc1stCount) + { + const VmaSuballocation& suballoc = suballocations1st[nextAlloc1stIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; + VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + VmaAddDetailedStatisticsAllocation(inoutStats, suballoc.size); + + // 3. Prepare for next iteration. 
+ lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc1stIndex; + } + // We are at the end. + else + { + // There is free space from lastOffset to freeSpace1stTo2ndEnd. + if (lastOffset < freeSpace1stTo2ndEnd) + { + const VkDeviceSize unusedRangeSize = freeSpace1stTo2ndEnd - lastOffset; + VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); + } + + // End of loop. + lastOffset = freeSpace1stTo2ndEnd; + } + } + + if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + size_t nextAlloc2ndIndex = suballocations2nd.size() - 1; + while (lastOffset < size) + { + // Find next non-null allocation or move nextAllocIndex to the end. + while (nextAlloc2ndIndex != SIZE_MAX && + suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) + { + --nextAlloc2ndIndex; + } + + // Found non-null allocation. + if (nextAlloc2ndIndex != SIZE_MAX) + { + const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; + VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + VmaAddDetailedStatisticsAllocation(inoutStats, suballoc.size); + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + --nextAlloc2ndIndex; + } + // We are at the end. + else + { + // There is free space from lastOffset to size. + if (lastOffset < size) + { + const VkDeviceSize unusedRangeSize = size - lastOffset; + VmaAddDetailedStatisticsUnusedRange(inoutStats, unusedRangeSize); + } + + // End of loop. + lastOffset = size; + } + } + } +} + +void VmaBlockMetadata_Linear::AddStatistics(VmaStatistics& inoutStats) const +{ + const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + const VkDeviceSize size = GetSize(); + const size_t suballoc1stCount = suballocations1st.size(); + const size_t suballoc2ndCount = suballocations2nd.size(); + + inoutStats.blockCount++; + inoutStats.blockBytes += size; + inoutStats.allocationBytes += size - m_SumFreeSize; + + VkDeviceSize lastOffset = 0; + + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + const VkDeviceSize freeSpace2ndTo1stEnd = suballocations1st[m_1stNullItemsBeginCount].offset; + size_t nextAlloc2ndIndex = m_1stNullItemsBeginCount; + while (lastOffset < freeSpace2ndTo1stEnd) + { + // Find next non-null allocation or move nextAlloc2ndIndex to the end. + while (nextAlloc2ndIndex < suballoc2ndCount && + suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) + { + ++nextAlloc2ndIndex; + } + + // Found non-null allocation. + if (nextAlloc2ndIndex < suballoc2ndCount) + { + const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + ++inoutStats.allocationCount; + + // Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc2ndIndex; + } + // We are at the end. + else + { + // End of loop. + lastOffset = freeSpace2ndTo1stEnd; + } + } + } + + size_t nextAlloc1stIndex = m_1stNullItemsBeginCount; + const VkDeviceSize freeSpace1stTo2ndEnd = + m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK ? 
suballocations2nd.back().offset : size; + while (lastOffset < freeSpace1stTo2ndEnd) + { + // Find next non-null allocation or move nextAllocIndex to the end. + while (nextAlloc1stIndex < suballoc1stCount && + suballocations1st[nextAlloc1stIndex].userData == VMA_NULL) + { + ++nextAlloc1stIndex; + } + + // Found non-null allocation. + if (nextAlloc1stIndex < suballoc1stCount) + { + const VmaSuballocation& suballoc = suballocations1st[nextAlloc1stIndex]; + + // Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + ++inoutStats.allocationCount; + + // Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc1stIndex; + } + // We are at the end. + else + { + // End of loop. + lastOffset = freeSpace1stTo2ndEnd; + } + } + + if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + size_t nextAlloc2ndIndex = suballocations2nd.size() - 1; + while (lastOffset < size) + { + // Find next non-null allocation or move nextAlloc2ndIndex to the end. + while (nextAlloc2ndIndex != SIZE_MAX && + suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) + { + --nextAlloc2ndIndex; + } + + // Found non-null allocation. + if (nextAlloc2ndIndex != SIZE_MAX) + { + const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + ++inoutStats.allocationCount; + + // Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + --nextAlloc2ndIndex; + } + // We are at the end. + else + { + // End of loop. + lastOffset = size; + } + } + } +} + +#if VMA_STATS_STRING_ENABLED +void VmaBlockMetadata_Linear::PrintDetailedMap(class VmaJsonWriter& json) const +{ + const VkDeviceSize size = GetSize(); + const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + const size_t suballoc1stCount = suballocations1st.size(); + const size_t suballoc2ndCount = suballocations2nd.size(); + + // FIRST PASS + + size_t unusedRangeCount = 0; + VkDeviceSize usedBytes = 0; + + VkDeviceSize lastOffset = 0; + + size_t alloc2ndCount = 0; + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + const VkDeviceSize freeSpace2ndTo1stEnd = suballocations1st[m_1stNullItemsBeginCount].offset; + size_t nextAlloc2ndIndex = 0; + while (lastOffset < freeSpace2ndTo1stEnd) + { + // Find next non-null allocation or move nextAlloc2ndIndex to the end. + while (nextAlloc2ndIndex < suballoc2ndCount && + suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) + { + ++nextAlloc2ndIndex; + } + + // Found non-null allocation. + if (nextAlloc2ndIndex < suballoc2ndCount) + { + const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + ++unusedRangeCount; + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + ++alloc2ndCount; + usedBytes += suballoc.size; + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc2ndIndex; + } + // We are at the end. + else + { + if (lastOffset < freeSpace2ndTo1stEnd) + { + // There is free space from lastOffset to freeSpace2ndTo1stEnd. + ++unusedRangeCount; + } + + // End of loop. 
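+                // No live 2nd-vector items remain; jump straight to where the 1st vector's space begins.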
+ lastOffset = freeSpace2ndTo1stEnd; + } + } + } + + size_t nextAlloc1stIndex = m_1stNullItemsBeginCount; + size_t alloc1stCount = 0; + const VkDeviceSize freeSpace1stTo2ndEnd = + m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK ? suballocations2nd.back().offset : size; + while (lastOffset < freeSpace1stTo2ndEnd) + { + // Find next non-null allocation or move nextAllocIndex to the end. + while (nextAlloc1stIndex < suballoc1stCount && + suballocations1st[nextAlloc1stIndex].userData == VMA_NULL) + { + ++nextAlloc1stIndex; + } + + // Found non-null allocation. + if (nextAlloc1stIndex < suballoc1stCount) + { + const VmaSuballocation& suballoc = suballocations1st[nextAlloc1stIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + ++unusedRangeCount; + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + ++alloc1stCount; + usedBytes += suballoc.size; + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc1stIndex; + } + // We are at the end. + else + { + if (lastOffset < freeSpace1stTo2ndEnd) + { + // There is free space from lastOffset to freeSpace1stTo2ndEnd. + ++unusedRangeCount; + } + + // End of loop. + lastOffset = freeSpace1stTo2ndEnd; + } + } + + if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + size_t nextAlloc2ndIndex = suballocations2nd.size() - 1; + while (lastOffset < size) + { + // Find next non-null allocation or move nextAlloc2ndIndex to the end. + while (nextAlloc2ndIndex != SIZE_MAX && + suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) + { + --nextAlloc2ndIndex; + } + + // Found non-null allocation. + if (nextAlloc2ndIndex != SIZE_MAX) + { + const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + ++unusedRangeCount; + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + ++alloc2ndCount; + usedBytes += suballoc.size; + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + --nextAlloc2ndIndex; + } + // We are at the end. + else + { + if (lastOffset < size) + { + // There is free space from lastOffset to size. + ++unusedRangeCount; + } + + // End of loop. + lastOffset = size; + } + } + } + + const VkDeviceSize unusedBytes = size - usedBytes; + PrintDetailedMap_Begin(json, unusedBytes, alloc1stCount + alloc2ndCount, unusedRangeCount); + + // SECOND PASS + lastOffset = 0; + + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + const VkDeviceSize freeSpace2ndTo1stEnd = suballocations1st[m_1stNullItemsBeginCount].offset; + size_t nextAlloc2ndIndex = 0; + while (lastOffset < freeSpace2ndTo1stEnd) + { + // Find next non-null allocation or move nextAlloc2ndIndex to the end. + while (nextAlloc2ndIndex < suballoc2ndCount && + suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) + { + ++nextAlloc2ndIndex; + } + + // Found non-null allocation. + if (nextAlloc2ndIndex < suballoc2ndCount) + { + const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. 
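+                    // (The second pass emits the actual JSON records; the first pass only counted them so PrintDetailedMap_Begin could write the totals up front.)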
+ const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; + PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + PrintDetailedMap_Allocation(json, suballoc.offset, suballoc.size, suballoc.userData); + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc2ndIndex; + } + // We are at the end. + else + { + if (lastOffset < freeSpace2ndTo1stEnd) + { + // There is free space from lastOffset to freeSpace2ndTo1stEnd. + const VkDeviceSize unusedRangeSize = freeSpace2ndTo1stEnd - lastOffset; + PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); + } + + // End of loop. + lastOffset = freeSpace2ndTo1stEnd; + } + } + } + + nextAlloc1stIndex = m_1stNullItemsBeginCount; + while (lastOffset < freeSpace1stTo2ndEnd) + { + // Find next non-null allocation or move nextAllocIndex to the end. + while (nextAlloc1stIndex < suballoc1stCount && + suballocations1st[nextAlloc1stIndex].userData == VMA_NULL) + { + ++nextAlloc1stIndex; + } + + // Found non-null allocation. + if (nextAlloc1stIndex < suballoc1stCount) + { + const VmaSuballocation& suballoc = suballocations1st[nextAlloc1stIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; + PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + PrintDetailedMap_Allocation(json, suballoc.offset, suballoc.size, suballoc.userData); + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + ++nextAlloc1stIndex; + } + // We are at the end. + else + { + if (lastOffset < freeSpace1stTo2ndEnd) + { + // There is free space from lastOffset to freeSpace1stTo2ndEnd. + const VkDeviceSize unusedRangeSize = freeSpace1stTo2ndEnd - lastOffset; + PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); + } + + // End of loop. + lastOffset = freeSpace1stTo2ndEnd; + } + } + + if (m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + size_t nextAlloc2ndIndex = suballocations2nd.size() - 1; + while (lastOffset < size) + { + // Find next non-null allocation or move nextAlloc2ndIndex to the end. + while (nextAlloc2ndIndex != SIZE_MAX && + suballocations2nd[nextAlloc2ndIndex].userData == VMA_NULL) + { + --nextAlloc2ndIndex; + } + + // Found non-null allocation. + if (nextAlloc2ndIndex != SIZE_MAX) + { + const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; + + // 1. Process free space before this allocation. + if (lastOffset < suballoc.offset) + { + // There is free space from lastOffset to suballoc.offset. + const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; + PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); + } + + // 2. Process this allocation. + // There is allocation with suballoc.offset, suballoc.size. + PrintDetailedMap_Allocation(json, suballoc.offset, suballoc.size, suballoc.userData); + + // 3. Prepare for next iteration. + lastOffset = suballoc.offset + suballoc.size; + --nextAlloc2ndIndex; + } + // We are at the end. + else + { + if (lastOffset < size) + { + // There is free space from lastOffset to size. 
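+                    // (Trailing gap between the last processed allocation and the end of the block.)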
+ const VkDeviceSize unusedRangeSize = size - lastOffset; + PrintDetailedMap_UnusedRange(json, lastOffset, unusedRangeSize); + } + + // End of loop. + lastOffset = size; + } + } + } + + PrintDetailedMap_End(json); +} +#endif // VMA_STATS_STRING_ENABLED + +bool VmaBlockMetadata_Linear::CreateAllocationRequest( + VkDeviceSize allocSize, + VkDeviceSize allocAlignment, + bool upperAddress, + VmaSuballocationType allocType, + uint32_t strategy, + VmaAllocationRequest* pAllocationRequest) +{ + VMA_ASSERT(allocSize > 0); + VMA_ASSERT(allocType != VMA_SUBALLOCATION_TYPE_FREE); + VMA_ASSERT(pAllocationRequest != VMA_NULL); + VMA_HEAVY_ASSERT(Validate()); + + if(allocSize > GetSize()) + return false; + + pAllocationRequest->size = allocSize; + return upperAddress ? + CreateAllocationRequest_UpperAddress( + allocSize, allocAlignment, allocType, strategy, pAllocationRequest) : + CreateAllocationRequest_LowerAddress( + allocSize, allocAlignment, allocType, strategy, pAllocationRequest); +} + +VkResult VmaBlockMetadata_Linear::CheckCorruption(const void* pBlockData) +{ + VMA_ASSERT(!IsVirtual()); + SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + for (size_t i = m_1stNullItemsBeginCount, count = suballocations1st.size(); i < count; ++i) + { + const VmaSuballocation& suballoc = suballocations1st[i]; + if (suballoc.type != VMA_SUBALLOCATION_TYPE_FREE) + { + if (!VmaValidateMagicValue(pBlockData, suballoc.offset + suballoc.size)) + { + VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER VALIDATED ALLOCATION!"); + return VK_ERROR_UNKNOWN_COPY; + } + } + } + + SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + for (size_t i = 0, count = suballocations2nd.size(); i < count; ++i) + { + const VmaSuballocation& suballoc = suballocations2nd[i]; + if (suballoc.type != VMA_SUBALLOCATION_TYPE_FREE) + { + if (!VmaValidateMagicValue(pBlockData, suballoc.offset + suballoc.size)) + { + VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER VALIDATED ALLOCATION!"); + return VK_ERROR_UNKNOWN_COPY; + } + } + } + + return VK_SUCCESS; +} + +void VmaBlockMetadata_Linear::Alloc( + const VmaAllocationRequest& request, + VmaSuballocationType type, + void* userData) +{ + const VkDeviceSize offset = (VkDeviceSize)request.allocHandle - 1; + const VmaSuballocation newSuballoc = { offset, request.size, userData, type }; + + switch (request.type) + { + case VmaAllocationRequestType::UpperAddress: + { + VMA_ASSERT(m_2ndVectorMode != SECOND_VECTOR_RING_BUFFER && + "CRITICAL ERROR: Trying to use linear allocator as double stack while it was already used as ring buffer."); + SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + suballocations2nd.push_back(newSuballoc); + m_2ndVectorMode = SECOND_VECTOR_DOUBLE_STACK; + } + break; + case VmaAllocationRequestType::EndOf1st: + { + SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + + VMA_ASSERT(suballocations1st.empty() || + offset >= suballocations1st.back().offset + suballocations1st.back().size); + // Check if it fits before the end of the block. + VMA_ASSERT(offset + request.size <= GetSize()); + + suballocations1st.push_back(newSuballoc); + } + break; + case VmaAllocationRequestType::EndOf2nd: + { + SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + // New allocation at the end of 2-part ring buffer, so before first allocation from 1st vector. 
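+        // (The ring has wrapped around: 2nd-vector offsets grow from 0 toward the first live item of the 1st vector.)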
+ VMA_ASSERT(!suballocations1st.empty() && + offset + request.size <= suballocations1st[m_1stNullItemsBeginCount].offset); + SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + + switch (m_2ndVectorMode) + { + case SECOND_VECTOR_EMPTY: + // First allocation from second part ring buffer. + VMA_ASSERT(suballocations2nd.empty()); + m_2ndVectorMode = SECOND_VECTOR_RING_BUFFER; + break; + case SECOND_VECTOR_RING_BUFFER: + // 2-part ring buffer is already started. + VMA_ASSERT(!suballocations2nd.empty()); + break; + case SECOND_VECTOR_DOUBLE_STACK: + VMA_ASSERT(0 && "CRITICAL ERROR: Trying to use linear allocator as ring buffer while it was already used as double stack."); + break; + default: + VMA_ASSERT(0); + } + + suballocations2nd.push_back(newSuballoc); + } + break; + default: + VMA_ASSERT(0 && "CRITICAL INTERNAL ERROR."); + } + + m_SumFreeSize -= newSuballoc.size; +} + +void VmaBlockMetadata_Linear::Free(VmaAllocHandle allocHandle) +{ + SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + VkDeviceSize offset = (VkDeviceSize)allocHandle - 1; + + if (!suballocations1st.empty()) + { + // First allocation: Mark it as next empty at the beginning. + VmaSuballocation& firstSuballoc = suballocations1st[m_1stNullItemsBeginCount]; + if (firstSuballoc.offset == offset) + { + firstSuballoc.type = VMA_SUBALLOCATION_TYPE_FREE; + firstSuballoc.userData = VMA_NULL; + m_SumFreeSize += firstSuballoc.size; + ++m_1stNullItemsBeginCount; + CleanupAfterFree(); + return; + } + } + + // Last allocation in 2-part ring buffer or top of upper stack (same logic). + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER || + m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + VmaSuballocation& lastSuballoc = suballocations2nd.back(); + if (lastSuballoc.offset == offset) + { + m_SumFreeSize += lastSuballoc.size; + suballocations2nd.pop_back(); + CleanupAfterFree(); + return; + } + } + // Last allocation in 1st vector. + else if (m_2ndVectorMode == SECOND_VECTOR_EMPTY) + { + VmaSuballocation& lastSuballoc = suballocations1st.back(); + if (lastSuballoc.offset == offset) + { + m_SumFreeSize += lastSuballoc.size; + suballocations1st.pop_back(); + CleanupAfterFree(); + return; + } + } + + VmaSuballocation refSuballoc; + refSuballoc.offset = offset; + // Rest of members stays uninitialized intentionally for better performance. + + // Item from the middle of 1st vector. + { + const SuballocationVectorType::iterator it = VmaBinaryFindSorted( + suballocations1st.begin() + m_1stNullItemsBeginCount, + suballocations1st.end(), + refSuballoc, + VmaSuballocationOffsetLess()); + if (it != suballocations1st.end()) + { + it->type = VMA_SUBALLOCATION_TYPE_FREE; + it->userData = VMA_NULL; + ++m_1stNullItemsMiddleCount; + m_SumFreeSize += it->size; + CleanupAfterFree(); + return; + } + } + + if (m_2ndVectorMode != SECOND_VECTOR_EMPTY) + { + // Item from the middle of 2nd vector. + const SuballocationVectorType::iterator it = m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER ? 
+ VmaBinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, VmaSuballocationOffsetLess()) : + VmaBinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, VmaSuballocationOffsetGreater()); + if (it != suballocations2nd.end()) + { + it->type = VMA_SUBALLOCATION_TYPE_FREE; + it->userData = VMA_NULL; + ++m_2ndNullItemsCount; + m_SumFreeSize += it->size; + CleanupAfterFree(); + return; + } + } + + VMA_ASSERT(0 && "Allocation to free not found in linear allocator!"); +} + +void VmaBlockMetadata_Linear::GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) +{ + outInfo.offset = (VkDeviceSize)allocHandle - 1; + VmaSuballocation& suballoc = FindSuballocation(outInfo.offset); + outInfo.size = suballoc.size; + outInfo.pUserData = suballoc.userData; +} + +void* VmaBlockMetadata_Linear::GetAllocationUserData(VmaAllocHandle allocHandle) const +{ + return FindSuballocation((VkDeviceSize)allocHandle - 1).userData; +} + +VmaAllocHandle VmaBlockMetadata_Linear::GetAllocationListBegin() const +{ + // Function only used for defragmentation, which is disabled for this algorithm + VMA_ASSERT(0); + return VK_NULL_HANDLE; +} + +VmaAllocHandle VmaBlockMetadata_Linear::GetNextAllocation(VmaAllocHandle prevAlloc) const +{ + // Function only used for defragmentation, which is disabled for this algorithm + VMA_ASSERT(0); + return VK_NULL_HANDLE; +} + +VkDeviceSize VmaBlockMetadata_Linear::GetNextFreeRegionSize(VmaAllocHandle alloc) const +{ + // Function only used for defragmentation, which is disabled for this algorithm + VMA_ASSERT(0); + return 0; +} + +void VmaBlockMetadata_Linear::Clear() +{ + m_SumFreeSize = GetSize(); + m_Suballocations0.clear(); + m_Suballocations1.clear(); + // Leaving m_1stVectorIndex unchanged - it doesn't matter. + m_2ndVectorMode = SECOND_VECTOR_EMPTY; + m_1stNullItemsBeginCount = 0; + m_1stNullItemsMiddleCount = 0; + m_2ndNullItemsCount = 0; +} + +void VmaBlockMetadata_Linear::SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) +{ + VmaSuballocation& suballoc = FindSuballocation((VkDeviceSize)allocHandle - 1); + suballoc.userData = userData; +} + +void VmaBlockMetadata_Linear::DebugLogAllAllocations() const +{ + const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + for (auto it = suballocations1st.begin() + m_1stNullItemsBeginCount; it != suballocations1st.end(); ++it) + if (it->type != VMA_SUBALLOCATION_TYPE_FREE) + DebugLogAllocation(it->offset, it->size, it->userData); + + const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + for (auto it = suballocations2nd.begin(); it != suballocations2nd.end(); ++it) + if (it->type != VMA_SUBALLOCATION_TYPE_FREE) + DebugLogAllocation(it->offset, it->size, it->userData); +} + +VmaSuballocation& VmaBlockMetadata_Linear::FindSuballocation(VkDeviceSize offset) const +{ + const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + const SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + + VmaSuballocation refSuballoc; + refSuballoc.offset = offset; + // Rest of members stays uninitialized intentionally for better performance. + + // Item from the 1st vector. 
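+    // Both vectors are kept sorted by offset, so binary search applies; the 2nd vector's ordering depends on its mode (ascending for ring buffer, descending for double stack).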
+    {
+        SuballocationVectorType::const_iterator it = VmaBinaryFindSorted(
+            suballocations1st.begin() + m_1stNullItemsBeginCount,
+            suballocations1st.end(),
+            refSuballoc,
+            VmaSuballocationOffsetLess());
+        if (it != suballocations1st.end())
+        {
+            return const_cast<VmaSuballocation&>(*it);
+        }
+    }
+
+    if (m_2ndVectorMode != SECOND_VECTOR_EMPTY)
+    {
+        // Rest of members stays uninitialized intentionally for better performance.
+        SuballocationVectorType::const_iterator it = m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER ?
+            VmaBinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, VmaSuballocationOffsetLess()) :
+            VmaBinaryFindSorted(suballocations2nd.begin(), suballocations2nd.end(), refSuballoc, VmaSuballocationOffsetGreater());
+        if (it != suballocations2nd.end())
+        {
+            return const_cast<VmaSuballocation&>(*it);
+        }
+    }
+
+    VMA_ASSERT(0 && "Allocation not found in linear allocator!");
+    return const_cast<VmaSuballocation&>(suballocations1st.back()); // Should never occur.
+}
+
+bool VmaBlockMetadata_Linear::ShouldCompact1st() const
+{
+    const size_t nullItemCount = m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount;
+    const size_t suballocCount = AccessSuballocations1st().size();
+    return suballocCount > 32 && nullItemCount * 2 >= (suballocCount - nullItemCount) * 3;
+}
+
+void VmaBlockMetadata_Linear::CleanupAfterFree()
+{
+    SuballocationVectorType& suballocations1st = AccessSuballocations1st();
+    SuballocationVectorType& suballocations2nd = AccessSuballocations2nd();
+
+    if (IsEmpty())
+    {
+        suballocations1st.clear();
+        suballocations2nd.clear();
+        m_1stNullItemsBeginCount = 0;
+        m_1stNullItemsMiddleCount = 0;
+        m_2ndNullItemsCount = 0;
+        m_2ndVectorMode = SECOND_VECTOR_EMPTY;
+    }
+    else
+    {
+        const size_t suballoc1stCount = suballocations1st.size();
+        const size_t nullItem1stCount = m_1stNullItemsBeginCount + m_1stNullItemsMiddleCount;
+        VMA_ASSERT(nullItem1stCount <= suballoc1stCount);
+
+        // Find more null items at the beginning of 1st vector.
+        while (m_1stNullItemsBeginCount < suballoc1stCount &&
+            suballocations1st[m_1stNullItemsBeginCount].type == VMA_SUBALLOCATION_TYPE_FREE)
+        {
+            ++m_1stNullItemsBeginCount;
+            --m_1stNullItemsMiddleCount;
+        }
+
+        // Find more null items at the end of 1st vector.
+        while (m_1stNullItemsMiddleCount > 0 &&
+            suballocations1st.back().type == VMA_SUBALLOCATION_TYPE_FREE)
+        {
+            --m_1stNullItemsMiddleCount;
+            suballocations1st.pop_back();
+        }
+
+        // Find more null items at the end of 2nd vector.
+        while (m_2ndNullItemsCount > 0 &&
+            suballocations2nd.back().type == VMA_SUBALLOCATION_TYPE_FREE)
+        {
+            --m_2ndNullItemsCount;
+            suballocations2nd.pop_back();
+        }
+
+        // Find more null items at the beginning of 2nd vector.
+        while (m_2ndNullItemsCount > 0 &&
+            suballocations2nd[0].type == VMA_SUBALLOCATION_TYPE_FREE)
+        {
+            --m_2ndNullItemsCount;
+            VmaVectorRemove(suballocations2nd, 0);
+        }
+
+        if (ShouldCompact1st())
+        {
+            const size_t nonNullItemCount = suballoc1stCount - nullItem1stCount;
+            size_t srcIndex = m_1stNullItemsBeginCount;
+            for (size_t dstIndex = 0; dstIndex < nonNullItemCount; ++dstIndex)
+            {
+                while (suballocations1st[srcIndex].type == VMA_SUBALLOCATION_TYPE_FREE)
+                {
+                    ++srcIndex;
+                }
+                if (dstIndex != srcIndex)
+                {
+                    suballocations1st[dstIndex] = suballocations1st[srcIndex];
+                }
+                ++srcIndex;
+            }
+            suballocations1st.resize(nonNullItemCount);
+            m_1stNullItemsBeginCount = 0;
+            m_1stNullItemsMiddleCount = 0;
+        }
+
+        // 2nd vector became empty.
+        if (suballocations2nd.empty())
+        {
+            m_2ndVectorMode = SECOND_VECTOR_EMPTY;
+        }
+
+        // 1st vector became empty.
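+        // If only null items remain, reset 1st; a ring-buffer 2nd vector is then promoted to become the new 1st via m_1stVectorIndex ^= 1.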
+ if (suballocations1st.size() - m_1stNullItemsBeginCount == 0) + { + suballocations1st.clear(); + m_1stNullItemsBeginCount = 0; + + if (!suballocations2nd.empty() && m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + // Swap 1st with 2nd. Now 2nd is empty. + m_2ndVectorMode = SECOND_VECTOR_EMPTY; + m_1stNullItemsMiddleCount = m_2ndNullItemsCount; + while (m_1stNullItemsBeginCount < suballocations2nd.size() && + suballocations2nd[m_1stNullItemsBeginCount].type == VMA_SUBALLOCATION_TYPE_FREE) + { + ++m_1stNullItemsBeginCount; + --m_1stNullItemsMiddleCount; + } + m_2ndNullItemsCount = 0; + m_1stVectorIndex ^= 1; + } + } + } + + VMA_HEAVY_ASSERT(Validate()); +} + +bool VmaBlockMetadata_Linear::CreateAllocationRequest_LowerAddress( + VkDeviceSize allocSize, + VkDeviceSize allocAlignment, + VmaSuballocationType allocType, + uint32_t strategy, + VmaAllocationRequest* pAllocationRequest) +{ + const VkDeviceSize blockSize = GetSize(); + const VkDeviceSize debugMargin = GetDebugMargin(); + const VkDeviceSize bufferImageGranularity = GetBufferImageGranularity(); + SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + + if (m_2ndVectorMode == SECOND_VECTOR_EMPTY || m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + // Try to allocate at the end of 1st vector. + + VkDeviceSize resultBaseOffset = 0; + if (!suballocations1st.empty()) + { + const VmaSuballocation& lastSuballoc = suballocations1st.back(); + resultBaseOffset = lastSuballoc.offset + lastSuballoc.size + debugMargin; + } + + // Start from offset equal to beginning of free space. + VkDeviceSize resultOffset = resultBaseOffset; + + // Apply alignment. + resultOffset = VmaAlignUp(resultOffset, allocAlignment); + + // Check previous suballocations for BufferImageGranularity conflicts. + // Make bigger alignment if necessary. + if (bufferImageGranularity > 1 && bufferImageGranularity != allocAlignment && !suballocations1st.empty()) + { + bool bufferImageGranularityConflict = false; + for (size_t prevSuballocIndex = suballocations1st.size(); prevSuballocIndex--; ) + { + const VmaSuballocation& prevSuballoc = suballocations1st[prevSuballocIndex]; + if (VmaBlocksOnSamePage(prevSuballoc.offset, prevSuballoc.size, resultOffset, bufferImageGranularity)) + { + if (VmaIsBufferImageGranularityConflict(prevSuballoc.type, allocType)) + { + bufferImageGranularityConflict = true; + break; + } + } + else + // Already on previous page. + break; + } + if (bufferImageGranularityConflict) + { + resultOffset = VmaAlignUp(resultOffset, bufferImageGranularity); + } + } + + const VkDeviceSize freeSpaceEnd = m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK ? + suballocations2nd.back().offset : blockSize; + + // There is enough free space at the end after alignment. + if (resultOffset + allocSize + debugMargin <= freeSpaceEnd) + { + // Check next suballocations for BufferImageGranularity conflicts. + // If conflict exists, allocation cannot be made here. 
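+            // (A conflict is only possible when the new allocation can share a granularity page with an upper-stack neighbor, hence the modulo pre-check below.)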
+ if ((allocSize % bufferImageGranularity || resultOffset % bufferImageGranularity) && m_2ndVectorMode == SECOND_VECTOR_DOUBLE_STACK) + { + for (size_t nextSuballocIndex = suballocations2nd.size(); nextSuballocIndex--; ) + { + const VmaSuballocation& nextSuballoc = suballocations2nd[nextSuballocIndex]; + if (VmaBlocksOnSamePage(resultOffset, allocSize, nextSuballoc.offset, bufferImageGranularity)) + { + if (VmaIsBufferImageGranularityConflict(allocType, nextSuballoc.type)) + { + return false; + } + } + else + { + // Already on previous page. + break; + } + } + } + + // All tests passed: Success. + pAllocationRequest->allocHandle = (VmaAllocHandle)(resultOffset + 1); + // pAllocationRequest->item, customData unused. + pAllocationRequest->type = VmaAllocationRequestType::EndOf1st; + return true; + } + } + + // Wrap-around to end of 2nd vector. Try to allocate there, watching for the + // beginning of 1st vector as the end of free space. + if (m_2ndVectorMode == SECOND_VECTOR_EMPTY || m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + VMA_ASSERT(!suballocations1st.empty()); + + VkDeviceSize resultBaseOffset = 0; + if (!suballocations2nd.empty()) + { + const VmaSuballocation& lastSuballoc = suballocations2nd.back(); + resultBaseOffset = lastSuballoc.offset + lastSuballoc.size + debugMargin; + } + + // Start from offset equal to beginning of free space. + VkDeviceSize resultOffset = resultBaseOffset; + + // Apply alignment. + resultOffset = VmaAlignUp(resultOffset, allocAlignment); + + // Check previous suballocations for BufferImageGranularity conflicts. + // Make bigger alignment if necessary. + if (bufferImageGranularity > 1 && bufferImageGranularity != allocAlignment && !suballocations2nd.empty()) + { + bool bufferImageGranularityConflict = false; + for (size_t prevSuballocIndex = suballocations2nd.size(); prevSuballocIndex--; ) + { + const VmaSuballocation& prevSuballoc = suballocations2nd[prevSuballocIndex]; + if (VmaBlocksOnSamePage(prevSuballoc.offset, prevSuballoc.size, resultOffset, bufferImageGranularity)) + { + if (VmaIsBufferImageGranularityConflict(prevSuballoc.type, allocType)) + { + bufferImageGranularityConflict = true; + break; + } + } + else + // Already on previous page. + break; + } + if (bufferImageGranularityConflict) + { + resultOffset = VmaAlignUp(resultOffset, bufferImageGranularity); + } + } + + size_t index1st = m_1stNullItemsBeginCount; + + // There is enough free space at the end after alignment. + if ((index1st == suballocations1st.size() && resultOffset + allocSize + debugMargin <= blockSize) || + (index1st < suballocations1st.size() && resultOffset + allocSize + debugMargin <= suballocations1st[index1st].offset)) + { + // Check next suballocations for BufferImageGranularity conflicts. + // If conflict exists, allocation cannot be made here. + if (allocSize % bufferImageGranularity || resultOffset % bufferImageGranularity) + { + for (size_t nextSuballocIndex = index1st; + nextSuballocIndex < suballocations1st.size(); + nextSuballocIndex++) + { + const VmaSuballocation& nextSuballoc = suballocations1st[nextSuballocIndex]; + if (VmaBlocksOnSamePage(resultOffset, allocSize, nextSuballoc.offset, bufferImageGranularity)) + { + if (VmaIsBufferImageGranularityConflict(allocType, nextSuballoc.type)) + { + return false; + } + } + else + { + // Already on next page. + break; + } + } + } + + // All tests passed: Success. 
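+            // allocHandle encodes the byte offset as offset+1 so that 0 remains a null handle; GetAllocationOffset() reverses it by subtracting 1.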
+ pAllocationRequest->allocHandle = (VmaAllocHandle)(resultOffset + 1); + pAllocationRequest->type = VmaAllocationRequestType::EndOf2nd; + // pAllocationRequest->item, customData unused. + return true; + } + } + + return false; +} + +bool VmaBlockMetadata_Linear::CreateAllocationRequest_UpperAddress( + VkDeviceSize allocSize, + VkDeviceSize allocAlignment, + VmaSuballocationType allocType, + uint32_t strategy, + VmaAllocationRequest* pAllocationRequest) +{ + const VkDeviceSize blockSize = GetSize(); + const VkDeviceSize bufferImageGranularity = GetBufferImageGranularity(); + SuballocationVectorType& suballocations1st = AccessSuballocations1st(); + SuballocationVectorType& suballocations2nd = AccessSuballocations2nd(); + + if (m_2ndVectorMode == SECOND_VECTOR_RING_BUFFER) + { + VMA_ASSERT(0 && "Trying to use pool with linear algorithm as double stack, while it is already being used as ring buffer."); + return false; + } + + // Try to allocate before 2nd.back(), or end of block if 2nd.empty(). + if (allocSize > blockSize) + { + return false; + } + VkDeviceSize resultBaseOffset = blockSize - allocSize; + if (!suballocations2nd.empty()) + { + const VmaSuballocation& lastSuballoc = suballocations2nd.back(); + resultBaseOffset = lastSuballoc.offset - allocSize; + if (allocSize > lastSuballoc.offset) + { + return false; + } + } + + // Start from offset equal to end of free space. + VkDeviceSize resultOffset = resultBaseOffset; + + const VkDeviceSize debugMargin = GetDebugMargin(); + + // Apply debugMargin at the end. + if (debugMargin > 0) + { + if (resultOffset < debugMargin) + { + return false; + } + resultOffset -= debugMargin; + } + + // Apply alignment. + resultOffset = VmaAlignDown(resultOffset, allocAlignment); + + // Check next suballocations from 2nd for BufferImageGranularity conflicts. + // Make bigger alignment if necessary. + if (bufferImageGranularity > 1 && bufferImageGranularity != allocAlignment && !suballocations2nd.empty()) + { + bool bufferImageGranularityConflict = false; + for (size_t nextSuballocIndex = suballocations2nd.size(); nextSuballocIndex--; ) + { + const VmaSuballocation& nextSuballoc = suballocations2nd[nextSuballocIndex]; + if (VmaBlocksOnSamePage(resultOffset, allocSize, nextSuballoc.offset, bufferImageGranularity)) + { + if (VmaIsBufferImageGranularityConflict(nextSuballoc.type, allocType)) + { + bufferImageGranularityConflict = true; + break; + } + } + else + // Already on previous page. + break; + } + if (bufferImageGranularityConflict) + { + resultOffset = VmaAlignDown(resultOffset, bufferImageGranularity); + } + } + + // There is enough free space. + const VkDeviceSize endOf1st = !suballocations1st.empty() ? + suballocations1st.back().offset + suballocations1st.back().size : + 0; + if (endOf1st + debugMargin <= resultOffset) + { + // Check previous suballocations for BufferImageGranularity conflicts. + // If conflict exists, allocation cannot be made here. + if (bufferImageGranularity > 1) + { + for (size_t prevSuballocIndex = suballocations1st.size(); prevSuballocIndex--; ) + { + const VmaSuballocation& prevSuballoc = suballocations1st[prevSuballocIndex]; + if (VmaBlocksOnSamePage(prevSuballoc.offset, prevSuballoc.size, resultOffset, bufferImageGranularity)) + { + if (VmaIsBufferImageGranularityConflict(allocType, prevSuballoc.type)) + { + return false; + } + } + else + { + // Already on next page. + break; + } + } + } + + // All tests passed: Success. 
+        pAllocationRequest->allocHandle = (VmaAllocHandle)(resultOffset + 1);
+        // pAllocationRequest->item unused.
+        pAllocationRequest->type = VmaAllocationRequestType::UpperAddress;
+        return true;
+    }
+
+    return false;
+}
+#endif // _VMA_BLOCK_METADATA_LINEAR_FUNCTIONS
+#endif // _VMA_BLOCK_METADATA_LINEAR
+
+#ifndef _VMA_BLOCK_METADATA_TLSF
+// Pass VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT as the strategy to
+// CreateAllocationRequest() to avoid searching the current, larger region when the
+// first allocation attempt in it would not succeed, and to skip directly to the
+// smaller ranges instead.
+// When fragmentation and reuse of previous blocks don't matter, use
+// VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT for the fastest possible allocation time.
+class VmaBlockMetadata_TLSF : public VmaBlockMetadata
+{
+    VMA_CLASS_NO_COPY_NO_MOVE(VmaBlockMetadata_TLSF)
+public:
+    VmaBlockMetadata_TLSF(const VkAllocationCallbacks* pAllocationCallbacks,
+        VkDeviceSize bufferImageGranularity, bool isVirtual);
+    virtual ~VmaBlockMetadata_TLSF();
+
+    size_t GetAllocationCount() const override { return m_AllocCount; }
+    size_t GetFreeRegionsCount() const override { return m_BlocksFreeCount + 1; }
+    VkDeviceSize GetSumFreeSize() const override { return m_BlocksFreeSize + m_NullBlock->size; }
+    bool IsEmpty() const override { return m_NullBlock->offset == 0; }
+    VkDeviceSize GetAllocationOffset(VmaAllocHandle allocHandle) const override { return ((Block*)allocHandle)->offset; }
+
+    void Init(VkDeviceSize size) override;
+    bool Validate() const override;
+
+    void AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const override;
+    void AddStatistics(VmaStatistics& inoutStats) const override;
+
+#if VMA_STATS_STRING_ENABLED
+    void PrintDetailedMap(class VmaJsonWriter& json) const override;
+#endif
+
+    bool CreateAllocationRequest(
+        VkDeviceSize allocSize,
+        VkDeviceSize allocAlignment,
+        bool upperAddress,
+        VmaSuballocationType allocType,
+        uint32_t strategy,
+        VmaAllocationRequest* pAllocationRequest) override;
+
+    VkResult CheckCorruption(const void* pBlockData) override;
+    void Alloc(
+        const VmaAllocationRequest& request,
+        VmaSuballocationType type,
+        void* userData) override;
+
+    void Free(VmaAllocHandle allocHandle) override;
+    void GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) override;
+    void* GetAllocationUserData(VmaAllocHandle allocHandle) const override;
+    VmaAllocHandle GetAllocationListBegin() const override;
+    VmaAllocHandle GetNextAllocation(VmaAllocHandle prevAlloc) const override;
+    VkDeviceSize GetNextFreeRegionSize(VmaAllocHandle alloc) const override;
+    void Clear() override;
+    void SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) override;
+    void DebugLogAllAllocations() const override;
+
+private:
+    // According to the original paper, 4 or 5 should be preferable:
+    // M. Masmano, I. Ripoll, A. Crespo, and J. Real "TLSF: a New Dynamic Memory Allocator for Real-Time Systems"
+    // http://www.gii.upv.es/tlsf/files/ecrts04_tlsf.pdf
+    static const uint8_t SECOND_LEVEL_INDEX = 5;
+    static const uint16_t SMALL_BUFFER_SIZE = 256;
+    static const uint32_t INITIAL_BLOCK_ALLOC_COUNT = 16;
+    static const uint8_t MEMORY_CLASS_SHIFT = 7;
+    static const uint8_t MAX_MEMORY_CLASSES = 65 - MEMORY_CLASS_SHIFT;
+
+    class Block
+    {
+    public:
+        VkDeviceSize offset;
+        VkDeviceSize size;
+        Block* prevPhysical;
+        Block* nextPhysical;
+
+        void MarkFree() { prevFree = VMA_NULL; }
+        void MarkTaken() { prevFree = this; }
+        bool IsFree() const { return prevFree != this; }
+        void*& UserData() { VMA_HEAVY_ASSERT(!IsFree()); return userData; }
+        Block*& PrevFree() { return prevFree; }
+        Block*& NextFree() { VMA_HEAVY_ASSERT(IsFree()); return nextFree; }
+
+    private:
+        Block* prevFree; // Address of the same block here indicates that block is taken
+        union
+        {
+            Block* nextFree;
+            void* userData;
+        };
+    };
+
+    size_t m_AllocCount;
+    // Total number of free blocks besides null block
+    size_t m_BlocksFreeCount;
+    // Total size of free blocks excluding null block
+    VkDeviceSize m_BlocksFreeSize;
+    uint32_t m_IsFreeBitmap;
+    uint8_t m_MemoryClasses;
+    uint32_t m_InnerIsFreeBitmap[MAX_MEMORY_CLASSES];
+    uint32_t m_ListsCount;
+    /*
+    * 0: 0-3 lists for small buffers
+    * 1+: 0-(2^SLI-1) lists for normal buffers
+    */
+    Block** m_FreeList;
+    VmaPoolAllocator<Block> m_BlockAllocator;
+    Block* m_NullBlock;
+    VmaBlockBufferImageGranularity m_GranularityHandler;
+
+    uint8_t SizeToMemoryClass(VkDeviceSize size) const;
+    uint16_t SizeToSecondIndex(VkDeviceSize size, uint8_t memoryClass) const;
+    uint32_t GetListIndex(uint8_t memoryClass, uint16_t secondIndex) const;
+    uint32_t GetListIndex(VkDeviceSize size) const;
+
+    void RemoveFreeBlock(Block* block);
+    void InsertFreeBlock(Block* block);
+    void MergeBlock(Block* block, Block* prev);
+
+    Block* FindFreeBlock(VkDeviceSize size, uint32_t& listIndex) const;
+    bool CheckBlock(
+        Block& block,
+        uint32_t listIndex,
+        VkDeviceSize allocSize,
+        VkDeviceSize allocAlignment,
+        VmaSuballocationType allocType,
+        VmaAllocationRequest* pAllocationRequest);
+};
+
+#ifndef _VMA_BLOCK_METADATA_TLSF_FUNCTIONS
+VmaBlockMetadata_TLSF::VmaBlockMetadata_TLSF(const VkAllocationCallbacks* pAllocationCallbacks,
+    VkDeviceSize bufferImageGranularity, bool isVirtual)
+    : VmaBlockMetadata(pAllocationCallbacks, bufferImageGranularity, isVirtual),
+    m_AllocCount(0),
+    m_BlocksFreeCount(0),
+    m_BlocksFreeSize(0),
+    m_IsFreeBitmap(0),
+    m_MemoryClasses(0),
+    m_ListsCount(0),
+    m_FreeList(VMA_NULL),
+    m_BlockAllocator(pAllocationCallbacks, INITIAL_BLOCK_ALLOC_COUNT),
+    m_NullBlock(VMA_NULL),
+    m_GranularityHandler(bufferImageGranularity) {}
+
+VmaBlockMetadata_TLSF::~VmaBlockMetadata_TLSF()
+{
+    if (m_FreeList)
+        vma_delete_array(GetAllocationCallbacks(), m_FreeList, m_ListsCount);
+    m_GranularityHandler.Destroy(GetAllocationCallbacks());
+}
+
+void VmaBlockMetadata_TLSF::Init(VkDeviceSize size)
+{
+    VmaBlockMetadata::Init(size);
+
+    if (!IsVirtual())
+        m_GranularityHandler.Init(GetAllocationCallbacks(), size);
+
+    m_NullBlock = m_BlockAllocator.Alloc();
+    m_NullBlock->size = size;
+    m_NullBlock->offset = 0;
+    m_NullBlock->prevPhysical = VMA_NULL;
+    m_NullBlock->nextPhysical = VMA_NULL;
+    m_NullBlock->MarkFree();
+    m_NullBlock->NextFree() = VMA_NULL;
+    m_NullBlock->PrevFree() = VMA_NULL;
+    uint8_t memoryClass = SizeToMemoryClass(size);
+    uint16_t sli = SizeToSecondIndex(size, memoryClass);
+    m_ListsCount = (memoryClass == 0 ?
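+        // Worked example (illustrative): for a 64 MiB block, SizeToMemoryClass
+        // returns 26 - MEMORY_CLASS_SHIFT = 19 and sli is 0, so this evaluates to
+        // (19 - 1) * 32 + 0 + 1 = 577 lists, before the small-buffer lists are
+        // appended just below.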
0 : (memoryClass - 1) * (1UL << SECOND_LEVEL_INDEX) + sli) + 1; + if (IsVirtual()) + m_ListsCount += 1UL << SECOND_LEVEL_INDEX; + else + m_ListsCount += 4; + + m_MemoryClasses = memoryClass + uint8_t(2); + memset(m_InnerIsFreeBitmap, 0, MAX_MEMORY_CLASSES * sizeof(uint32_t)); + + m_FreeList = vma_new_array(GetAllocationCallbacks(), Block*, m_ListsCount); + memset(m_FreeList, 0, m_ListsCount * sizeof(Block*)); +} + +bool VmaBlockMetadata_TLSF::Validate() const +{ + VMA_VALIDATE(GetSumFreeSize() <= GetSize()); + + VkDeviceSize calculatedSize = m_NullBlock->size; + VkDeviceSize calculatedFreeSize = m_NullBlock->size; + size_t allocCount = 0; + size_t freeCount = 0; + + // Check integrity of free lists + for (uint32_t list = 0; list < m_ListsCount; ++list) + { + Block* block = m_FreeList[list]; + if (block != VMA_NULL) + { + VMA_VALIDATE(block->IsFree()); + VMA_VALIDATE(block->PrevFree() == VMA_NULL); + while (block->NextFree()) + { + VMA_VALIDATE(block->NextFree()->IsFree()); + VMA_VALIDATE(block->NextFree()->PrevFree() == block); + block = block->NextFree(); + } + } + } + + VkDeviceSize nextOffset = m_NullBlock->offset; + auto validateCtx = m_GranularityHandler.StartValidation(GetAllocationCallbacks(), IsVirtual()); + + VMA_VALIDATE(m_NullBlock->nextPhysical == VMA_NULL); + if (m_NullBlock->prevPhysical) + { + VMA_VALIDATE(m_NullBlock->prevPhysical->nextPhysical == m_NullBlock); + } + // Check all blocks + for (Block* prev = m_NullBlock->prevPhysical; prev != VMA_NULL; prev = prev->prevPhysical) + { + VMA_VALIDATE(prev->offset + prev->size == nextOffset); + nextOffset = prev->offset; + calculatedSize += prev->size; + + uint32_t listIndex = GetListIndex(prev->size); + if (prev->IsFree()) + { + ++freeCount; + // Check if free block belongs to free list + Block* freeBlock = m_FreeList[listIndex]; + VMA_VALIDATE(freeBlock != VMA_NULL); + + bool found = false; + do + { + if (freeBlock == prev) + found = true; + + freeBlock = freeBlock->NextFree(); + } while (!found && freeBlock != VMA_NULL); + + VMA_VALIDATE(found); + calculatedFreeSize += prev->size; + } + else + { + ++allocCount; + // Check if taken block is not on a free list + Block* freeBlock = m_FreeList[listIndex]; + while (freeBlock) + { + VMA_VALIDATE(freeBlock != prev); + freeBlock = freeBlock->NextFree(); + } + + if (!IsVirtual()) + { + VMA_VALIDATE(m_GranularityHandler.Validate(validateCtx, prev->offset, prev->size)); + } + } + + if (prev->prevPhysical) + { + VMA_VALIDATE(prev->prevPhysical->nextPhysical == prev); + } + } + + if (!IsVirtual()) + { + VMA_VALIDATE(m_GranularityHandler.FinishValidation(validateCtx)); + } + + VMA_VALIDATE(nextOffset == 0); + VMA_VALIDATE(calculatedSize == GetSize()); + VMA_VALIDATE(calculatedFreeSize == GetSumFreeSize()); + VMA_VALIDATE(allocCount == m_AllocCount); + VMA_VALIDATE(freeCount == m_BlocksFreeCount); + + return true; +} + +void VmaBlockMetadata_TLSF::AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const +{ + inoutStats.statistics.blockCount++; + inoutStats.statistics.blockBytes += GetSize(); + if (m_NullBlock->size > 0) + VmaAddDetailedStatisticsUnusedRange(inoutStats, m_NullBlock->size); + + for (Block* block = m_NullBlock->prevPhysical; block != VMA_NULL; block = block->prevPhysical) + { + if (block->IsFree()) + VmaAddDetailedStatisticsUnusedRange(inoutStats, block->size); + else + VmaAddDetailedStatisticsAllocation(inoutStats, block->size); + } +} + +void VmaBlockMetadata_TLSF::AddStatistics(VmaStatistics& inoutStats) const +{ + inoutStats.blockCount++; + inoutStats.allocationCount 
+= (uint32_t)m_AllocCount;
+    inoutStats.blockBytes += GetSize();
+    inoutStats.allocationBytes += GetSize() - GetSumFreeSize();
+}
+
+#if VMA_STATS_STRING_ENABLED
+void VmaBlockMetadata_TLSF::PrintDetailedMap(class VmaJsonWriter& json) const
+{
+    size_t blockCount = m_AllocCount + m_BlocksFreeCount;
+    VmaStlAllocator<Block*> allocator(GetAllocationCallbacks());
+    VmaVector<Block*, VmaStlAllocator<Block*>> blockList(blockCount, allocator);
+
+    size_t i = blockCount;
+    for (Block* block = m_NullBlock->prevPhysical; block != VMA_NULL; block = block->prevPhysical)
+    {
+        blockList[--i] = block;
+    }
+    VMA_ASSERT(i == 0);
+
+    VmaDetailedStatistics stats;
+    VmaClearDetailedStatistics(stats);
+    AddDetailedStatistics(stats);
+
+    PrintDetailedMap_Begin(json,
+        stats.statistics.blockBytes - stats.statistics.allocationBytes,
+        stats.statistics.allocationCount,
+        stats.unusedRangeCount);
+
+    for (; i < blockCount; ++i)
+    {
+        Block* block = blockList[i];
+        if (block->IsFree())
+            PrintDetailedMap_UnusedRange(json, block->offset, block->size);
+        else
+            PrintDetailedMap_Allocation(json, block->offset, block->size, block->UserData());
+    }
+    if (m_NullBlock->size > 0)
+        PrintDetailedMap_UnusedRange(json, m_NullBlock->offset, m_NullBlock->size);
+
+    PrintDetailedMap_End(json);
+}
+#endif
+
+bool VmaBlockMetadata_TLSF::CreateAllocationRequest(
+    VkDeviceSize allocSize,
+    VkDeviceSize allocAlignment,
+    bool upperAddress,
+    VmaSuballocationType allocType,
+    uint32_t strategy,
+    VmaAllocationRequest* pAllocationRequest)
+{
+    VMA_ASSERT(allocSize > 0 && "Cannot allocate empty block!");
+    VMA_ASSERT(!upperAddress && "VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT can be used only with linear algorithm.");
+
+    // For small granularity round up
+    if (!IsVirtual())
+        m_GranularityHandler.RoundupAllocRequest(allocType, allocSize, allocAlignment);
+
+    allocSize += GetDebugMargin();
+    // Quick check for too small pool
+    if (allocSize > GetSumFreeSize())
+        return false;
+
+    // If no free blocks in pool then check only null block
+    if (m_BlocksFreeCount == 0)
+        return CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, allocType, pAllocationRequest);
+
+    // Round up to the next block
+    VkDeviceSize sizeForNextList = allocSize;
+    VkDeviceSize smallSizeStep = VkDeviceSize(SMALL_BUFFER_SIZE / (IsVirtual() ? 1 << SECOND_LEVEL_INDEX : 4));
+    if (allocSize > SMALL_BUFFER_SIZE)
+    {
+        sizeForNextList += (1ULL << (VMA_BITSCAN_MSB(allocSize) - SECOND_LEVEL_INDEX));
+    }
+    else if (allocSize > SMALL_BUFFER_SIZE - smallSizeStep)
+        sizeForNextList = SMALL_BUFFER_SIZE + 1;
+    else
+        sizeForNextList += smallSizeStep;
+
+    uint32_t nextListIndex = m_ListsCount;
+    uint32_t prevListIndex = m_ListsCount;
+    Block* nextListBlock = VMA_NULL;
+    Block* prevListBlock = VMA_NULL;
+
+    // Check blocks according to strategies
+    if (strategy & VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT)
+    {
+        // Quick check for larger block first
+        nextListBlock = FindFreeBlock(sizeForNextList, nextListIndex);
+        if (nextListBlock != VMA_NULL && CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, allocType, pAllocationRequest))
+            return true;
+
+        // If it did not fit, try the null block
+        if (CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, allocType, pAllocationRequest))
+            return true;
+
+        // Null block failed, search larger bucket
+        while (nextListBlock)
+        {
+            if (CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, allocType, pAllocationRequest))
+                return true;
+            nextListBlock = nextListBlock->NextFree();
+        }
+
+        // Failed again, check best fit bucket
+        prevListBlock = FindFreeBlock(allocSize, prevListIndex);
+        while (prevListBlock)
+        {
+            if (CheckBlock(*prevListBlock, prevListIndex, allocSize, allocAlignment, allocType, pAllocationRequest))
+                return true;
+            prevListBlock = prevListBlock->NextFree();
+        }
+    }
+    else if (strategy & VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT)
+    {
+        // Check best fit bucket
+        prevListBlock = FindFreeBlock(allocSize, prevListIndex);
+        while (prevListBlock)
+        {
+            if (CheckBlock(*prevListBlock, prevListIndex, allocSize, allocAlignment, allocType, pAllocationRequest))
+                return true;
+            prevListBlock = prevListBlock->NextFree();
+        }
+
+        // If that failed, check the null block
+        if (CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, allocType, pAllocationRequest))
+            return true;
+
+        // Check larger bucket
+        nextListBlock = FindFreeBlock(sizeForNextList, nextListIndex);
+        while (nextListBlock)
+        {
+            if (CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, allocType, pAllocationRequest))
+                return true;
+            nextListBlock = nextListBlock->NextFree();
+        }
+    }
+    else if (strategy & VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT)
+    {
+        // Perform search from the start
+        VmaStlAllocator<Block*> allocator(GetAllocationCallbacks());
+        VmaVector<Block*, VmaStlAllocator<Block*>> blockList(m_BlocksFreeCount, allocator);
+
+        size_t i = m_BlocksFreeCount;
+        for (Block* block = m_NullBlock->prevPhysical; block != VMA_NULL; block = block->prevPhysical)
+        {
+            if (block->IsFree() && block->size >= allocSize)
+                blockList[--i] = block;
+        }
+
+        for (; i < m_BlocksFreeCount; ++i)
+        {
+            Block& block = *blockList[i];
+            if (CheckBlock(block, GetListIndex(block.size), allocSize, allocAlignment, allocType, pAllocationRequest))
+                return true;
+        }
+
+        // If that failed, check the null block
+        if (CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, allocType, pAllocationRequest))
+            return true;
+
+        // Whole range searched, no more memory
+        return false;
+    }
+    else
+    {
+        // Check larger bucket
+        nextListBlock = FindFreeBlock(sizeForNextList, nextListIndex);
+        while (nextListBlock)
+        {
+            if (CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, allocType, pAllocationRequest))
+                return true;
+            nextListBlock = nextListBlock->NextFree();
+        }
+
+        // If that failed, check the null block
+        if (CheckBlock(*m_NullBlock, m_ListsCount, allocSize, allocAlignment, allocType, pAllocationRequest))
+            return true;
+
+        // Check best fit bucket
+        prevListBlock = FindFreeBlock(allocSize, prevListIndex);
+        while (prevListBlock)
+        {
+            if (CheckBlock(*prevListBlock, prevListIndex, allocSize, allocAlignment, allocType, pAllocationRequest))
+                return true;
+            prevListBlock = prevListBlock->NextFree();
+        }
+    }
+
+    // Worst case, full search has to be done
+    while (++nextListIndex < m_ListsCount)
+    {
+        nextListBlock = m_FreeList[nextListIndex];
+        while (nextListBlock)
+        {
+            if (CheckBlock(*nextListBlock, nextListIndex, allocSize, allocAlignment, allocType, pAllocationRequest))
+                return true;
+            nextListBlock = nextListBlock->NextFree();
+        }
+    }
+
+    // No more memory sadly
+    return false;
+}
+
+VkResult VmaBlockMetadata_TLSF::CheckCorruption(const void* pBlockData)
+{
+    for (Block* block = m_NullBlock->prevPhysical; block != VMA_NULL; block = block->prevPhysical)
+    {
+        if (!block->IsFree())
+        {
+            if (!VmaValidateMagicValue(pBlockData, block->offset + block->size))
+            {
+                VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER VALIDATED ALLOCATION!");
+                return VK_ERROR_UNKNOWN_COPY;
+            }
+        }
+    }
+
+    return VK_SUCCESS;
+}
+
+void VmaBlockMetadata_TLSF::Alloc(
+    const VmaAllocationRequest& request,
+    VmaSuballocationType type,
+    void* userData)
+{
+    VMA_ASSERT(request.type == VmaAllocationRequestType::TLSF);
+
+    // Get block and pop it from the free list
+    Block* currentBlock = (Block*)request.allocHandle;
+    VkDeviceSize offset = request.algorithmData;
+    VMA_ASSERT(currentBlock != VMA_NULL);
+    VMA_ASSERT(currentBlock->offset <= offset);
+
+    if (currentBlock != m_NullBlock)
+        RemoveFreeBlock(currentBlock);
+
+    VkDeviceSize debugMargin = GetDebugMargin();
+    VkDeviceSize missingAlignment = offset - currentBlock->offset;
+
+    // Append missing alignment to prev block or create new one
+    if (missingAlignment)
+    {
+        Block* prevBlock = currentBlock->prevPhysical;
+        VMA_ASSERT(prevBlock != VMA_NULL && "There should be no missing alignment at offset 0!");
+
+        if (prevBlock->IsFree() && prevBlock->size != debugMargin)
+        {
+            uint32_t oldList = GetListIndex(prevBlock->size);
+            prevBlock->size += missingAlignment;
+            // Check if new size crosses list bucket
+            if (oldList != GetListIndex(prevBlock->size))
+            {
+                prevBlock->size -= missingAlignment;
+                RemoveFreeBlock(prevBlock);
+                prevBlock->size += missingAlignment;
+                InsertFreeBlock(prevBlock);
+            }
+            else
+                m_BlocksFreeSize += missingAlignment;
+        }
+        else
+        {
+            Block* newBlock = m_BlockAllocator.Alloc();
+            currentBlock->prevPhysical = newBlock;
+            prevBlock->nextPhysical = newBlock;
+            newBlock->prevPhysical = prevBlock;
+            newBlock->nextPhysical = currentBlock;
+            newBlock->size = missingAlignment;
+            newBlock->offset = currentBlock->offset;
+            newBlock->MarkTaken();
+
+            InsertFreeBlock(newBlock);
+        }
+
+        currentBlock->size -= missingAlignment;
+        currentBlock->offset += missingAlignment;
+    }
+
+    VkDeviceSize size = request.size + debugMargin;
+    if (currentBlock->size == size)
+    {
+        if (currentBlock == m_NullBlock)
+        {
+            // Setup new null block
+            m_NullBlock = m_BlockAllocator.Alloc();
+            m_NullBlock->size = 0;
+            m_NullBlock->offset = currentBlock->offset + size;
+            m_NullBlock->prevPhysical = currentBlock;
+            m_NullBlock->nextPhysical = VMA_NULL;
+            m_NullBlock->MarkFree();
+            m_NullBlock->PrevFree() = VMA_NULL;
+            m_NullBlock->NextFree() = VMA_NULL;
+            currentBlock->nextPhysical = m_NullBlock;
+            currentBlock->MarkTaken();
+        }
+    }
+    else
+    {
+        VMA_ASSERT(currentBlock->size > size && "Proper block already found, shouldn't find smaller one!");
+
+        // Create new free block
+        Block* newBlock = m_BlockAllocator.Alloc();
+        newBlock->size = currentBlock->size - size;
+        newBlock->offset = currentBlock->offset + size;
+        newBlock->prevPhysical = currentBlock;
+        newBlock->nextPhysical = currentBlock->nextPhysical;
+        currentBlock->nextPhysical = newBlock;
+        currentBlock->size = size;
+
+        if (currentBlock == m_NullBlock)
+        {
+            m_NullBlock = newBlock;
+            m_NullBlock->MarkFree();
+            m_NullBlock->NextFree() = VMA_NULL;
+            m_NullBlock->PrevFree() = VMA_NULL;
+            currentBlock->MarkTaken();
+        }
+        else
+        {
+            newBlock->nextPhysical->prevPhysical = newBlock;
+            newBlock->MarkTaken();
+            InsertFreeBlock(newBlock);
+        }
+    }
+    currentBlock->UserData() = userData;
+
+    if (debugMargin > 0)
+    {
+        currentBlock->size -= debugMargin;
+        Block* newBlock = m_BlockAllocator.Alloc();
+        newBlock->size = debugMargin;
+        newBlock->offset = currentBlock->offset + currentBlock->size;
+        newBlock->prevPhysical = currentBlock;
+        newBlock->nextPhysical = currentBlock->nextPhysical;
+        newBlock->MarkTaken();
+        currentBlock->nextPhysical->prevPhysical = newBlock;
+        currentBlock->nextPhysical = newBlock;
+        InsertFreeBlock(newBlock);
+    }
+
+    if (!IsVirtual())
+        m_GranularityHandler.AllocPages((uint8_t)(uintptr_t)request.customData,
+            currentBlock->offset, currentBlock->size);
+    ++m_AllocCount;
+}
+
+void VmaBlockMetadata_TLSF::Free(VmaAllocHandle allocHandle)
+{
+    Block* block = (Block*)allocHandle;
+    Block* next = block->nextPhysical;
+    VMA_ASSERT(!block->IsFree() && "Block is already free!");
+
+    if (!IsVirtual())
+        m_GranularityHandler.FreePages(block->offset, block->size);
+    --m_AllocCount;
+
+    VkDeviceSize debugMargin = GetDebugMargin();
+    if (debugMargin > 0)
+    {
+        RemoveFreeBlock(next);
+        MergeBlock(next, block);
+        block = next;
+        next = next->nextPhysical;
+    }
+
+    // Try merging
+    Block* prev = block->prevPhysical;
+    if (prev != VMA_NULL && prev->IsFree() && prev->size != debugMargin)
+    {
+        RemoveFreeBlock(prev);
+        MergeBlock(block, prev);
+    }
+
+    if (!next->IsFree())
+        InsertFreeBlock(block);
+    else if (next == m_NullBlock)
+        MergeBlock(m_NullBlock, block);
+    else
+    {
+        RemoveFreeBlock(next);
+        MergeBlock(next, block);
+        InsertFreeBlock(next);
+    }
+}
+
+void VmaBlockMetadata_TLSF::GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo)
+{
+    Block* block = (Block*)allocHandle;
+    VMA_ASSERT(!block->IsFree() && "Cannot get allocation info for free block!");
+    outInfo.offset = block->offset;
+    outInfo.size = block->size;
+    outInfo.pUserData = block->UserData();
+}
+
+void* VmaBlockMetadata_TLSF::GetAllocationUserData(VmaAllocHandle allocHandle) const
+{
+    Block* block = (Block*)allocHandle;
+    VMA_ASSERT(!block->IsFree() && "Cannot get user data for free block!");
+    return block->UserData();
+}
+
+VmaAllocHandle VmaBlockMetadata_TLSF::GetAllocationListBegin() const
+{
+    if (m_AllocCount == 0)
+        return VK_NULL_HANDLE;
+
+    for (Block* block = m_NullBlock->prevPhysical; block; block = block->prevPhysical)
+    {
+        if (!block->IsFree())
+            return (VmaAllocHandle)block;
+    }
+    VMA_ASSERT(false && "If m_AllocCount > 0 then should find any allocation!");
+    return VK_NULL_HANDLE;
+}
+
+VmaAllocHandle VmaBlockMetadata_TLSF::GetNextAllocation(VmaAllocHandle prevAlloc) const
+{
+    Block* startBlock = (Block*)prevAlloc;
+    VMA_ASSERT(!startBlock->IsFree() && "Incorrect block!");
+
+    for (Block* block = startBlock->prevPhysical; block; block = block->prevPhysical)
+    {
+        if (!block->IsFree())
+            return (VmaAllocHandle)block;
+    }
+    return VK_NULL_HANDLE;
+}
+
+VkDeviceSize VmaBlockMetadata_TLSF::GetNextFreeRegionSize(VmaAllocHandle alloc) const
+{
+    Block* block = (Block*)alloc;
+    VMA_ASSERT(!block->IsFree() && "Incorrect block!");
+
+    if (block->prevPhysical)
+        return block->prevPhysical->IsFree() ? block->prevPhysical->size : 0;
+    return 0;
+}
+
+void VmaBlockMetadata_TLSF::Clear()
+{
+    m_AllocCount = 0;
+    m_BlocksFreeCount = 0;
+    m_BlocksFreeSize = 0;
+    m_IsFreeBitmap = 0;
+    m_NullBlock->offset = 0;
+    m_NullBlock->size = GetSize();
+    Block* block = m_NullBlock->prevPhysical;
+    m_NullBlock->prevPhysical = VMA_NULL;
+    while (block)
+    {
+        Block* prev = block->prevPhysical;
+        m_BlockAllocator.Free(block);
+        block = prev;
+    }
+    memset(m_FreeList, 0, m_ListsCount * sizeof(Block*));
+    memset(m_InnerIsFreeBitmap, 0, m_MemoryClasses * sizeof(uint32_t));
+    m_GranularityHandler.Clear();
+}
+
+void VmaBlockMetadata_TLSF::SetAllocationUserData(VmaAllocHandle allocHandle, void* userData)
+{
+    Block* block = (Block*)allocHandle;
+    VMA_ASSERT(!block->IsFree() && "Trying to set user data for not allocated block!");
+    block->UserData() = userData;
+}
+
+void VmaBlockMetadata_TLSF::DebugLogAllAllocations() const
+{
+    for (Block* block = m_NullBlock->prevPhysical; block != VMA_NULL; block = block->prevPhysical)
+        if (!block->IsFree())
+            DebugLogAllocation(block->offset, block->size, block->UserData());
+}
+
+uint8_t VmaBlockMetadata_TLSF::SizeToMemoryClass(VkDeviceSize size) const
+{
+    if (size > SMALL_BUFFER_SIZE)
+        return uint8_t(VMA_BITSCAN_MSB(size) - MEMORY_CLASS_SHIFT);
+    return 0;
+}
+
+uint16_t VmaBlockMetadata_TLSF::SizeToSecondIndex(VkDeviceSize size, uint8_t memoryClass) const
+{
+    if (memoryClass == 0)
+    {
+        if (IsVirtual())
+            return static_cast<uint16_t>((size - 1) / 8);
+        else
+            return static_cast<uint16_t>((size - 1) / 64);
+    }
+    return static_cast<uint16_t>((size >> (memoryClass + MEMORY_CLASS_SHIFT - SECOND_LEVEL_INDEX)) ^ (1U << SECOND_LEVEL_INDEX));
+}
+
+uint32_t VmaBlockMetadata_TLSF::GetListIndex(uint8_t memoryClass, uint16_t secondIndex) const
+{
+    if (memoryClass == 0)
+        return secondIndex;
+
+    const uint32_t index = static_cast<uint32_t>(memoryClass - 1) * (1 << SECOND_LEVEL_INDEX) + secondIndex;
+    if (IsVirtual())
+        return index + (1 << SECOND_LEVEL_INDEX);
+    else
+        return index + 4;
+}
+
+uint32_t VmaBlockMetadata_TLSF::GetListIndex(VkDeviceSize size) const
+{
+    uint8_t memoryClass = SizeToMemoryClass(size);
+    return GetListIndex(memoryClass, SizeToSecondIndex(size, memoryClass));
+}
+
+void VmaBlockMetadata_TLSF::RemoveFreeBlock(Block* block)
+{
+    VMA_ASSERT(block != m_NullBlock);
+    VMA_ASSERT(block->IsFree());
+
+    if (block->NextFree() != VMA_NULL)
+        block->NextFree()->PrevFree() = block->PrevFree();
+    if (block->PrevFree() != VMA_NULL)
+        block->PrevFree()->NextFree() = block->NextFree();
+    else
+    {
+        uint8_t memClass = SizeToMemoryClass(block->size);
+        uint16_t secondIndex = SizeToSecondIndex(block->size, memClass);
+        uint32_t index = GetListIndex(memClass, secondIndex);
+        VMA_ASSERT(m_FreeList[index] == block);
+        m_FreeList[index] = block->NextFree();
+        if (block->NextFree() == VMA_NULL)
+        {
+            m_InnerIsFreeBitmap[memClass] &= ~(1U << secondIndex);
+            if (m_InnerIsFreeBitmap[memClass] == 0)
+                m_IsFreeBitmap &= ~(1UL << memClass);
+        }
+    }
+    block->MarkTaken();
+    block->UserData() = VMA_NULL;
+    --m_BlocksFreeCount;
+    m_BlocksFreeSize -= block->size;
+}
+
+void VmaBlockMetadata_TLSF::InsertFreeBlock(Block* block)
+{
+    VMA_ASSERT(block != m_NullBlock);
+    VMA_ASSERT(!block->IsFree() && "Cannot insert block twice!");
+
+    uint8_t memClass = SizeToMemoryClass(block->size);
+    uint16_t secondIndex = SizeToSecondIndex(block->size, memClass);
+    uint32_t index = GetListIndex(memClass, secondIndex);
+    VMA_ASSERT(index < m_ListsCount);
+    block->PrevFree() = VMA_NULL;
+    block->NextFree() = m_FreeList[index];
+    m_FreeList[index] = block;
+    if (block->NextFree() != VMA_NULL)
+        block->NextFree()->PrevFree() = block;
+    else
+    {
+        m_InnerIsFreeBitmap[memClass] |= 1U << secondIndex;
+        m_IsFreeBitmap |= 1UL << memClass;
+    }
+    ++m_BlocksFreeCount;
+    m_BlocksFreeSize += block->size;
+}
+
+void VmaBlockMetadata_TLSF::MergeBlock(Block* block, Block* prev)
+{
+    VMA_ASSERT(block->prevPhysical == prev && "Cannot merge separate physical regions!");
+    VMA_ASSERT(!prev->IsFree() && "Cannot merge block that belongs to free list!");
+
+    block->offset = prev->offset;
+    block->size += prev->size;
+    block->prevPhysical = prev->prevPhysical;
+    if (block->prevPhysical)
+        block->prevPhysical->nextPhysical = block;
+    m_BlockAllocator.Free(prev);
+}
+
+VmaBlockMetadata_TLSF::Block* VmaBlockMetadata_TLSF::FindFreeBlock(VkDeviceSize size, uint32_t& listIndex) const
+{
+    uint8_t memoryClass = SizeToMemoryClass(size);
+    uint32_t innerFreeMap = m_InnerIsFreeBitmap[memoryClass] & (~0U << SizeToSecondIndex(size, memoryClass));
+    if (!innerFreeMap)
+    {
+        // Check higher levels for available blocks
+        uint32_t freeMap = m_IsFreeBitmap & (~0UL << (memoryClass + 1));
+        if (!freeMap)
+            return VMA_NULL; // No more memory available
+
+        // Find lowest free region
+        memoryClass = VMA_BITSCAN_LSB(freeMap);
+        innerFreeMap = m_InnerIsFreeBitmap[memoryClass];
+        VMA_ASSERT(innerFreeMap != 0);
+    }
+    // Find lowest free subregion
+    listIndex = GetListIndex(memoryClass, VMA_BITSCAN_LSB(innerFreeMap));
+    VMA_ASSERT(m_FreeList[listIndex]);
+    return m_FreeList[listIndex];
+}
+
+bool VmaBlockMetadata_TLSF::CheckBlock(
+    Block& block,
+    uint32_t listIndex,
+    VkDeviceSize allocSize,
+    VkDeviceSize allocAlignment,
+    VmaSuballocationType allocType,
+    VmaAllocationRequest* pAllocationRequest)
+{
+    VMA_ASSERT(block.IsFree() && "Block is already taken!");
+
+    VkDeviceSize alignedOffset = VmaAlignUp(block.offset, allocAlignment);
+    if (block.size < allocSize + alignedOffset - block.offset)
+        return false;
+
+    // Check for granularity conflicts
+    if (!IsVirtual() &&
+        m_GranularityHandler.CheckConflictAndAlignUp(alignedOffset, allocSize, block.offset, block.size, allocType))
+        return false;
+
+    // Alloc successful
+    pAllocationRequest->type = VmaAllocationRequestType::TLSF;
+    pAllocationRequest->allocHandle = (VmaAllocHandle)&block;
+    pAllocationRequest->size = allocSize - GetDebugMargin();
+    pAllocationRequest->customData = (void*)allocType;
+    pAllocationRequest->algorithmData = alignedOffset;
+
+    // Place block at the start of list if it's normal block
+    if (listIndex != m_ListsCount && block.PrevFree())
+    {
+        block.PrevFree()->NextFree() = block.NextFree();
+        if (block.NextFree())
+            block.NextFree()->PrevFree() = block.PrevFree();
+        block.PrevFree() = VMA_NULL;
+        block.NextFree() = m_FreeList[listIndex];
+        m_FreeList[listIndex] = &block;
+        if (block.NextFree())
+            block.NextFree()->PrevFree() = &block;
+    }
+
+    return true;
+}
+#endif // _VMA_BLOCK_METADATA_TLSF_FUNCTIONS
+#endif // _VMA_BLOCK_METADATA_TLSF
+
+#ifndef _VMA_BLOCK_VECTOR
+/*
+Sequence of VmaDeviceMemoryBlock. Represents memory blocks allocated for a specific
+Vulkan memory type.
+
+Synchronized internally with a mutex.
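+
+A new VkDeviceMemory block is allocated lazily, only when no existing block can
+service a request and the block count is still below maxBlockCount. Because
+m_Blocks stays incrementally sorted by sum of free size (ascending), the most
+occupied blocks are tried first, which favors tight packing.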
+*/
+class VmaBlockVector
+{
+    friend struct VmaDefragmentationContext_T;
+    VMA_CLASS_NO_COPY_NO_MOVE(VmaBlockVector)
+public:
+    VmaBlockVector(
+        VmaAllocator hAllocator,
+        VmaPool hParentPool,
+        uint32_t memoryTypeIndex,
+        VkDeviceSize preferredBlockSize,
+        size_t minBlockCount,
+        size_t maxBlockCount,
+        VkDeviceSize bufferImageGranularity,
+        bool explicitBlockSize,
+        uint32_t algorithm,
+        float priority,
+        VkDeviceSize minAllocationAlignment,
+        void* pMemoryAllocateNext);
+    ~VmaBlockVector();
+
+    VmaAllocator GetAllocator() const { return m_hAllocator; }
+    VmaPool GetParentPool() const { return m_hParentPool; }
+    bool IsCustomPool() const { return m_hParentPool != VMA_NULL; }
+    uint32_t GetMemoryTypeIndex() const { return m_MemoryTypeIndex; }
+    VkDeviceSize GetPreferredBlockSize() const { return m_PreferredBlockSize; }
+    VkDeviceSize GetBufferImageGranularity() const { return m_BufferImageGranularity; }
+    uint32_t GetAlgorithm() const { return m_Algorithm; }
+    bool HasExplicitBlockSize() const { return m_ExplicitBlockSize; }
+    float GetPriority() const { return m_Priority; }
+    const void* GetAllocationNextPtr() const { return m_pMemoryAllocateNext; }
+    // To be used only while the m_Mutex is locked. Used during defragmentation.
+    size_t GetBlockCount() const { return m_Blocks.size(); }
+    // To be used only while the m_Mutex is locked. Used during defragmentation.
+    VmaDeviceMemoryBlock* GetBlock(size_t index) const { return m_Blocks[index]; }
+    VMA_RW_MUTEX &GetMutex() { return m_Mutex; }
+
+    VkResult CreateMinBlocks();
+    void AddStatistics(VmaStatistics& inoutStats);
+    void AddDetailedStatistics(VmaDetailedStatistics& inoutStats);
+    bool IsEmpty();
+    bool IsCorruptionDetectionEnabled() const;
+
+    VkResult Allocate(
+        VkDeviceSize size,
+        VkDeviceSize alignment,
+        const VmaAllocationCreateInfo& createInfo,
+        VmaSuballocationType suballocType,
+        size_t allocationCount,
+        VmaAllocation* pAllocations);
+
+    void Free(const VmaAllocation hAllocation);
+
+#if VMA_STATS_STRING_ENABLED
+    void PrintDetailedMap(class VmaJsonWriter& json);
+#endif
+
+    VkResult CheckCorruption();
+
+private:
+    const VmaAllocator m_hAllocator;
+    const VmaPool m_hParentPool;
+    const uint32_t m_MemoryTypeIndex;
+    const VkDeviceSize m_PreferredBlockSize;
+    const size_t m_MinBlockCount;
+    const size_t m_MaxBlockCount;
+    const VkDeviceSize m_BufferImageGranularity;
+    const bool m_ExplicitBlockSize;
+    const uint32_t m_Algorithm;
+    const float m_Priority;
+    const VkDeviceSize m_MinAllocationAlignment;
+
+    void* const m_pMemoryAllocateNext;
+    VMA_RW_MUTEX m_Mutex;
+    // Incrementally sorted by sumFreeSize, ascending.
+    VmaVector<VmaDeviceMemoryBlock*, VmaStlAllocator<VmaDeviceMemoryBlock*>> m_Blocks;
+    uint32_t m_NextBlockId;
+    bool m_IncrementalSort = true;
+
+    void SetIncrementalSort(bool val) { m_IncrementalSort = val; }
+
+    VkDeviceSize CalcMaxBlockSize() const;
+    // Finds and removes given block from vector.
+    void Remove(VmaDeviceMemoryBlock* pBlock);
+    // Performs single step in sorting m_Blocks. They may not be fully sorted
+    // after this call.
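+    // (In practice a single bubble-sort step: at most one pair of neighbors is
+    // swapped per call, spreading the sorting cost across many allocation calls.)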
+    void IncrementallySortBlocks();
+    void SortByFreeSize();
+
+    VkResult AllocatePage(
+        VkDeviceSize size,
+        VkDeviceSize alignment,
+        const VmaAllocationCreateInfo& createInfo,
+        VmaSuballocationType suballocType,
+        VmaAllocation* pAllocation);
+
+    VkResult AllocateFromBlock(
+        VmaDeviceMemoryBlock* pBlock,
+        VkDeviceSize size,
+        VkDeviceSize alignment,
+        VmaAllocationCreateFlags allocFlags,
+        void* pUserData,
+        VmaSuballocationType suballocType,
+        uint32_t strategy,
+        VmaAllocation* pAllocation);
+
+    VkResult CommitAllocationRequest(
+        VmaAllocationRequest& allocRequest,
+        VmaDeviceMemoryBlock* pBlock,
+        VkDeviceSize alignment,
+        VmaAllocationCreateFlags allocFlags,
+        void* pUserData,
+        VmaSuballocationType suballocType,
+        VmaAllocation* pAllocation);
+
+    VkResult CreateBlock(VkDeviceSize blockSize, size_t* pNewBlockIndex);
+    bool HasEmptyBlock();
+};
+#endif // _VMA_BLOCK_VECTOR
+
+#ifndef _VMA_DEFRAGMENTATION_CONTEXT
+struct VmaDefragmentationContext_T
+{
+    VMA_CLASS_NO_COPY_NO_MOVE(VmaDefragmentationContext_T)
+public:
+    VmaDefragmentationContext_T(
+        VmaAllocator hAllocator,
+        const VmaDefragmentationInfo& info);
+    ~VmaDefragmentationContext_T();
+
+    void GetStats(VmaDefragmentationStats& outStats) { outStats = m_GlobalStats; }
+
+    VkResult DefragmentPassBegin(VmaDefragmentationPassMoveInfo& moveInfo);
+    VkResult DefragmentPassEnd(VmaDefragmentationPassMoveInfo& moveInfo);
+
+private:
+    // Max number of allocations to ignore due to size constraints before ending single pass
+    static const uint8_t MAX_ALLOCS_TO_IGNORE = 16;
+    enum class CounterStatus { Pass, Ignore, End };
+
+    struct FragmentedBlock
+    {
+        uint32_t data;
+        VmaDeviceMemoryBlock* block;
+    };
+    struct StateBalanced
+    {
+        VkDeviceSize avgFreeSize = 0;
+        VkDeviceSize avgAllocSize = UINT64_MAX;
+    };
+    struct StateExtensive
+    {
+        enum class Operation : uint8_t
+        {
+            FindFreeBlockBuffer, FindFreeBlockTexture, FindFreeBlockAll,
+            MoveBuffers, MoveTextures, MoveAll,
+            Cleanup, Done
+        };
+
+        Operation operation = Operation::FindFreeBlockTexture;
+        size_t firstFreeBlock = SIZE_MAX;
+    };
+    struct MoveAllocationData
+    {
+        VkDeviceSize size;
+        VkDeviceSize alignment;
+        VmaSuballocationType type;
+        VmaAllocationCreateFlags flags;
+        VmaDefragmentationMove move = {};
+    };
+
+    const VkDeviceSize m_MaxPassBytes;
+    const uint32_t m_MaxPassAllocations;
+    const PFN_vmaCheckDefragmentationBreakFunction m_BreakCallback;
+    void* m_BreakCallbackUserData;
+
+    VmaStlAllocator<VmaDefragmentationMove> m_MoveAllocator;
+    VmaVector<VmaDefragmentationMove, VmaStlAllocator<VmaDefragmentationMove>> m_Moves;
+
+    uint8_t m_IgnoredAllocs = 0;
+    uint32_t m_Algorithm;
+    uint32_t m_BlockVectorCount;
+    VmaBlockVector* m_PoolBlockVector;
+    VmaBlockVector** m_pBlockVectors;
+    size_t m_ImmovableBlockCount = 0;
+    VmaDefragmentationStats m_GlobalStats = { 0 };
+    VmaDefragmentationStats m_PassStats = { 0 };
+    void* m_AlgorithmState = VMA_NULL;
+
+    static MoveAllocationData GetMoveData(VmaAllocHandle handle, VmaBlockMetadata* metadata);
+    CounterStatus CheckCounters(VkDeviceSize bytes);
+    bool IncrementCounters(VkDeviceSize bytes);
+    bool ReallocWithinBlock(VmaBlockVector& vector, VmaDeviceMemoryBlock* block);
+    bool AllocInOtherBlock(size_t start, size_t end, MoveAllocationData& data, VmaBlockVector& vector);
+
+    bool ComputeDefragmentation(VmaBlockVector& vector, size_t index);
+    bool ComputeDefragmentation_Fast(VmaBlockVector& vector);
+    bool ComputeDefragmentation_Balanced(VmaBlockVector& vector, size_t index, bool update);
+    bool ComputeDefragmentation_Full(VmaBlockVector& vector);
+    bool ComputeDefragmentation_Extensive(VmaBlockVector& vector,
size_t index); + + void UpdateVectorStatistics(VmaBlockVector& vector, StateBalanced& state); + bool MoveDataToFreeBlocks(VmaSuballocationType currentType, + VmaBlockVector& vector, size_t firstFreeBlock, + bool& texturePresent, bool& bufferPresent, bool& otherPresent); +}; +#endif // _VMA_DEFRAGMENTATION_CONTEXT + +#ifndef _VMA_POOL_T +struct VmaPool_T +{ + friend struct VmaPoolListItemTraits; + VMA_CLASS_NO_COPY_NO_MOVE(VmaPool_T) +public: + VmaBlockVector m_BlockVector; + VmaDedicatedAllocationList m_DedicatedAllocations; + + VmaPool_T( + VmaAllocator hAllocator, + const VmaPoolCreateInfo& createInfo, + VkDeviceSize preferredBlockSize); + ~VmaPool_T(); + + uint32_t GetId() const { return m_Id; } + void SetId(uint32_t id) { VMA_ASSERT(m_Id == 0); m_Id = id; } + + const char* GetName() const { return m_Name; } + void SetName(const char* pName); + +#if VMA_STATS_STRING_ENABLED + //void PrintDetailedMap(class VmaStringBuilder& sb); +#endif + +private: + uint32_t m_Id; + char* m_Name; + VmaPool_T* m_PrevPool = VMA_NULL; + VmaPool_T* m_NextPool = VMA_NULL; +}; + +struct VmaPoolListItemTraits +{ + typedef VmaPool_T ItemType; + + static ItemType* GetPrev(const ItemType* item) { return item->m_PrevPool; } + static ItemType* GetNext(const ItemType* item) { return item->m_NextPool; } + static ItemType*& AccessPrev(ItemType* item) { return item->m_PrevPool; } + static ItemType*& AccessNext(ItemType* item) { return item->m_NextPool; } +}; +#endif // _VMA_POOL_T + +#ifndef _VMA_CURRENT_BUDGET_DATA +struct VmaCurrentBudgetData +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaCurrentBudgetData) +public: + + VMA_ATOMIC_UINT32 m_BlockCount[VK_MAX_MEMORY_HEAPS]; + VMA_ATOMIC_UINT32 m_AllocationCount[VK_MAX_MEMORY_HEAPS]; + VMA_ATOMIC_UINT64 m_BlockBytes[VK_MAX_MEMORY_HEAPS]; + VMA_ATOMIC_UINT64 m_AllocationBytes[VK_MAX_MEMORY_HEAPS]; + +#if VMA_MEMORY_BUDGET + VMA_ATOMIC_UINT32 m_OperationsSinceBudgetFetch; + VMA_RW_MUTEX m_BudgetMutex; + uint64_t m_VulkanUsage[VK_MAX_MEMORY_HEAPS]; + uint64_t m_VulkanBudget[VK_MAX_MEMORY_HEAPS]; + uint64_t m_BlockBytesAtBudgetFetch[VK_MAX_MEMORY_HEAPS]; +#endif // VMA_MEMORY_BUDGET + + VmaCurrentBudgetData(); + + void AddAllocation(uint32_t heapIndex, VkDeviceSize allocationSize); + void RemoveAllocation(uint32_t heapIndex, VkDeviceSize allocationSize); +}; + +#ifndef _VMA_CURRENT_BUDGET_DATA_FUNCTIONS +VmaCurrentBudgetData::VmaCurrentBudgetData() +{ + for (uint32_t heapIndex = 0; heapIndex < VK_MAX_MEMORY_HEAPS; ++heapIndex) + { + m_BlockCount[heapIndex] = 0; + m_AllocationCount[heapIndex] = 0; + m_BlockBytes[heapIndex] = 0; + m_AllocationBytes[heapIndex] = 0; +#if VMA_MEMORY_BUDGET + m_VulkanUsage[heapIndex] = 0; + m_VulkanBudget[heapIndex] = 0; + m_BlockBytesAtBudgetFetch[heapIndex] = 0; +#endif + } + +#if VMA_MEMORY_BUDGET + m_OperationsSinceBudgetFetch = 0; +#endif +} + +void VmaCurrentBudgetData::AddAllocation(uint32_t heapIndex, VkDeviceSize allocationSize) +{ + m_AllocationBytes[heapIndex] += allocationSize; + ++m_AllocationCount[heapIndex]; +#if VMA_MEMORY_BUDGET + ++m_OperationsSinceBudgetFetch; +#endif +} + +void VmaCurrentBudgetData::RemoveAllocation(uint32_t heapIndex, VkDeviceSize allocationSize) +{ + VMA_ASSERT(m_AllocationBytes[heapIndex] >= allocationSize); + m_AllocationBytes[heapIndex] -= allocationSize; + VMA_ASSERT(m_AllocationCount[heapIndex] > 0); + --m_AllocationCount[heapIndex]; +#if VMA_MEMORY_BUDGET + ++m_OperationsSinceBudgetFetch; +#endif +} +#endif // _VMA_CURRENT_BUDGET_DATA_FUNCTIONS +#endif // _VMA_CURRENT_BUDGET_DATA + +#ifndef 
_VMA_ALLOCATION_OBJECT_ALLOCATOR
+/*
+Thread-safe wrapper over VmaPoolAllocator free list, for allocation of VmaAllocation_T objects.
+*/
+class VmaAllocationObjectAllocator
+{
+    VMA_CLASS_NO_COPY_NO_MOVE(VmaAllocationObjectAllocator)
+public:
+    VmaAllocationObjectAllocator(const VkAllocationCallbacks* pAllocationCallbacks)
+        : m_Allocator(pAllocationCallbacks, 1024) {}
+
+    template<typename... Types> VmaAllocation Allocate(Types&&... args);
+    void Free(VmaAllocation hAlloc);
+
+private:
+    VMA_MUTEX m_Mutex;
+    VmaPoolAllocator<VmaAllocation_T> m_Allocator;
+};
+
+template<typename... Types>
+VmaAllocation VmaAllocationObjectAllocator::Allocate(Types&&... args)
+{
+    VmaMutexLock mutexLock(m_Mutex);
+    return m_Allocator.Alloc<Types...>(std::forward<Types>(args)...);
+}
+
+void VmaAllocationObjectAllocator::Free(VmaAllocation hAlloc)
+{
+    VmaMutexLock mutexLock(m_Mutex);
+    m_Allocator.Free(hAlloc);
+}
+#endif // _VMA_ALLOCATION_OBJECT_ALLOCATOR
+
+#ifndef _VMA_VIRTUAL_BLOCK_T
+struct VmaVirtualBlock_T
+{
+    VMA_CLASS_NO_COPY_NO_MOVE(VmaVirtualBlock_T)
+public:
+    const bool m_AllocationCallbacksSpecified;
+    const VkAllocationCallbacks m_AllocationCallbacks;
+
+    VmaVirtualBlock_T(const VmaVirtualBlockCreateInfo& createInfo);
+    ~VmaVirtualBlock_T();
+
+    VkResult Init() { return VK_SUCCESS; }
+    bool IsEmpty() const { return m_Metadata->IsEmpty(); }
+    void Free(VmaVirtualAllocation allocation) { m_Metadata->Free((VmaAllocHandle)allocation); }
+    void SetAllocationUserData(VmaVirtualAllocation allocation, void* userData) { m_Metadata->SetAllocationUserData((VmaAllocHandle)allocation, userData); }
+    void Clear() { m_Metadata->Clear(); }
+
+    const VkAllocationCallbacks* GetAllocationCallbacks() const;
+    void GetAllocationInfo(VmaVirtualAllocation allocation, VmaVirtualAllocationInfo& outInfo);
+    VkResult Allocate(const VmaVirtualAllocationCreateInfo& createInfo, VmaVirtualAllocation& outAllocation,
+        VkDeviceSize* outOffset);
+    void GetStatistics(VmaStatistics& outStats) const;
+    void CalculateDetailedStatistics(VmaDetailedStatistics& outStats) const;
+#if VMA_STATS_STRING_ENABLED
+    void BuildStatsString(bool detailedMap, VmaStringBuilder& sb) const;
+#endif
+
+private:
+    VmaBlockMetadata* m_Metadata;
+};
+
+#ifndef _VMA_VIRTUAL_BLOCK_T_FUNCTIONS
+VmaVirtualBlock_T::VmaVirtualBlock_T(const VmaVirtualBlockCreateInfo& createInfo)
+    : m_AllocationCallbacksSpecified(createInfo.pAllocationCallbacks != VMA_NULL),
+    m_AllocationCallbacks(createInfo.pAllocationCallbacks != VMA_NULL ? *createInfo.pAllocationCallbacks : VmaEmptyAllocationCallbacks)
+{
+    const uint32_t algorithm = createInfo.flags & VMA_VIRTUAL_BLOCK_CREATE_ALGORITHM_MASK;
+    switch (algorithm)
+    {
+    case 0:
+        m_Metadata = vma_new(GetAllocationCallbacks(), VmaBlockMetadata_TLSF)(VK_NULL_HANDLE, 1, true);
+        break;
+    case VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT:
+        m_Metadata = vma_new(GetAllocationCallbacks(), VmaBlockMetadata_Linear)(VK_NULL_HANDLE, 1, true);
+        break;
+    default:
+        VMA_ASSERT(0);
+        m_Metadata = vma_new(GetAllocationCallbacks(), VmaBlockMetadata_TLSF)(VK_NULL_HANDLE, 1, true);
+    }
+
+    m_Metadata->Init(createInfo.size);
+}
+
+VmaVirtualBlock_T::~VmaVirtualBlock_T()
+{
+    // Define macro VMA_DEBUG_LOG_FORMAT or more specialized VMA_LEAK_LOG_FORMAT
+    // to receive the list of the unfreed allocations.
+    if (!m_Metadata->IsEmpty())
+        m_Metadata->DebugLogAllAllocations();
+    // This is the most important assert in the entire library.
+    // Hitting it means you have some memory leak - unreleased virtual allocations.
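+    // A minimal logging hook (sketch; assumes <cstdio>) could be defined above the
+    // library include as:
+    //   #define VMA_LEAK_LOG_FORMAT(format, ...) \
+    //       do { printf((format), __VA_ARGS__); printf("\n"); } while(false)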
+ VMA_ASSERT_LEAK(m_Metadata->IsEmpty() && "Some virtual allocations were not freed before destruction of this virtual block!"); + + vma_delete(GetAllocationCallbacks(), m_Metadata); +} + +const VkAllocationCallbacks* VmaVirtualBlock_T::GetAllocationCallbacks() const +{ + return m_AllocationCallbacksSpecified ? &m_AllocationCallbacks : VMA_NULL; +} + +void VmaVirtualBlock_T::GetAllocationInfo(VmaVirtualAllocation allocation, VmaVirtualAllocationInfo& outInfo) +{ + m_Metadata->GetAllocationInfo((VmaAllocHandle)allocation, outInfo); +} + +VkResult VmaVirtualBlock_T::Allocate(const VmaVirtualAllocationCreateInfo& createInfo, VmaVirtualAllocation& outAllocation, + VkDeviceSize* outOffset) +{ + VmaAllocationRequest request = {}; + if (m_Metadata->CreateAllocationRequest( + createInfo.size, // allocSize + VMA_MAX(createInfo.alignment, (VkDeviceSize)1), // allocAlignment + (createInfo.flags & VMA_VIRTUAL_ALLOCATION_CREATE_UPPER_ADDRESS_BIT) != 0, // upperAddress + VMA_SUBALLOCATION_TYPE_UNKNOWN, // allocType - unimportant + createInfo.flags & VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MASK, // strategy + &request)) + { + m_Metadata->Alloc(request, + VMA_SUBALLOCATION_TYPE_UNKNOWN, // type - unimportant + createInfo.pUserData); + outAllocation = (VmaVirtualAllocation)request.allocHandle; + if(outOffset) + *outOffset = m_Metadata->GetAllocationOffset(request.allocHandle); + return VK_SUCCESS; + } + outAllocation = (VmaVirtualAllocation)VK_NULL_HANDLE; + if (outOffset) + *outOffset = UINT64_MAX; + return VK_ERROR_OUT_OF_DEVICE_MEMORY; +} + +void VmaVirtualBlock_T::GetStatistics(VmaStatistics& outStats) const +{ + VmaClearStatistics(outStats); + m_Metadata->AddStatistics(outStats); +} + +void VmaVirtualBlock_T::CalculateDetailedStatistics(VmaDetailedStatistics& outStats) const +{ + VmaClearDetailedStatistics(outStats); + m_Metadata->AddDetailedStatistics(outStats); +} + +#if VMA_STATS_STRING_ENABLED +void VmaVirtualBlock_T::BuildStatsString(bool detailedMap, VmaStringBuilder& sb) const +{ + VmaJsonWriter json(GetAllocationCallbacks(), sb); + json.BeginObject(); + + VmaDetailedStatistics stats; + CalculateDetailedStatistics(stats); + + json.WriteString("Stats"); + VmaPrintDetailedStatistics(json, stats); + + if (detailedMap) + { + json.WriteString("Details"); + json.BeginObject(); + m_Metadata->PrintDetailedMap(json); + json.EndObject(); + } + + json.EndObject(); +} +#endif // VMA_STATS_STRING_ENABLED +#endif // _VMA_VIRTUAL_BLOCK_T_FUNCTIONS +#endif // _VMA_VIRTUAL_BLOCK_T + + +// Main allocator object. +struct VmaAllocator_T +{ + VMA_CLASS_NO_COPY_NO_MOVE(VmaAllocator_T) +public: + const bool m_UseMutex; + const uint32_t m_VulkanApiVersion; + bool m_UseKhrDedicatedAllocation; // Can be set only if m_VulkanApiVersion < VK_MAKE_VERSION(1, 1, 0). + bool m_UseKhrBindMemory2; // Can be set only if m_VulkanApiVersion < VK_MAKE_VERSION(1, 1, 0). + bool m_UseExtMemoryBudget; + bool m_UseAmdDeviceCoherentMemory; + bool m_UseKhrBufferDeviceAddress; + bool m_UseExtMemoryPriority; + bool m_UseKhrMaintenance4; + bool m_UseKhrMaintenance5; + const VkDevice m_hDevice; + const VkInstance m_hInstance; + const bool m_AllocationCallbacksSpecified; + const VkAllocationCallbacks m_AllocationCallbacks; + VmaDeviceMemoryCallbacks m_DeviceMemoryCallbacks; + VmaAllocationObjectAllocator m_AllocationObjectAllocator; + + // Each bit (1 << i) is set if HeapSizeLimit is enabled for that heap, so cannot allocate more than the heap size. 
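+    // Populated from VmaAllocatorCreateInfo::pHeapSizeLimit. Illustrative setup
+    // capping heap 0 at 256 MiB while leaving the others unlimited:
+    //   VkDeviceSize heapLimits[VK_MAX_MEMORY_HEAPS];
+    //   for (size_t i = 0; i < VK_MAX_MEMORY_HEAPS; ++i) heapLimits[i] = VK_WHOLE_SIZE;
+    //   heapLimits[0] = 256ull * 1024 * 1024;
+    //   allocatorCreateInfo.pHeapSizeLimit = heapLimits;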
+    uint32_t m_HeapSizeLimitMask;
+
+    VkPhysicalDeviceProperties m_PhysicalDeviceProperties;
+    VkPhysicalDeviceMemoryProperties m_MemProps;
+
+    // Default pools.
+    VmaBlockVector* m_pBlockVectors[VK_MAX_MEMORY_TYPES];
+    VmaDedicatedAllocationList m_DedicatedAllocations[VK_MAX_MEMORY_TYPES];
+
+    VmaCurrentBudgetData m_Budget;
+    VMA_ATOMIC_UINT32 m_DeviceMemoryCount; // Total number of VkDeviceMemory objects.
+
+    VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo);
+    VkResult Init(const VmaAllocatorCreateInfo* pCreateInfo);
+    ~VmaAllocator_T();
+
+    const VkAllocationCallbacks* GetAllocationCallbacks() const
+    {
+        return m_AllocationCallbacksSpecified ? &m_AllocationCallbacks : VMA_NULL;
+    }
+    const VmaVulkanFunctions& GetVulkanFunctions() const
+    {
+        return m_VulkanFunctions;
+    }
+
+    VkPhysicalDevice GetPhysicalDevice() const { return m_PhysicalDevice; }
+
+    VkDeviceSize GetBufferImageGranularity() const
+    {
+        return VMA_MAX(
+            static_cast<VkDeviceSize>(VMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY),
+            m_PhysicalDeviceProperties.limits.bufferImageGranularity);
+    }
+
+    uint32_t GetMemoryHeapCount() const { return m_MemProps.memoryHeapCount; }
+    uint32_t GetMemoryTypeCount() const { return m_MemProps.memoryTypeCount; }
+
+    uint32_t MemoryTypeIndexToHeapIndex(uint32_t memTypeIndex) const
+    {
+        VMA_ASSERT(memTypeIndex < m_MemProps.memoryTypeCount);
+        return m_MemProps.memoryTypes[memTypeIndex].heapIndex;
+    }
+    // True when specific memory type is HOST_VISIBLE but not HOST_COHERENT.
+    bool IsMemoryTypeNonCoherent(uint32_t memTypeIndex) const
+    {
+        return (m_MemProps.memoryTypes[memTypeIndex].propertyFlags & (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) ==
+            VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+    }
+    // Minimum alignment for all allocations in specific memory type.
+    VkDeviceSize GetMemoryTypeMinAlignment(uint32_t memTypeIndex) const
+    {
+        return IsMemoryTypeNonCoherent(memTypeIndex) ?
+            VMA_MAX((VkDeviceSize)VMA_MIN_ALIGNMENT, m_PhysicalDeviceProperties.limits.nonCoherentAtomSize) :
+            (VkDeviceSize)VMA_MIN_ALIGNMENT;
+    }
+
+    bool IsIntegratedGpu() const
+    {
+        return m_PhysicalDeviceProperties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
+    }
+
+    uint32_t GetGlobalMemoryTypeBits() const { return m_GlobalMemoryTypeBits; }
+
+    void GetBufferMemoryRequirements(
+        VkBuffer hBuffer,
+        VkMemoryRequirements& memReq,
+        bool& requiresDedicatedAllocation,
+        bool& prefersDedicatedAllocation) const;
+    void GetImageMemoryRequirements(
+        VkImage hImage,
+        VkMemoryRequirements& memReq,
+        bool& requiresDedicatedAllocation,
+        bool& prefersDedicatedAllocation) const;
+    VkResult FindMemoryTypeIndex(
+        uint32_t memoryTypeBits,
+        const VmaAllocationCreateInfo* pAllocationCreateInfo,
+        VmaBufferImageUsage bufImgUsage,
+        uint32_t* pMemoryTypeIndex) const;
+
+    // Main allocation function.
+    VkResult AllocateMemory(
+        const VkMemoryRequirements& vkMemReq,
+        bool requiresDedicatedAllocation,
+        bool prefersDedicatedAllocation,
+        VkBuffer dedicatedBuffer,
+        VkImage dedicatedImage,
+        VmaBufferImageUsage dedicatedBufferImageUsage,
+        const VmaAllocationCreateInfo& createInfo,
+        VmaSuballocationType suballocType,
+        size_t allocationCount,
+        VmaAllocation* pAllocations);
+
+    // Main deallocation function.
+ void FreeMemory( + size_t allocationCount, + const VmaAllocation* pAllocations); + + void CalculateStatistics(VmaTotalStatistics* pStats); + + void GetHeapBudgets( + VmaBudget* outBudgets, uint32_t firstHeap, uint32_t heapCount); + +#if VMA_STATS_STRING_ENABLED + void PrintDetailedMap(class VmaJsonWriter& json); +#endif + + void GetAllocationInfo(VmaAllocation hAllocation, VmaAllocationInfo* pAllocationInfo); + void GetAllocationInfo2(VmaAllocation hAllocation, VmaAllocationInfo2* pAllocationInfo); + + VkResult CreatePool(const VmaPoolCreateInfo* pCreateInfo, VmaPool* pPool); + void DestroyPool(VmaPool pool); + void GetPoolStatistics(VmaPool pool, VmaStatistics* pPoolStats); + void CalculatePoolStatistics(VmaPool pool, VmaDetailedStatistics* pPoolStats); + + void SetCurrentFrameIndex(uint32_t frameIndex); + uint32_t GetCurrentFrameIndex() const { return m_CurrentFrameIndex.load(); } + + VkResult CheckPoolCorruption(VmaPool hPool); + VkResult CheckCorruption(uint32_t memoryTypeBits); + + // Call to Vulkan function vkAllocateMemory with accompanying bookkeeping. + VkResult AllocateVulkanMemory(const VkMemoryAllocateInfo* pAllocateInfo, VkDeviceMemory* pMemory); + // Call to Vulkan function vkFreeMemory with accompanying bookkeeping. + void FreeVulkanMemory(uint32_t memoryType, VkDeviceSize size, VkDeviceMemory hMemory); + // Call to Vulkan function vkBindBufferMemory or vkBindBufferMemory2KHR. + VkResult BindVulkanBuffer( + VkDeviceMemory memory, + VkDeviceSize memoryOffset, + VkBuffer buffer, + const void* pNext); + // Call to Vulkan function vkBindImageMemory or vkBindImageMemory2KHR. + VkResult BindVulkanImage( + VkDeviceMemory memory, + VkDeviceSize memoryOffset, + VkImage image, + const void* pNext); + + VkResult Map(VmaAllocation hAllocation, void** ppData); + void Unmap(VmaAllocation hAllocation); + + VkResult BindBufferMemory( + VmaAllocation hAllocation, + VkDeviceSize allocationLocalOffset, + VkBuffer hBuffer, + const void* pNext); + VkResult BindImageMemory( + VmaAllocation hAllocation, + VkDeviceSize allocationLocalOffset, + VkImage hImage, + const void* pNext); + + VkResult FlushOrInvalidateAllocation( + VmaAllocation hAllocation, + VkDeviceSize offset, VkDeviceSize size, + VMA_CACHE_OPERATION op); + VkResult FlushOrInvalidateAllocations( + uint32_t allocationCount, + const VmaAllocation* allocations, + const VkDeviceSize* offsets, const VkDeviceSize* sizes, + VMA_CACHE_OPERATION op); + + VkResult CopyMemoryToAllocation( + const void* pSrcHostPointer, + VmaAllocation dstAllocation, + VkDeviceSize dstAllocationLocalOffset, + VkDeviceSize size); + VkResult CopyAllocationToMemory( + VmaAllocation srcAllocation, + VkDeviceSize srcAllocationLocalOffset, + void* pDstHostPointer, + VkDeviceSize size); + + void FillAllocation(const VmaAllocation hAllocation, uint8_t pattern); + + /* + Returns bit mask of memory types that can support defragmentation on GPU as + they support creation of required buffer for copy operations. + */ + uint32_t GetGpuDefragmentationMemoryTypeBits(); + +#if VMA_EXTERNAL_MEMORY + VkExternalMemoryHandleTypeFlagsKHR GetExternalMemoryHandleTypeFlags(uint32_t memTypeIndex) const + { + return m_TypeExternalMemoryHandleTypes[memTypeIndex]; + } +#endif // #if VMA_EXTERNAL_MEMORY + +private: + VkDeviceSize m_PreferredLargeHeapBlockSize; + + VkPhysicalDevice m_PhysicalDevice; + VMA_ATOMIC_UINT32 m_CurrentFrameIndex; + VMA_ATOMIC_UINT32 m_GpuDefragmentationMemoryTypeBits; // UINT32_MAX means uninitialized. 
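+    // (Lazily computed: GetGpuDefragmentationMemoryTypeBits() replaces the
+    // UINT32_MAX sentinel with the result of
+    // CalculateGpuDefragmentationMemoryTypeBits() on first use.)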
+#if VMA_EXTERNAL_MEMORY
+    VkExternalMemoryHandleTypeFlagsKHR m_TypeExternalMemoryHandleTypes[VK_MAX_MEMORY_TYPES];
+#endif // #if VMA_EXTERNAL_MEMORY
+
+    VMA_RW_MUTEX m_PoolsMutex;
+    typedef VmaIntrusiveLinkedList<VmaPoolListItemTraits> PoolList;
+    // Protected by m_PoolsMutex.
+    PoolList m_Pools;
+    uint32_t m_NextPoolId;
+
+    VmaVulkanFunctions m_VulkanFunctions;
+
+    // Global bit mask AND-ed with any memoryTypeBits to disallow certain memory types.
+    uint32_t m_GlobalMemoryTypeBits;
+
+    void ImportVulkanFunctions(const VmaVulkanFunctions* pVulkanFunctions);
+
+#if VMA_STATIC_VULKAN_FUNCTIONS == 1
+    void ImportVulkanFunctions_Static();
+#endif
+
+    void ImportVulkanFunctions_Custom(const VmaVulkanFunctions* pVulkanFunctions);
+
+#if VMA_DYNAMIC_VULKAN_FUNCTIONS == 1
+    void ImportVulkanFunctions_Dynamic();
+#endif
+
+    void ValidateVulkanFunctions();
+
+    VkDeviceSize CalcPreferredBlockSize(uint32_t memTypeIndex);
+
+    VkResult AllocateMemoryOfType(
+        VmaPool pool,
+        VkDeviceSize size,
+        VkDeviceSize alignment,
+        bool dedicatedPreferred,
+        VkBuffer dedicatedBuffer,
+        VkImage dedicatedImage,
+        VmaBufferImageUsage dedicatedBufferImageUsage,
+        const VmaAllocationCreateInfo& createInfo,
+        uint32_t memTypeIndex,
+        VmaSuballocationType suballocType,
+        VmaDedicatedAllocationList& dedicatedAllocations,
+        VmaBlockVector& blockVector,
+        size_t allocationCount,
+        VmaAllocation* pAllocations);
+
+    // Helper function only to be used inside AllocateDedicatedMemory.
+    VkResult AllocateDedicatedMemoryPage(
+        VmaPool pool,
+        VkDeviceSize size,
+        VmaSuballocationType suballocType,
+        uint32_t memTypeIndex,
+        const VkMemoryAllocateInfo& allocInfo,
+        bool map,
+        bool isUserDataString,
+        bool isMappingAllowed,
+        void* pUserData,
+        VmaAllocation* pAllocation);
+
+    // Allocates and registers new VkDeviceMemory specifically for dedicated allocations.
+    VkResult AllocateDedicatedMemory(
+        VmaPool pool,
+        VkDeviceSize size,
+        VmaSuballocationType suballocType,
+        VmaDedicatedAllocationList& dedicatedAllocations,
+        uint32_t memTypeIndex,
+        bool map,
+        bool isUserDataString,
+        bool isMappingAllowed,
+        bool canAliasMemory,
+        void* pUserData,
+        float priority,
+        VkBuffer dedicatedBuffer,
+        VkImage dedicatedImage,
+        VmaBufferImageUsage dedicatedBufferImageUsage,
+        size_t allocationCount,
+        VmaAllocation* pAllocations,
+        const void* pNextChain = VMA_NULL);
+
+    void FreeDedicatedMemory(const VmaAllocation allocation);
+
+    VkResult CalcMemTypeParams(
+        VmaAllocationCreateInfo& outCreateInfo,
+        uint32_t memTypeIndex,
+        VkDeviceSize size,
+        size_t allocationCount);
+    VkResult CalcAllocationParams(
+        VmaAllocationCreateInfo& outCreateInfo,
+        bool dedicatedRequired,
+        bool dedicatedPreferred);
+
+    /*
+    Calculates and returns bit mask of memory types that can support defragmentation
+    on GPU as they support creation of required buffer for copy operations.
+    */
+    uint32_t CalculateGpuDefragmentationMemoryTypeBits() const;
+    uint32_t CalculateGlobalMemoryTypeBits() const;
+
+    bool GetFlushOrInvalidateRange(
+        VmaAllocation allocation,
+        VkDeviceSize offset, VkDeviceSize size,
+        VkMappedMemoryRange& outRange) const;
+
+#if VMA_MEMORY_BUDGET
+    void UpdateVulkanBudget();
+#endif // #if VMA_MEMORY_BUDGET
+};
+
+
+#ifndef _VMA_MEMORY_FUNCTIONS
+static void* VmaMalloc(VmaAllocator hAllocator, size_t size, size_t alignment)
+{
+    return VmaMalloc(&hAllocator->m_AllocationCallbacks, size, alignment);
+}
+
+static void VmaFree(VmaAllocator hAllocator, void* ptr)
+{
+    VmaFree(&hAllocator->m_AllocationCallbacks, ptr);
+}
+
+template<typename T>
+static T* VmaAllocate(VmaAllocator hAllocator)
+{
+    return (T*)VmaMalloc(hAllocator, sizeof(T), VMA_ALIGN_OF(T));
+}
+
+template<typename T>
+static T* VmaAllocateArray(VmaAllocator hAllocator, size_t count)
+{
+    return (T*)VmaMalloc(hAllocator, sizeof(T) * count, VMA_ALIGN_OF(T));
+}
+
+template<typename T>
+static void vma_delete(VmaAllocator hAllocator, T* ptr)
+{
+    if(ptr != VMA_NULL)
+    {
+        ptr->~T();
+        VmaFree(hAllocator, ptr);
+    }
+}
+
+template<typename T>
+static void vma_delete_array(VmaAllocator hAllocator, T* ptr, size_t count)
+{
+    if(ptr != VMA_NULL)
+    {
+        for(size_t i = count; i--; )
+            ptr[i].~T();
+        VmaFree(hAllocator, ptr);
+    }
+}
+#endif // _VMA_MEMORY_FUNCTIONS
+
+#ifndef _VMA_DEVICE_MEMORY_BLOCK_FUNCTIONS
+VmaDeviceMemoryBlock::VmaDeviceMemoryBlock(VmaAllocator hAllocator)
+    : m_pMetadata(VMA_NULL),
+    m_MemoryTypeIndex(UINT32_MAX),
+    m_Id(0),
+    m_hMemory(VK_NULL_HANDLE),
+    m_MapCount(0),
+    m_pMappedData(VMA_NULL) {}
+
+VmaDeviceMemoryBlock::~VmaDeviceMemoryBlock()
+{
+    VMA_ASSERT_LEAK(m_MapCount == 0 && "VkDeviceMemory block is being destroyed while it is still mapped.");
+    VMA_ASSERT_LEAK(m_hMemory == VK_NULL_HANDLE);
+}
+
+void VmaDeviceMemoryBlock::Init(
+    VmaAllocator hAllocator,
+    VmaPool hParentPool,
+    uint32_t newMemoryTypeIndex,
+    VkDeviceMemory newMemory,
+    VkDeviceSize newSize,
+    uint32_t id,
+    uint32_t algorithm,
+    VkDeviceSize bufferImageGranularity)
+{
+    VMA_ASSERT(m_hMemory == VK_NULL_HANDLE);
+
+    m_hParentPool = hParentPool;
+    m_MemoryTypeIndex = newMemoryTypeIndex;
+    m_Id = id;
+    m_hMemory = newMemory;
+
+    switch (algorithm)
+    {
+    case 0:
+        m_pMetadata = vma_new(hAllocator, VmaBlockMetadata_TLSF)(hAllocator->GetAllocationCallbacks(),
+            bufferImageGranularity, false); // isVirtual
+        break;
+    case VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT:
+        m_pMetadata = vma_new(hAllocator, VmaBlockMetadata_Linear)(hAllocator->GetAllocationCallbacks(),
+            bufferImageGranularity, false); // isVirtual
+        break;
+    default:
+        VMA_ASSERT(0);
+        m_pMetadata = vma_new(hAllocator, VmaBlockMetadata_TLSF)(hAllocator->GetAllocationCallbacks(),
+            bufferImageGranularity, false); // isVirtual
+    }
+    m_pMetadata->Init(newSize);
+}
+
+void VmaDeviceMemoryBlock::Destroy(VmaAllocator allocator)
+{
+    // Define macro VMA_DEBUG_LOG_FORMAT or more specialized VMA_LEAK_LOG_FORMAT
+    // to receive the list of the unfreed allocations.
+    if (!m_pMetadata->IsEmpty())
+        m_pMetadata->DebugLogAllAllocations();
+    // This is the most important assert in the entire library.
+    // Hitting it means you have some memory leak - unreleased VmaAllocation objects.
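+    // A minimal sketch of wiring up that log when building VMA yourself
+    // (assuming a printf-style sink; define the macro before including this file):
+    //     #define VMA_DEBUG_LOG_FORMAT(format, ...) \
+    //         do { printf((format), __VA_ARGS__); printf("\n"); } while(false)
+    // DebugLogAllAllocations() above then reports each unfreed allocation.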
+ VMA_ASSERT_LEAK(m_pMetadata->IsEmpty() && "Some allocations were not freed before destruction of this memory block!"); + + VMA_ASSERT_LEAK(m_hMemory != VK_NULL_HANDLE); + allocator->FreeVulkanMemory(m_MemoryTypeIndex, m_pMetadata->GetSize(), m_hMemory); + m_hMemory = VK_NULL_HANDLE; + + vma_delete(allocator, m_pMetadata); + m_pMetadata = VMA_NULL; +} + +void VmaDeviceMemoryBlock::PostAlloc(VmaAllocator hAllocator) +{ + VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); + m_MappingHysteresis.PostAlloc(); +} + +void VmaDeviceMemoryBlock::PostFree(VmaAllocator hAllocator) +{ + VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); + if(m_MappingHysteresis.PostFree()) + { + VMA_ASSERT(m_MappingHysteresis.GetExtraMapping() == 0); + if (m_MapCount == 0) + { + m_pMappedData = VMA_NULL; + (*hAllocator->GetVulkanFunctions().vkUnmapMemory)(hAllocator->m_hDevice, m_hMemory); + } + } +} + +bool VmaDeviceMemoryBlock::Validate() const +{ + VMA_VALIDATE((m_hMemory != VK_NULL_HANDLE) && + (m_pMetadata->GetSize() != 0)); + + return m_pMetadata->Validate(); +} + +VkResult VmaDeviceMemoryBlock::CheckCorruption(VmaAllocator hAllocator) +{ + void* pData = VMA_NULL; + VkResult res = Map(hAllocator, 1, &pData); + if (res != VK_SUCCESS) + { + return res; + } + + res = m_pMetadata->CheckCorruption(pData); + + Unmap(hAllocator, 1); + + return res; +} + +VkResult VmaDeviceMemoryBlock::Map(VmaAllocator hAllocator, uint32_t count, void** ppData) +{ + if (count == 0) + { + return VK_SUCCESS; + } + + VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); + const uint32_t oldTotalMapCount = m_MapCount + m_MappingHysteresis.GetExtraMapping(); + if (oldTotalMapCount != 0) + { + VMA_ASSERT(m_pMappedData != VMA_NULL); + m_MappingHysteresis.PostMap(); + m_MapCount += count; + if (ppData != VMA_NULL) + { + *ppData = m_pMappedData; + } + return VK_SUCCESS; + } + else + { + VkResult result = (*hAllocator->GetVulkanFunctions().vkMapMemory)( + hAllocator->m_hDevice, + m_hMemory, + 0, // offset + VK_WHOLE_SIZE, + 0, // flags + &m_pMappedData); + if (result == VK_SUCCESS) + { + VMA_ASSERT(m_pMappedData != VMA_NULL); + m_MappingHysteresis.PostMap(); + m_MapCount = count; + if (ppData != VMA_NULL) + { + *ppData = m_pMappedData; + } + } + return result; + } +} + +void VmaDeviceMemoryBlock::Unmap(VmaAllocator hAllocator, uint32_t count) +{ + if (count == 0) + { + return; + } + + VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); + if (m_MapCount >= count) + { + m_MapCount -= count; + const uint32_t totalMapCount = m_MapCount + m_MappingHysteresis.GetExtraMapping(); + if (totalMapCount == 0) + { + m_pMappedData = VMA_NULL; + (*hAllocator->GetVulkanFunctions().vkUnmapMemory)(hAllocator->m_hDevice, m_hMemory); + } + m_MappingHysteresis.PostUnmap(); + } + else + { + VMA_ASSERT(0 && "VkDeviceMemory block is being unmapped while it was not previously mapped."); + } +} + +VkResult VmaDeviceMemoryBlock::WriteMagicValueAfterAllocation(VmaAllocator hAllocator, VkDeviceSize allocOffset, VkDeviceSize allocSize) +{ + VMA_ASSERT(VMA_DEBUG_MARGIN > 0 && VMA_DEBUG_MARGIN % 4 == 0 && VMA_DEBUG_DETECT_CORRUPTION); + + void* pData; + VkResult res = Map(hAllocator, 1, &pData); + if (res != VK_SUCCESS) + { + return res; + } + + VmaWriteMagicValue(pData, allocOffset + allocSize); + + Unmap(hAllocator, 1); + return VK_SUCCESS; +} + +VkResult VmaDeviceMemoryBlock::ValidateMagicValueAfterAllocation(VmaAllocator hAllocator, VkDeviceSize allocOffset, VkDeviceSize allocSize) +{ + VMA_ASSERT(VMA_DEBUG_MARGIN > 0 && 
VMA_DEBUG_MARGIN % 4 == 0 && VMA_DEBUG_DETECT_CORRUPTION); + + void* pData; + VkResult res = Map(hAllocator, 1, &pData); + if (res != VK_SUCCESS) + { + return res; + } + + if (!VmaValidateMagicValue(pData, allocOffset + allocSize)) + { + VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER FREED ALLOCATION!"); + } + + Unmap(hAllocator, 1); + return VK_SUCCESS; +} + +VkResult VmaDeviceMemoryBlock::BindBufferMemory( + const VmaAllocator hAllocator, + const VmaAllocation hAllocation, + VkDeviceSize allocationLocalOffset, + VkBuffer hBuffer, + const void* pNext) +{ + VMA_ASSERT(hAllocation->GetType() == VmaAllocation_T::ALLOCATION_TYPE_BLOCK && + hAllocation->GetBlock() == this); + VMA_ASSERT(allocationLocalOffset < hAllocation->GetSize() && + "Invalid allocationLocalOffset. Did you forget that this offset is relative to the beginning of the allocation, not the whole memory block?"); + const VkDeviceSize memoryOffset = hAllocation->GetOffset() + allocationLocalOffset; + // This lock is important so that we don't call vkBind... and/or vkMap... simultaneously on the same VkDeviceMemory from multiple threads. + VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); + return hAllocator->BindVulkanBuffer(m_hMemory, memoryOffset, hBuffer, pNext); +} + +VkResult VmaDeviceMemoryBlock::BindImageMemory( + const VmaAllocator hAllocator, + const VmaAllocation hAllocation, + VkDeviceSize allocationLocalOffset, + VkImage hImage, + const void* pNext) +{ + VMA_ASSERT(hAllocation->GetType() == VmaAllocation_T::ALLOCATION_TYPE_BLOCK && + hAllocation->GetBlock() == this); + VMA_ASSERT(allocationLocalOffset < hAllocation->GetSize() && + "Invalid allocationLocalOffset. Did you forget that this offset is relative to the beginning of the allocation, not the whole memory block?"); + const VkDeviceSize memoryOffset = hAllocation->GetOffset() + allocationLocalOffset; + // This lock is important so that we don't call vkBind... and/or vkMap... simultaneously on the same VkDeviceMemory from multiple threads. + VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); + return hAllocator->BindVulkanImage(m_hMemory, memoryOffset, hImage, pNext); +} +#endif // _VMA_DEVICE_MEMORY_BLOCK_FUNCTIONS + +#ifndef _VMA_ALLOCATION_T_FUNCTIONS +VmaAllocation_T::VmaAllocation_T(bool mappingAllowed) + : m_Alignment{ 1 }, + m_Size{ 0 }, + m_pUserData{ VMA_NULL }, + m_pName{ VMA_NULL }, + m_MemoryTypeIndex{ 0 }, + m_Type{ (uint8_t)ALLOCATION_TYPE_NONE }, + m_SuballocationType{ (uint8_t)VMA_SUBALLOCATION_TYPE_UNKNOWN }, + m_MapCount{ 0 }, + m_Flags{ 0 } +{ + if(mappingAllowed) + m_Flags |= (uint8_t)FLAG_MAPPING_ALLOWED; +} + +VmaAllocation_T::~VmaAllocation_T() +{ + VMA_ASSERT_LEAK(m_MapCount == 0 && "Allocation was not unmapped before destruction."); + + // Check if owned string was freed. + VMA_ASSERT(m_pName == VMA_NULL); +} + +void VmaAllocation_T::InitBlockAllocation( + VmaDeviceMemoryBlock* block, + VmaAllocHandle allocHandle, + VkDeviceSize alignment, + VkDeviceSize size, + uint32_t memoryTypeIndex, + VmaSuballocationType suballocationType, + bool mapped) +{ + VMA_ASSERT(m_Type == ALLOCATION_TYPE_NONE); + VMA_ASSERT(block != VMA_NULL); + m_Type = (uint8_t)ALLOCATION_TYPE_BLOCK; + m_Alignment = alignment; + m_Size = size; + m_MemoryTypeIndex = memoryTypeIndex; + if(mapped) + { + VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! 
Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it."); + m_Flags |= (uint8_t)FLAG_PERSISTENT_MAP; + } + m_SuballocationType = (uint8_t)suballocationType; + m_BlockAllocation.m_Block = block; + m_BlockAllocation.m_AllocHandle = allocHandle; +} + +void VmaAllocation_T::InitDedicatedAllocation( + VmaPool hParentPool, + uint32_t memoryTypeIndex, + VkDeviceMemory hMemory, + VmaSuballocationType suballocationType, + void* pMappedData, + VkDeviceSize size) +{ + VMA_ASSERT(m_Type == ALLOCATION_TYPE_NONE); + VMA_ASSERT(hMemory != VK_NULL_HANDLE); + m_Type = (uint8_t)ALLOCATION_TYPE_DEDICATED; + m_Alignment = 0; + m_Size = size; + m_MemoryTypeIndex = memoryTypeIndex; + m_SuballocationType = (uint8_t)suballocationType; + if(pMappedData != VMA_NULL) + { + VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it."); + m_Flags |= (uint8_t)FLAG_PERSISTENT_MAP; + } + m_DedicatedAllocation.m_hParentPool = hParentPool; + m_DedicatedAllocation.m_hMemory = hMemory; + m_DedicatedAllocation.m_pMappedData = pMappedData; + m_DedicatedAllocation.m_Prev = VMA_NULL; + m_DedicatedAllocation.m_Next = VMA_NULL; +} + +void VmaAllocation_T::SetName(VmaAllocator hAllocator, const char* pName) +{ + VMA_ASSERT(pName == VMA_NULL || pName != m_pName); + + FreeName(hAllocator); + + if (pName != VMA_NULL) + m_pName = VmaCreateStringCopy(hAllocator->GetAllocationCallbacks(), pName); +} + +uint8_t VmaAllocation_T::SwapBlockAllocation(VmaAllocator hAllocator, VmaAllocation allocation) +{ + VMA_ASSERT(allocation != VMA_NULL); + VMA_ASSERT(m_Type == ALLOCATION_TYPE_BLOCK); + VMA_ASSERT(allocation->m_Type == ALLOCATION_TYPE_BLOCK); + + if (m_MapCount != 0) + m_BlockAllocation.m_Block->Unmap(hAllocator, m_MapCount); + + m_BlockAllocation.m_Block->m_pMetadata->SetAllocationUserData(m_BlockAllocation.m_AllocHandle, allocation); + std::swap(m_BlockAllocation, allocation->m_BlockAllocation); + m_BlockAllocation.m_Block->m_pMetadata->SetAllocationUserData(m_BlockAllocation.m_AllocHandle, this); + +#if VMA_STATS_STRING_ENABLED + std::swap(m_BufferImageUsage, allocation->m_BufferImageUsage); +#endif + return m_MapCount; +} + +VmaAllocHandle VmaAllocation_T::GetAllocHandle() const +{ + switch (m_Type) + { + case ALLOCATION_TYPE_BLOCK: + return m_BlockAllocation.m_AllocHandle; + case ALLOCATION_TYPE_DEDICATED: + return VK_NULL_HANDLE; + default: + VMA_ASSERT(0); + return VK_NULL_HANDLE; + } +} + +VkDeviceSize VmaAllocation_T::GetOffset() const +{ + switch (m_Type) + { + case ALLOCATION_TYPE_BLOCK: + return m_BlockAllocation.m_Block->m_pMetadata->GetAllocationOffset(m_BlockAllocation.m_AllocHandle); + case ALLOCATION_TYPE_DEDICATED: + return 0; + default: + VMA_ASSERT(0); + return 0; + } +} + +VmaPool VmaAllocation_T::GetParentPool() const +{ + switch (m_Type) + { + case ALLOCATION_TYPE_BLOCK: + return m_BlockAllocation.m_Block->GetParentPool(); + case ALLOCATION_TYPE_DEDICATED: + return m_DedicatedAllocation.m_hParentPool; + default: + VMA_ASSERT(0); + return VK_NULL_HANDLE; + } +} + +VkDeviceMemory VmaAllocation_T::GetMemory() const +{ + switch (m_Type) + { + case ALLOCATION_TYPE_BLOCK: + return m_BlockAllocation.m_Block->GetDeviceMemory(); + case ALLOCATION_TYPE_DEDICATED: + return m_DedicatedAllocation.m_hMemory; + default: + VMA_ASSERT(0); + return VK_NULL_HANDLE; + } +} + +void* VmaAllocation_T::GetMappedData() const +{ + switch (m_Type) + { + case ALLOCATION_TYPE_BLOCK: + if (m_MapCount != 0 || 
IsPersistentMap()) + { + void* pBlockData = m_BlockAllocation.m_Block->GetMappedData(); + VMA_ASSERT(pBlockData != VMA_NULL); + return (char*)pBlockData + GetOffset(); + } + else + { + return VMA_NULL; + } + break; + case ALLOCATION_TYPE_DEDICATED: + VMA_ASSERT((m_DedicatedAllocation.m_pMappedData != VMA_NULL) == (m_MapCount != 0 || IsPersistentMap())); + return m_DedicatedAllocation.m_pMappedData; + default: + VMA_ASSERT(0); + return VMA_NULL; + } +} + +void VmaAllocation_T::BlockAllocMap() +{ + VMA_ASSERT(GetType() == ALLOCATION_TYPE_BLOCK); + VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it."); + + if (m_MapCount < 0xFF) + { + ++m_MapCount; + } + else + { + VMA_ASSERT(0 && "Allocation mapped too many times simultaneously."); + } +} + +void VmaAllocation_T::BlockAllocUnmap() +{ + VMA_ASSERT(GetType() == ALLOCATION_TYPE_BLOCK); + + if (m_MapCount > 0) + { + --m_MapCount; + } + else + { + VMA_ASSERT(0 && "Unmapping allocation not previously mapped."); + } +} + +VkResult VmaAllocation_T::DedicatedAllocMap(VmaAllocator hAllocator, void** ppData) +{ + VMA_ASSERT(GetType() == ALLOCATION_TYPE_DEDICATED); + VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it."); + + if (m_MapCount != 0 || IsPersistentMap()) + { + if (m_MapCount < 0xFF) + { + VMA_ASSERT(m_DedicatedAllocation.m_pMappedData != VMA_NULL); + *ppData = m_DedicatedAllocation.m_pMappedData; + ++m_MapCount; + return VK_SUCCESS; + } + else + { + VMA_ASSERT(0 && "Dedicated allocation mapped too many times simultaneously."); + return VK_ERROR_MEMORY_MAP_FAILED; + } + } + else + { + VkResult result = (*hAllocator->GetVulkanFunctions().vkMapMemory)( + hAllocator->m_hDevice, + m_DedicatedAllocation.m_hMemory, + 0, // offset + VK_WHOLE_SIZE, + 0, // flags + ppData); + if (result == VK_SUCCESS) + { + m_DedicatedAllocation.m_pMappedData = *ppData; + m_MapCount = 1; + } + return result; + } +} + +void VmaAllocation_T::DedicatedAllocUnmap(VmaAllocator hAllocator) +{ + VMA_ASSERT(GetType() == ALLOCATION_TYPE_DEDICATED); + + if (m_MapCount > 0) + { + --m_MapCount; + if (m_MapCount == 0 && !IsPersistentMap()) + { + m_DedicatedAllocation.m_pMappedData = VMA_NULL; + (*hAllocator->GetVulkanFunctions().vkUnmapMemory)( + hAllocator->m_hDevice, + m_DedicatedAllocation.m_hMemory); + } + } + else + { + VMA_ASSERT(0 && "Unmapping dedicated allocation not previously mapped."); + } +} + +#if VMA_STATS_STRING_ENABLED +void VmaAllocation_T::PrintParameters(class VmaJsonWriter& json) const +{ + json.WriteString("Type"); + json.WriteString(VMA_SUBALLOCATION_TYPE_NAMES[m_SuballocationType]); + + json.WriteString("Size"); + json.WriteNumber(m_Size); + json.WriteString("Usage"); + json.WriteNumber(m_BufferImageUsage.Value); // It may be uint32_t or uint64_t. 
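+    // These key/value pairs end up in the per-allocation entries of the
+    // detailed JSON report produced by vmaBuildStatsString().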
+
+    if (m_pUserData != VMA_NULL)
+    {
+        json.WriteString("CustomData");
+        json.BeginString();
+        json.ContinueString_Pointer(m_pUserData);
+        json.EndString();
+    }
+    if (m_pName != VMA_NULL)
+    {
+        json.WriteString("Name");
+        json.WriteString(m_pName);
+    }
+}
+#endif // VMA_STATS_STRING_ENABLED
+
+void VmaAllocation_T::FreeName(VmaAllocator hAllocator)
+{
+    if(m_pName)
+    {
+        VmaFreeString(hAllocator->GetAllocationCallbacks(), m_pName);
+        m_pName = VMA_NULL;
+    }
+}
+#endif // _VMA_ALLOCATION_T_FUNCTIONS
+
+#ifndef _VMA_BLOCK_VECTOR_FUNCTIONS
+VmaBlockVector::VmaBlockVector(
+    VmaAllocator hAllocator,
+    VmaPool hParentPool,
+    uint32_t memoryTypeIndex,
+    VkDeviceSize preferredBlockSize,
+    size_t minBlockCount,
+    size_t maxBlockCount,
+    VkDeviceSize bufferImageGranularity,
+    bool explicitBlockSize,
+    uint32_t algorithm,
+    float priority,
+    VkDeviceSize minAllocationAlignment,
+    void* pMemoryAllocateNext)
+    : m_hAllocator(hAllocator),
+    m_hParentPool(hParentPool),
+    m_MemoryTypeIndex(memoryTypeIndex),
+    m_PreferredBlockSize(preferredBlockSize),
+    m_MinBlockCount(minBlockCount),
+    m_MaxBlockCount(maxBlockCount),
+    m_BufferImageGranularity(bufferImageGranularity),
+    m_ExplicitBlockSize(explicitBlockSize),
+    m_Algorithm(algorithm),
+    m_Priority(priority),
+    m_MinAllocationAlignment(minAllocationAlignment),
+    m_pMemoryAllocateNext(pMemoryAllocateNext),
+    m_Blocks(VmaStlAllocator<VmaDeviceMemoryBlock*>(hAllocator->GetAllocationCallbacks())),
+    m_NextBlockId(0) {}
+
+VmaBlockVector::~VmaBlockVector()
+{
+    for (size_t i = m_Blocks.size(); i--; )
+    {
+        m_Blocks[i]->Destroy(m_hAllocator);
+        vma_delete(m_hAllocator, m_Blocks[i]);
+    }
+}
+
+VkResult VmaBlockVector::CreateMinBlocks()
+{
+    for (size_t i = 0; i < m_MinBlockCount; ++i)
+    {
+        VkResult res = CreateBlock(m_PreferredBlockSize, VMA_NULL);
+        if (res != VK_SUCCESS)
+        {
+            return res;
+        }
+    }
+    return VK_SUCCESS;
+}
+
+void VmaBlockVector::AddStatistics(VmaStatistics& inoutStats)
+{
+    VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex);
+
+    const size_t blockCount = m_Blocks.size();
+    for (uint32_t blockIndex = 0; blockIndex < blockCount; ++blockIndex)
+    {
+        const VmaDeviceMemoryBlock* const pBlock = m_Blocks[blockIndex];
+        VMA_ASSERT(pBlock);
+        VMA_HEAVY_ASSERT(pBlock->Validate());
+        pBlock->m_pMetadata->AddStatistics(inoutStats);
+    }
+}
+
+void VmaBlockVector::AddDetailedStatistics(VmaDetailedStatistics& inoutStats)
+{
+    VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex);
+
+    const size_t blockCount = m_Blocks.size();
+    for (uint32_t blockIndex = 0; blockIndex < blockCount; ++blockIndex)
+    {
+        const VmaDeviceMemoryBlock* const pBlock = m_Blocks[blockIndex];
+        VMA_ASSERT(pBlock);
+        VMA_HEAVY_ASSERT(pBlock->Validate());
+        pBlock->m_pMetadata->AddDetailedStatistics(inoutStats);
+    }
+}
+
+bool VmaBlockVector::IsEmpty()
+{
+    VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex);
+    return m_Blocks.empty();
+}
+
+bool VmaBlockVector::IsCorruptionDetectionEnabled() const
+{
+    const uint32_t requiredMemFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+    return (VMA_DEBUG_DETECT_CORRUPTION != 0) &&
+        (VMA_DEBUG_MARGIN > 0) &&
+        (m_Algorithm == 0 || m_Algorithm == VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT) &&
+        (m_hAllocator->m_MemProps.memoryTypes[m_MemoryTypeIndex].propertyFlags & requiredMemFlags) == requiredMemFlags;
+}
+
+VkResult VmaBlockVector::Allocate(
+    VkDeviceSize size,
+    VkDeviceSize alignment,
+    const VmaAllocationCreateInfo& createInfo,
+    VmaSuballocationType suballocType,
+    size_t allocationCount,
+    VmaAllocation* pAllocations)
+{
+    size_t allocIndex;
+    VkResult res = VK_SUCCESS;
+
+    alignment = VMA_MAX(alignment, m_MinAllocationAlignment);
+
+    if (IsCorruptionDetectionEnabled())
+    {
+        size = VmaAlignUp<VkDeviceSize>(size, sizeof(VMA_CORRUPTION_DETECTION_MAGIC_VALUE));
+        alignment = VmaAlignUp<VkDeviceSize>(alignment, sizeof(VMA_CORRUPTION_DETECTION_MAGIC_VALUE));
+    }
+
+    {
+        VmaMutexLockWrite lock(m_Mutex, m_hAllocator->m_UseMutex);
+        for (allocIndex = 0; allocIndex < allocationCount; ++allocIndex)
+        {
+            res = AllocatePage(
+                size,
+                alignment,
+                createInfo,
+                suballocType,
+                pAllocations + allocIndex);
+            if (res != VK_SUCCESS)
+            {
+                break;
+            }
+        }
+    }
+
+    if (res != VK_SUCCESS)
+    {
+        // Free all already created allocations.
+        while (allocIndex--)
+            Free(pAllocations[allocIndex]);
+        memset(pAllocations, 0, sizeof(VmaAllocation) * allocationCount);
+    }
+
+    return res;
+}
+
+VkResult VmaBlockVector::AllocatePage(
+    VkDeviceSize size,
+    VkDeviceSize alignment,
+    const VmaAllocationCreateInfo& createInfo,
+    VmaSuballocationType suballocType,
+    VmaAllocation* pAllocation)
+{
+    const bool isUpperAddress = (createInfo.flags & VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT) != 0;
+
+    VkDeviceSize freeMemory;
+    {
+        const uint32_t heapIndex = m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex);
+        VmaBudget heapBudget = {};
+        m_hAllocator->GetHeapBudgets(&heapBudget, heapIndex, 1);
+        freeMemory = (heapBudget.usage < heapBudget.budget) ? (heapBudget.budget - heapBudget.usage) : 0;
+    }
+
+    const bool canFallbackToDedicated = !HasExplicitBlockSize() &&
+        (createInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) == 0;
+    const bool canCreateNewBlock =
+        ((createInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) == 0) &&
+        (m_Blocks.size() < m_MaxBlockCount) &&
+        (freeMemory >= size || !canFallbackToDedicated);
+    uint32_t strategy = createInfo.flags & VMA_ALLOCATION_CREATE_STRATEGY_MASK;
+
+    // Upper address can only be used with linear allocator and within single memory block.
+    if (isUpperAddress &&
+        (m_Algorithm != VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT || m_MaxBlockCount > 1))
+    {
+        return VK_ERROR_FEATURE_NOT_PRESENT;
+    }
+
+    // Early reject: requested allocation size is larger than maximum block size for this block vector.
+    if (size + VMA_DEBUG_MARGIN > m_PreferredBlockSize)
+    {
+        return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+    }
+
+    // 1. Search existing allocations. Try to allocate.
+    if (m_Algorithm == VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT)
+    {
+        // Use only last block.
+        if (!m_Blocks.empty())
+        {
+            VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks.back();
+            VMA_ASSERT(pCurrBlock);
+            VkResult res = AllocateFromBlock(
+                pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation);
+            if (res == VK_SUCCESS)
+            {
+                VMA_DEBUG_LOG_FORMAT(" Returned from last block #%" PRIu32, pCurrBlock->GetId());
+                IncrementallySortBlocks();
+                return VK_SUCCESS;
+            }
+        }
+    }
+    else
+    {
+        if (strategy != VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT) // MIN_MEMORY or default
+        {
+            const bool isHostVisible =
+                (m_hAllocator->m_MemProps.memoryTypes[m_MemoryTypeIndex].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0;
+            if(isHostVisible)
+            {
+                const bool isMappingAllowed = (createInfo.flags &
+                    (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0;
+                /*
+                For non-mappable allocations, check blocks that are not mapped first.
+                For mappable allocations, check blocks that are already mapped first.
+ This way, having many blocks, we will separate mappable and non-mappable allocations, + hopefully limiting the number of blocks that are mapped, which will help tools like RenderDoc. + */ + for(size_t mappingI = 0; mappingI < 2; ++mappingI) + { + // Forward order in m_Blocks - prefer blocks with smallest amount of free space. + for (size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) + { + VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex]; + VMA_ASSERT(pCurrBlock); + const bool isBlockMapped = pCurrBlock->GetMappedData() != VMA_NULL; + if((mappingI == 0) == (isMappingAllowed == isBlockMapped)) + { + VkResult res = AllocateFromBlock( + pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); + if (res == VK_SUCCESS) + { + VMA_DEBUG_LOG_FORMAT(" Returned from existing block #%" PRIu32, pCurrBlock->GetId()); + IncrementallySortBlocks(); + return VK_SUCCESS; + } + } + } + } + } + else + { + // Forward order in m_Blocks - prefer blocks with smallest amount of free space. + for (size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) + { + VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex]; + VMA_ASSERT(pCurrBlock); + VkResult res = AllocateFromBlock( + pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); + if (res == VK_SUCCESS) + { + VMA_DEBUG_LOG_FORMAT(" Returned from existing block #%" PRIu32, pCurrBlock->GetId()); + IncrementallySortBlocks(); + return VK_SUCCESS; + } + } + } + } + else // VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT + { + // Backward order in m_Blocks - prefer blocks with largest amount of free space. + for (size_t blockIndex = m_Blocks.size(); blockIndex--; ) + { + VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex]; + VMA_ASSERT(pCurrBlock); + VkResult res = AllocateFromBlock(pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); + if (res == VK_SUCCESS) + { + VMA_DEBUG_LOG_FORMAT(" Returned from existing block #%" PRIu32, pCurrBlock->GetId()); + IncrementallySortBlocks(); + return VK_SUCCESS; + } + } + } + } + + // 2. Try to create new block. + if (canCreateNewBlock) + { + // Calculate optimal size for new block. + VkDeviceSize newBlockSize = m_PreferredBlockSize; + uint32_t newBlockSizeShift = 0; + const uint32_t NEW_BLOCK_SIZE_SHIFT_MAX = 3; + + if (!m_ExplicitBlockSize) + { + // Allocate 1/8, 1/4, 1/2 as first blocks. + const VkDeviceSize maxExistingBlockSize = CalcMaxBlockSize(); + for (uint32_t i = 0; i < NEW_BLOCK_SIZE_SHIFT_MAX; ++i) + { + const VkDeviceSize smallerNewBlockSize = newBlockSize / 2; + if (smallerNewBlockSize > maxExistingBlockSize && smallerNewBlockSize >= size * 2) + { + newBlockSize = smallerNewBlockSize; + ++newBlockSizeShift; + } + else + { + break; + } + } + } + + size_t newBlockIndex = 0; + VkResult res = (newBlockSize <= freeMemory || !canFallbackToDedicated) ? + CreateBlock(newBlockSize, &newBlockIndex) : VK_ERROR_OUT_OF_DEVICE_MEMORY; + // Allocation of this size failed? Try 1/2, 1/4, 1/8 of m_PreferredBlockSize. + if (!m_ExplicitBlockSize) + { + while (res < 0 && newBlockSizeShift < NEW_BLOCK_SIZE_SHIFT_MAX) + { + const VkDeviceSize smallerNewBlockSize = newBlockSize / 2; + if (smallerNewBlockSize >= size) + { + newBlockSize = smallerNewBlockSize; + ++newBlockSizeShift; + res = (newBlockSize <= freeMemory || !canFallbackToDedicated) ? 
+ CreateBlock(newBlockSize, &newBlockIndex) : VK_ERROR_OUT_OF_DEVICE_MEMORY; + } + else + { + break; + } + } + } + + if (res == VK_SUCCESS) + { + VmaDeviceMemoryBlock* const pBlock = m_Blocks[newBlockIndex]; + VMA_ASSERT(pBlock->m_pMetadata->GetSize() >= size); + + res = AllocateFromBlock( + pBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); + if (res == VK_SUCCESS) + { + VMA_DEBUG_LOG_FORMAT(" Created new block #%" PRIu32 " Size=%" PRIu64, pBlock->GetId(), newBlockSize); + IncrementallySortBlocks(); + return VK_SUCCESS; + } + else + { + // Allocation from new block failed, possibly due to VMA_DEBUG_MARGIN or alignment. + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + } + } + } + + return VK_ERROR_OUT_OF_DEVICE_MEMORY; +} + +void VmaBlockVector::Free(const VmaAllocation hAllocation) +{ + VmaDeviceMemoryBlock* pBlockToDelete = VMA_NULL; + + bool budgetExceeded = false; + { + const uint32_t heapIndex = m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex); + VmaBudget heapBudget = {}; + m_hAllocator->GetHeapBudgets(&heapBudget, heapIndex, 1); + budgetExceeded = heapBudget.usage >= heapBudget.budget; + } + + // Scope for lock. + { + VmaMutexLockWrite lock(m_Mutex, m_hAllocator->m_UseMutex); + + VmaDeviceMemoryBlock* pBlock = hAllocation->GetBlock(); + + if (IsCorruptionDetectionEnabled()) + { + VkResult res = pBlock->ValidateMagicValueAfterAllocation(m_hAllocator, hAllocation->GetOffset(), hAllocation->GetSize()); + VMA_ASSERT(res == VK_SUCCESS && "Couldn't map block memory to validate magic value."); + } + + if (hAllocation->IsPersistentMap()) + { + pBlock->Unmap(m_hAllocator, 1); + } + + const bool hadEmptyBlockBeforeFree = HasEmptyBlock(); + pBlock->m_pMetadata->Free(hAllocation->GetAllocHandle()); + pBlock->PostFree(m_hAllocator); + VMA_HEAVY_ASSERT(pBlock->Validate()); + + VMA_DEBUG_LOG_FORMAT(" Freed from MemoryTypeIndex=%" PRIu32, m_MemoryTypeIndex); + + const bool canDeleteBlock = m_Blocks.size() > m_MinBlockCount; + // pBlock became empty after this deallocation. + if (pBlock->m_pMetadata->IsEmpty()) + { + // Already had empty block. We don't want to have two, so delete this one. + if ((hadEmptyBlockBeforeFree || budgetExceeded) && canDeleteBlock) + { + pBlockToDelete = pBlock; + Remove(pBlock); + } + // else: We now have one empty block - leave it. A hysteresis to avoid allocating whole block back and forth. + } + // pBlock didn't become empty, but we have another empty block - find and free that one. + // (This is optional, heuristics.) + else if (hadEmptyBlockBeforeFree && canDeleteBlock) + { + VmaDeviceMemoryBlock* pLastBlock = m_Blocks.back(); + if (pLastBlock->m_pMetadata->IsEmpty()) + { + pBlockToDelete = pLastBlock; + m_Blocks.pop_back(); + } + } + + IncrementallySortBlocks(); + } + + // Destruction of a free block. Deferred until this point, outside of mutex + // lock, for performance reason. 
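+    // (Destroy() below ends up in vkFreeMemory via FreeVulkanMemory(); only the
+    // cheap bookkeeping above ran while m_Mutex was held.)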
+ if (pBlockToDelete != VMA_NULL) + { + VMA_DEBUG_LOG_FORMAT(" Deleted empty block #%" PRIu32, pBlockToDelete->GetId()); + pBlockToDelete->Destroy(m_hAllocator); + vma_delete(m_hAllocator, pBlockToDelete); + } + + m_hAllocator->m_Budget.RemoveAllocation(m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex), hAllocation->GetSize()); + m_hAllocator->m_AllocationObjectAllocator.Free(hAllocation); +} + +VkDeviceSize VmaBlockVector::CalcMaxBlockSize() const +{ + VkDeviceSize result = 0; + for (size_t i = m_Blocks.size(); i--; ) + { + result = VMA_MAX(result, m_Blocks[i]->m_pMetadata->GetSize()); + if (result >= m_PreferredBlockSize) + { + break; + } + } + return result; +} + +void VmaBlockVector::Remove(VmaDeviceMemoryBlock* pBlock) +{ + for (uint32_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) + { + if (m_Blocks[blockIndex] == pBlock) + { + VmaVectorRemove(m_Blocks, blockIndex); + return; + } + } + VMA_ASSERT(0); +} + +void VmaBlockVector::IncrementallySortBlocks() +{ + if (!m_IncrementalSort) + return; + if (m_Algorithm != VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT) + { + // Bubble sort only until first swap. + for (size_t i = 1; i < m_Blocks.size(); ++i) + { + if (m_Blocks[i - 1]->m_pMetadata->GetSumFreeSize() > m_Blocks[i]->m_pMetadata->GetSumFreeSize()) + { + std::swap(m_Blocks[i - 1], m_Blocks[i]); + return; + } + } + } +} + +void VmaBlockVector::SortByFreeSize() +{ + VMA_SORT(m_Blocks.begin(), m_Blocks.end(), + [](VmaDeviceMemoryBlock* b1, VmaDeviceMemoryBlock* b2) -> bool + { + return b1->m_pMetadata->GetSumFreeSize() < b2->m_pMetadata->GetSumFreeSize(); + }); +} + +VkResult VmaBlockVector::AllocateFromBlock( + VmaDeviceMemoryBlock* pBlock, + VkDeviceSize size, + VkDeviceSize alignment, + VmaAllocationCreateFlags allocFlags, + void* pUserData, + VmaSuballocationType suballocType, + uint32_t strategy, + VmaAllocation* pAllocation) +{ + const bool isUpperAddress = (allocFlags & VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT) != 0; + + VmaAllocationRequest currRequest = {}; + if (pBlock->m_pMetadata->CreateAllocationRequest( + size, + alignment, + isUpperAddress, + suballocType, + strategy, + &currRequest)) + { + return CommitAllocationRequest(currRequest, pBlock, alignment, allocFlags, pUserData, suballocType, pAllocation); + } + return VK_ERROR_OUT_OF_DEVICE_MEMORY; +} + +VkResult VmaBlockVector::CommitAllocationRequest( + VmaAllocationRequest& allocRequest, + VmaDeviceMemoryBlock* pBlock, + VkDeviceSize alignment, + VmaAllocationCreateFlags allocFlags, + void* pUserData, + VmaSuballocationType suballocType, + VmaAllocation* pAllocation) +{ + const bool mapped = (allocFlags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0; + const bool isUserDataString = (allocFlags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0; + const bool isMappingAllowed = (allocFlags & + (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0; + + pBlock->PostAlloc(m_hAllocator); + // Allocate from pCurrBlock. + if (mapped) + { + VkResult res = pBlock->Map(m_hAllocator, 1, VMA_NULL); + if (res != VK_SUCCESS) + { + return res; + } + } + + *pAllocation = m_hAllocator->m_AllocationObjectAllocator.Allocate(isMappingAllowed); + pBlock->m_pMetadata->Alloc(allocRequest, suballocType, *pAllocation); + (*pAllocation)->InitBlockAllocation( + pBlock, + allocRequest.allocHandle, + alignment, + allocRequest.size, // Not size, as actual allocation size may be larger than requested! 
+ m_MemoryTypeIndex, + suballocType, + mapped); + VMA_HEAVY_ASSERT(pBlock->Validate()); + if (isUserDataString) + (*pAllocation)->SetName(m_hAllocator, (const char*)pUserData); + else + (*pAllocation)->SetUserData(m_hAllocator, pUserData); + m_hAllocator->m_Budget.AddAllocation(m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex), allocRequest.size); + if (VMA_DEBUG_INITIALIZE_ALLOCATIONS) + { + m_hAllocator->FillAllocation(*pAllocation, VMA_ALLOCATION_FILL_PATTERN_CREATED); + } + if (IsCorruptionDetectionEnabled()) + { + VkResult res = pBlock->WriteMagicValueAfterAllocation(m_hAllocator, (*pAllocation)->GetOffset(), allocRequest.size); + VMA_ASSERT(res == VK_SUCCESS && "Couldn't map block memory to write magic value."); + } + return VK_SUCCESS; +} + +VkResult VmaBlockVector::CreateBlock(VkDeviceSize blockSize, size_t* pNewBlockIndex) +{ + VkMemoryAllocateInfo allocInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO }; + allocInfo.pNext = m_pMemoryAllocateNext; + allocInfo.memoryTypeIndex = m_MemoryTypeIndex; + allocInfo.allocationSize = blockSize; + +#if VMA_BUFFER_DEVICE_ADDRESS + // Every standalone block can potentially contain a buffer with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT - always enable the feature. + VkMemoryAllocateFlagsInfoKHR allocFlagsInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO_KHR }; + if (m_hAllocator->m_UseKhrBufferDeviceAddress) + { + allocFlagsInfo.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR; + VmaPnextChainPushFront(&allocInfo, &allocFlagsInfo); + } +#endif // VMA_BUFFER_DEVICE_ADDRESS + +#if VMA_MEMORY_PRIORITY + VkMemoryPriorityAllocateInfoEXT priorityInfo = { VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT }; + if (m_hAllocator->m_UseExtMemoryPriority) + { + VMA_ASSERT(m_Priority >= 0.f && m_Priority <= 1.f); + priorityInfo.priority = m_Priority; + VmaPnextChainPushFront(&allocInfo, &priorityInfo); + } +#endif // VMA_MEMORY_PRIORITY + +#if VMA_EXTERNAL_MEMORY + // Attach VkExportMemoryAllocateInfoKHR if necessary. + VkExportMemoryAllocateInfoKHR exportMemoryAllocInfo = { VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR }; + exportMemoryAllocInfo.handleTypes = m_hAllocator->GetExternalMemoryHandleTypeFlags(m_MemoryTypeIndex); + if (exportMemoryAllocInfo.handleTypes != 0) + { + VmaPnextChainPushFront(&allocInfo, &exportMemoryAllocInfo); + } +#endif // VMA_EXTERNAL_MEMORY + + VkDeviceMemory mem = VK_NULL_HANDLE; + VkResult res = m_hAllocator->AllocateVulkanMemory(&allocInfo, &mem); + if (res < 0) + { + return res; + } + + // New VkDeviceMemory successfully created. + + // Create new Allocation for it. 
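+    // The new VmaDeviceMemoryBlock takes ownership of this VkDeviceMemory;
+    // it is returned to the driver again in VmaDeviceMemoryBlock::Destroy()
+    // via FreeVulkanMemory().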
+ VmaDeviceMemoryBlock* const pBlock = vma_new(m_hAllocator, VmaDeviceMemoryBlock)(m_hAllocator); + pBlock->Init( + m_hAllocator, + m_hParentPool, + m_MemoryTypeIndex, + mem, + allocInfo.allocationSize, + m_NextBlockId++, + m_Algorithm, + m_BufferImageGranularity); + + m_Blocks.push_back(pBlock); + if (pNewBlockIndex != VMA_NULL) + { + *pNewBlockIndex = m_Blocks.size() - 1; + } + + return VK_SUCCESS; +} + +bool VmaBlockVector::HasEmptyBlock() +{ + for (size_t index = 0, count = m_Blocks.size(); index < count; ++index) + { + VmaDeviceMemoryBlock* const pBlock = m_Blocks[index]; + if (pBlock->m_pMetadata->IsEmpty()) + { + return true; + } + } + return false; +} + +#if VMA_STATS_STRING_ENABLED +void VmaBlockVector::PrintDetailedMap(class VmaJsonWriter& json) +{ + VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex); + + + json.BeginObject(); + for (size_t i = 0; i < m_Blocks.size(); ++i) + { + json.BeginString(); + json.ContinueString(m_Blocks[i]->GetId()); + json.EndString(); + + json.BeginObject(); + json.WriteString("MapRefCount"); + json.WriteNumber(m_Blocks[i]->GetMapRefCount()); + + m_Blocks[i]->m_pMetadata->PrintDetailedMap(json); + json.EndObject(); + } + json.EndObject(); +} +#endif // VMA_STATS_STRING_ENABLED + +VkResult VmaBlockVector::CheckCorruption() +{ + if (!IsCorruptionDetectionEnabled()) + { + return VK_ERROR_FEATURE_NOT_PRESENT; + } + + VmaMutexLockRead lock(m_Mutex, m_hAllocator->m_UseMutex); + for (uint32_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) + { + VmaDeviceMemoryBlock* const pBlock = m_Blocks[blockIndex]; + VMA_ASSERT(pBlock); + VkResult res = pBlock->CheckCorruption(m_hAllocator); + if (res != VK_SUCCESS) + { + return res; + } + } + return VK_SUCCESS; +} + +#endif // _VMA_BLOCK_VECTOR_FUNCTIONS + +#ifndef _VMA_DEFRAGMENTATION_CONTEXT_FUNCTIONS +VmaDefragmentationContext_T::VmaDefragmentationContext_T( + VmaAllocator hAllocator, + const VmaDefragmentationInfo& info) + : m_MaxPassBytes(info.maxBytesPerPass == 0 ? VK_WHOLE_SIZE : info.maxBytesPerPass), + m_MaxPassAllocations(info.maxAllocationsPerPass == 0 ? 
UINT32_MAX : info.maxAllocationsPerPass),
+    m_BreakCallback(info.pfnBreakCallback),
+    m_BreakCallbackUserData(info.pBreakCallbackUserData),
+    m_MoveAllocator(hAllocator->GetAllocationCallbacks()),
+    m_Moves(m_MoveAllocator)
+{
+    m_Algorithm = info.flags & VMA_DEFRAGMENTATION_FLAG_ALGORITHM_MASK;
+
+    if (info.pool != VMA_NULL)
+    {
+        m_BlockVectorCount = 1;
+        m_PoolBlockVector = &info.pool->m_BlockVector;
+        m_pBlockVectors = &m_PoolBlockVector;
+        m_PoolBlockVector->SetIncrementalSort(false);
+        m_PoolBlockVector->SortByFreeSize();
+    }
+    else
+    {
+        m_BlockVectorCount = hAllocator->GetMemoryTypeCount();
+        m_PoolBlockVector = VMA_NULL;
+        m_pBlockVectors = hAllocator->m_pBlockVectors;
+        for (uint32_t i = 0; i < m_BlockVectorCount; ++i)
+        {
+            VmaBlockVector* vector = m_pBlockVectors[i];
+            if (vector != VMA_NULL)
+            {
+                vector->SetIncrementalSort(false);
+                vector->SortByFreeSize();
+            }
+        }
+    }
+
+    switch (m_Algorithm)
+    {
+    case 0: // Default algorithm
+        m_Algorithm = VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT;
+        m_AlgorithmState = vma_new_array(hAllocator, StateBalanced, m_BlockVectorCount);
+        break;
+    case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT:
+        m_AlgorithmState = vma_new_array(hAllocator, StateBalanced, m_BlockVectorCount);
+        break;
+    case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT:
+        if (hAllocator->GetBufferImageGranularity() > 1)
+        {
+            m_AlgorithmState = vma_new_array(hAllocator, StateExtensive, m_BlockVectorCount);
+        }
+        break;
+    }
+}
+
+VmaDefragmentationContext_T::~VmaDefragmentationContext_T()
+{
+    if (m_PoolBlockVector != VMA_NULL)
+    {
+        m_PoolBlockVector->SetIncrementalSort(true);
+    }
+    else
+    {
+        for (uint32_t i = 0; i < m_BlockVectorCount; ++i)
+        {
+            VmaBlockVector* vector = m_pBlockVectors[i];
+            if (vector != VMA_NULL)
+                vector->SetIncrementalSort(true);
+        }
+    }
+
+    if (m_AlgorithmState)
+    {
+        switch (m_Algorithm)
+        {
+        case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT:
+            vma_delete_array(m_MoveAllocator.m_pCallbacks, reinterpret_cast<StateBalanced*>(m_AlgorithmState), m_BlockVectorCount);
+            break;
+        case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT:
+            vma_delete_array(m_MoveAllocator.m_pCallbacks, reinterpret_cast<StateExtensive*>(m_AlgorithmState), m_BlockVectorCount);
+            break;
+        default:
+            VMA_ASSERT(0);
+        }
+    }
+}
+
+VkResult VmaDefragmentationContext_T::DefragmentPassBegin(VmaDefragmentationPassMoveInfo& moveInfo)
+{
+    if (m_PoolBlockVector != VMA_NULL)
+    {
+        VmaMutexLockWrite lock(m_PoolBlockVector->GetMutex(), m_PoolBlockVector->GetAllocator()->m_UseMutex);
+
+        if (m_PoolBlockVector->GetBlockCount() > 1)
+            ComputeDefragmentation(*m_PoolBlockVector, 0);
+        else if (m_PoolBlockVector->GetBlockCount() == 1)
+            ReallocWithinBlock(*m_PoolBlockVector, m_PoolBlockVector->GetBlock(0));
+    }
+    else
+    {
+        for (uint32_t i = 0; i < m_BlockVectorCount; ++i)
+        {
+            if (m_pBlockVectors[i] != VMA_NULL)
+            {
+                VmaMutexLockWrite lock(m_pBlockVectors[i]->GetMutex(), m_pBlockVectors[i]->GetAllocator()->m_UseMutex);
+
+                if (m_pBlockVectors[i]->GetBlockCount() > 1)
+                {
+                    if (ComputeDefragmentation(*m_pBlockVectors[i], i))
+                        break;
+                }
+                else if (m_pBlockVectors[i]->GetBlockCount() == 1)
+                {
+                    if (ReallocWithinBlock(*m_pBlockVectors[i], m_pBlockVectors[i]->GetBlock(0)))
+                        break;
+                }
+            }
+        }
+    }
+
+    moveInfo.moveCount = static_cast<uint32_t>(m_Moves.size());
+    if (moveInfo.moveCount > 0)
+    {
+        moveInfo.pMoves = m_Moves.data();
+        return VK_INCOMPLETE;
+    }
+
+    moveInfo.pMoves = VMA_NULL;
+    return VK_SUCCESS;
+}
+
+VkResult VmaDefragmentationContext_T::DefragmentPassEnd(VmaDefragmentationPassMoveInfo& moveInfo)
+{
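+    // Rough shape of the public API that drives this pass machinery (a sketch,
+    // not canonical usage; see the vmaBeginDefragmentation() documentation):
+    //     VmaDefragmentationPassMoveInfo pass;
+    //     while (vmaBeginDefragmentationPass(allocator, ctx, &pass) == VK_INCOMPLETE)
+    //     {
+    //         // Record GPU copies for each pass.pMoves[i], or set its
+    //         // operation to IGNORE/DESTROY, then:
+    //         if (vmaEndDefragmentationPass(allocator, ctx, &pass) == VK_SUCCESS)
+    //             break;
+    //     }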
+    VMA_ASSERT(moveInfo.moveCount > 0 ? moveInfo.pMoves != VMA_NULL : true);
+
+    VkResult result = VK_SUCCESS;
+    VmaStlAllocator<FragmentedBlock> blockAllocator(m_MoveAllocator.m_pCallbacks);
+    VmaVector<FragmentedBlock, VmaStlAllocator<FragmentedBlock>> immovableBlocks(blockAllocator);
+    VmaVector<FragmentedBlock, VmaStlAllocator<FragmentedBlock>> mappedBlocks(blockAllocator);
+
+    VmaAllocator allocator = VMA_NULL;
+    for (uint32_t i = 0; i < moveInfo.moveCount; ++i)
+    {
+        VmaDefragmentationMove& move = moveInfo.pMoves[i];
+        size_t prevCount = 0, currentCount = 0;
+        VkDeviceSize freedBlockSize = 0;
+
+        uint32_t vectorIndex;
+        VmaBlockVector* vector;
+        if (m_PoolBlockVector != VMA_NULL)
+        {
+            vectorIndex = 0;
+            vector = m_PoolBlockVector;
+        }
+        else
+        {
+            vectorIndex = move.srcAllocation->GetMemoryTypeIndex();
+            vector = m_pBlockVectors[vectorIndex];
+            VMA_ASSERT(vector != VMA_NULL);
+        }
+
+        switch (move.operation)
+        {
+        case VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY:
+        {
+            uint8_t mapCount = move.srcAllocation->SwapBlockAllocation(vector->m_hAllocator, move.dstTmpAllocation);
+            if (mapCount > 0)
+            {
+                allocator = vector->m_hAllocator;
+                VmaDeviceMemoryBlock* newMapBlock = move.srcAllocation->GetBlock();
+                bool notPresent = true;
+                for (FragmentedBlock& block : mappedBlocks)
+                {
+                    if (block.block == newMapBlock)
+                    {
+                        notPresent = false;
+                        block.data += mapCount;
+                        break;
+                    }
+                }
+                if (notPresent)
+                    mappedBlocks.push_back({ mapCount, newMapBlock });
+            }
+
+            // Scope for locks; Free has its own lock.
+            {
+                VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex);
+                prevCount = vector->GetBlockCount();
+                freedBlockSize = move.dstTmpAllocation->GetBlock()->m_pMetadata->GetSize();
+            }
+            vector->Free(move.dstTmpAllocation);
+            {
+                VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex);
+                currentCount = vector->GetBlockCount();
+            }
+
+            result = VK_INCOMPLETE;
+            break;
+        }
+        case VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE:
+        {
+            m_PassStats.bytesMoved -= move.srcAllocation->GetSize();
+            --m_PassStats.allocationsMoved;
+            vector->Free(move.dstTmpAllocation);
+
+            VmaDeviceMemoryBlock* newBlock = move.srcAllocation->GetBlock();
+            bool notPresent = true;
+            for (const FragmentedBlock& block : immovableBlocks)
+            {
+                if (block.block == newBlock)
+                {
+                    notPresent = false;
+                    break;
+                }
+            }
+            if (notPresent)
+                immovableBlocks.push_back({ vectorIndex, newBlock });
+            break;
+        }
+        case VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY:
+        {
+            m_PassStats.bytesMoved -= move.srcAllocation->GetSize();
+            --m_PassStats.allocationsMoved;
+            // Scope for locks; Free has its own lock.
+            {
+                VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex);
+                prevCount = vector->GetBlockCount();
+                freedBlockSize = move.srcAllocation->GetBlock()->m_pMetadata->GetSize();
+            }
+            vector->Free(move.srcAllocation);
+            {
+                VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex);
+                currentCount = vector->GetBlockCount();
+            }
+            freedBlockSize *= prevCount - currentCount;
+
+            VkDeviceSize dstBlockSize;
+            {
+                VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex);
+                dstBlockSize = move.dstTmpAllocation->GetBlock()->m_pMetadata->GetSize();
+            }
+            vector->Free(move.dstTmpAllocation);
+            {
+                VmaMutexLockRead lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex);
+                freedBlockSize += dstBlockSize * (currentCount - vector->GetBlockCount());
+                currentCount = vector->GetBlockCount();
+            }
+
+            result = VK_INCOMPLETE;
+            break;
+        }
+        default:
+            VMA_ASSERT(0);
+        }
+
+        if (prevCount > currentCount)
+        {
+            size_t freedBlocks = prevCount - currentCount;
+            m_PassStats.deviceMemoryBlocksFreed += static_cast<uint32_t>(freedBlocks);
+            m_PassStats.bytesFreed += freedBlockSize;
+        }
+
+        if(m_Algorithm == VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT &&
+            m_AlgorithmState != VMA_NULL)
+        {
+            // Avoid unnecessary tries to allocate when new free block is available
+            StateExtensive& state = reinterpret_cast<StateExtensive*>(m_AlgorithmState)[vectorIndex];
+            if (state.firstFreeBlock != SIZE_MAX)
+            {
+                const size_t diff = prevCount - currentCount;
+                if (state.firstFreeBlock >= diff)
+                {
+                    state.firstFreeBlock -= diff;
+                    if (state.firstFreeBlock != 0)
+                        state.firstFreeBlock -= vector->GetBlock(state.firstFreeBlock - 1)->m_pMetadata->IsEmpty();
+                }
+                else
+                    state.firstFreeBlock = 0;
+            }
+        }
+    }
+    moveInfo.moveCount = 0;
+    moveInfo.pMoves = VMA_NULL;
+    m_Moves.clear();
+
+    // Update stats
+    m_GlobalStats.allocationsMoved += m_PassStats.allocationsMoved;
+    m_GlobalStats.bytesFreed += m_PassStats.bytesFreed;
+    m_GlobalStats.bytesMoved += m_PassStats.bytesMoved;
+    m_GlobalStats.deviceMemoryBlocksFreed += m_PassStats.deviceMemoryBlocksFreed;
+    m_PassStats = { 0 };
+
+    // Move blocks with immovable allocations according to algorithm
+    if (immovableBlocks.size() > 0)
+    {
+        do
+        {
+            if(m_Algorithm == VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT)
+            {
+                if (m_AlgorithmState != VMA_NULL)
+                {
+                    bool swapped = false;
+                    // Move to the start of free blocks range
+                    for (const FragmentedBlock& block : immovableBlocks)
+                    {
+                        StateExtensive& state = reinterpret_cast<StateExtensive*>(m_AlgorithmState)[block.data];
+                        if (state.operation != StateExtensive::Operation::Cleanup)
+                        {
+                            VmaBlockVector* vector = m_pBlockVectors[block.data];
+                            VmaMutexLockWrite lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex);
+
+                            for (size_t i = 0, count = vector->GetBlockCount() - m_ImmovableBlockCount; i < count; ++i)
+                            {
+                                if (vector->GetBlock(i) == block.block)
+                                {
+                                    std::swap(vector->m_Blocks[i], vector->m_Blocks[vector->GetBlockCount() - ++m_ImmovableBlockCount]);
+                                    if (state.firstFreeBlock != SIZE_MAX)
+                                    {
+                                        if (i + 1 < state.firstFreeBlock)
+                                        {
+                                            if (state.firstFreeBlock > 1)
+                                                std::swap(vector->m_Blocks[i], vector->m_Blocks[--state.firstFreeBlock]);
+                                            else
+                                                --state.firstFreeBlock;
+                                        }
+                                    }
+                                    swapped = true;
+                                    break;
+                                }
+                            }
+                        }
+                    }
+                    if (swapped)
+                        result = VK_INCOMPLETE;
+                    break;
+                }
+            }
+
+            // Move to the beginning
+            for (const FragmentedBlock& block : immovableBlocks)
+            {
+                VmaBlockVector* vector = m_pBlockVectors[block.data];
+                VmaMutexLockWrite lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex);
+
+                for (size_t i = m_ImmovableBlockCount; i < vector->GetBlockCount(); ++i)
+                {
+                    if (vector->GetBlock(i) == block.block)
+                    {
+                        std::swap(vector->m_Blocks[i], vector->m_Blocks[m_ImmovableBlockCount++]);
+                        break;
+                    }
+                }
+            }
+        } while (false);
+    }
+
+    // Bulk-map destination blocks
+    for (const FragmentedBlock& block : mappedBlocks)
+    {
+        VkResult res = block.block->Map(allocator, block.data, VMA_NULL);
+        VMA_ASSERT(res == VK_SUCCESS);
+    }
+    return result;
+}
+
+bool VmaDefragmentationContext_T::ComputeDefragmentation(VmaBlockVector& vector, size_t index)
+{
+    switch (m_Algorithm)
+    {
+    case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FAST_BIT:
+        return ComputeDefragmentation_Fast(vector);
+    case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT:
+        return ComputeDefragmentation_Balanced(vector, index, true);
+    case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FULL_BIT:
+        return ComputeDefragmentation_Full(vector);
+    case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT:
+        return ComputeDefragmentation_Extensive(vector, index);
+    default:
+        VMA_ASSERT(0);
+        return ComputeDefragmentation_Balanced(vector, index, true);
+    }
+}
+
+VmaDefragmentationContext_T::MoveAllocationData VmaDefragmentationContext_T::GetMoveData(
+    VmaAllocHandle handle, VmaBlockMetadata* metadata)
+{
+    MoveAllocationData moveData;
+    moveData.move.srcAllocation = (VmaAllocation)metadata->GetAllocationUserData(handle);
+    moveData.size = moveData.move.srcAllocation->GetSize();
+    moveData.alignment = moveData.move.srcAllocation->GetAlignment();
+    moveData.type = moveData.move.srcAllocation->GetSuballocationType();
+    moveData.flags = 0;
+
+    if (moveData.move.srcAllocation->IsPersistentMap())
+        moveData.flags |= VMA_ALLOCATION_CREATE_MAPPED_BIT;
+    if (moveData.move.srcAllocation->IsMappingAllowed())
+        moveData.flags |= VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;
+
+    return moveData;
+}
+
+VmaDefragmentationContext_T::CounterStatus VmaDefragmentationContext_T::CheckCounters(VkDeviceSize bytes)
+{
+    // Check the custom break callback first, if one was provided.
+    if (m_BreakCallback && m_BreakCallback(m_BreakCallbackUserData))
+        return CounterStatus::End;
+
+    // Ignore this allocation if moving it would exceed the per-pass byte budget.
+    if (m_PassStats.bytesMoved + bytes > m_MaxPassBytes)
+    {
+        if (++m_IgnoredAllocs < MAX_ALLOCS_TO_IGNORE)
+            return CounterStatus::Ignore;
+        else
+            return CounterStatus::End;
+    }
+    else
+        m_IgnoredAllocs = 0;
+    return CounterStatus::Pass;
+}
+
+bool VmaDefragmentationContext_T::IncrementCounters(VkDeviceSize bytes)
+{
+    m_PassStats.bytesMoved += bytes;
+    // Early return once a per-pass limit has been reached.
+    if (++m_PassStats.allocationsMoved >= m_MaxPassAllocations || m_PassStats.bytesMoved >= m_MaxPassBytes)
+    {
+        VMA_ASSERT((m_PassStats.allocationsMoved == m_MaxPassAllocations ||
+            m_PassStats.bytesMoved == m_MaxPassBytes) && "Exceeded maximal pass threshold!");
+        return true;
+    }
+    return false;
+}
+
+bool VmaDefragmentationContext_T::ReallocWithinBlock(VmaBlockVector& vector, VmaDeviceMemoryBlock* block)
+{
+    VmaBlockMetadata* metadata = block->m_pMetadata;
+
+    for (VmaAllocHandle handle = metadata->GetAllocationListBegin();
+        handle != VK_NULL_HANDLE;
+        handle = metadata->GetNextAllocation(handle))
+    {
+        MoveAllocationData moveData = GetMoveData(handle, metadata);
+        // Ignore newly created allocations by defragmentation algorithm
+        if (moveData.move.srcAllocation->GetUserData() == this)
+            continue;
+        switch (CheckCounters(moveData.move.srcAllocation->GetSize()))
+        {
+        case CounterStatus::Ignore:
+            continue;
+        case CounterStatus::End:
+            return true;
+        case CounterStatus::Pass:
+            break;
+        default:
+            VMA_ASSERT(0);
+        }
+
+        VkDeviceSize offset = moveData.move.srcAllocation->GetOffset();
+        if (offset != 0 && metadata->GetSumFreeSize() >= moveData.size)
+        {
+            VmaAllocationRequest request = {};
+            if (metadata->CreateAllocationRequest(
+                moveData.size,
+                moveData.alignment,
+                false,
+                moveData.type,
+                VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT,
+                &request))
+            {
+                if (metadata->GetAllocationOffset(request.allocHandle) < offset)
+                {
+                    if (vector.CommitAllocationRequest(
+                        request,
+                        block,
+                        moveData.alignment,
+                        moveData.flags,
+                        this,
+                        moveData.type,
+                        &moveData.move.dstTmpAllocation) == VK_SUCCESS)
+                    {
+                        m_Moves.push_back(moveData.move);
+                        if (IncrementCounters(moveData.size))
+                            return true;
+                    }
+                }
+            }
+        }
+    }
+    return false;
+}
+
+bool VmaDefragmentationContext_T::AllocInOtherBlock(size_t start, size_t end, MoveAllocationData& data, VmaBlockVector& vector)
+{
+    for (; start < end; ++start)
+    {
+        VmaDeviceMemoryBlock* dstBlock = 
vector.GetBlock(start); + if (dstBlock->m_pMetadata->GetSumFreeSize() >= data.size) + { + if (vector.AllocateFromBlock(dstBlock, + data.size, + data.alignment, + data.flags, + this, + data.type, + 0, + &data.move.dstTmpAllocation) == VK_SUCCESS) + { + m_Moves.push_back(data.move); + if (IncrementCounters(data.size)) + return true; + break; + } + } + } + return false; +} + +bool VmaDefragmentationContext_T::ComputeDefragmentation_Fast(VmaBlockVector& vector) +{ + // Move only between blocks + + // Go through allocations in last blocks and try to fit them inside first ones + for (size_t i = vector.GetBlockCount() - 1; i > m_ImmovableBlockCount; --i) + { + VmaBlockMetadata* metadata = vector.GetBlock(i)->m_pMetadata; + + for (VmaAllocHandle handle = metadata->GetAllocationListBegin(); + handle != VK_NULL_HANDLE; + handle = metadata->GetNextAllocation(handle)) + { + MoveAllocationData moveData = GetMoveData(handle, metadata); + // Ignore newly created allocations by defragmentation algorithm + if (moveData.move.srcAllocation->GetUserData() == this) + continue; + switch (CheckCounters(moveData.move.srcAllocation->GetSize())) + { + case CounterStatus::Ignore: + continue; + case CounterStatus::End: + return true; + case CounterStatus::Pass: + break; + default: + VMA_ASSERT(0); + } + + // Check all previous blocks for free space + if (AllocInOtherBlock(0, i, moveData, vector)) + return true; + } + } + return false; +} + +bool VmaDefragmentationContext_T::ComputeDefragmentation_Balanced(VmaBlockVector& vector, size_t index, bool update) +{ + // Go over every allocation and try to fit it in previous blocks at lowest offsets, + // if not possible: realloc within single block to minimize offset (exclude offset == 0), + // but only if there are noticeable gaps between them (some heuristic, ex. 
average size of allocation in block) + VMA_ASSERT(m_AlgorithmState != VMA_NULL); + + StateBalanced& vectorState = reinterpret_cast(m_AlgorithmState)[index]; + if (update && vectorState.avgAllocSize == UINT64_MAX) + UpdateVectorStatistics(vector, vectorState); + + const size_t startMoveCount = m_Moves.size(); + VkDeviceSize minimalFreeRegion = vectorState.avgFreeSize / 2; + for (size_t i = vector.GetBlockCount() - 1; i > m_ImmovableBlockCount; --i) + { + VmaDeviceMemoryBlock* block = vector.GetBlock(i); + VmaBlockMetadata* metadata = block->m_pMetadata; + VkDeviceSize prevFreeRegionSize = 0; + + for (VmaAllocHandle handle = metadata->GetAllocationListBegin(); + handle != VK_NULL_HANDLE; + handle = metadata->GetNextAllocation(handle)) + { + MoveAllocationData moveData = GetMoveData(handle, metadata); + // Ignore newly created allocations by defragmentation algorithm + if (moveData.move.srcAllocation->GetUserData() == this) + continue; + switch (CheckCounters(moveData.move.srcAllocation->GetSize())) + { + case CounterStatus::Ignore: + continue; + case CounterStatus::End: + return true; + case CounterStatus::Pass: + break; + default: + VMA_ASSERT(0); + } + + // Check all previous blocks for free space + const size_t prevMoveCount = m_Moves.size(); + if (AllocInOtherBlock(0, i, moveData, vector)) + return true; + + VkDeviceSize nextFreeRegionSize = metadata->GetNextFreeRegionSize(handle); + // If no room found then realloc within block for lower offset + VkDeviceSize offset = moveData.move.srcAllocation->GetOffset(); + if (prevMoveCount == m_Moves.size() && offset != 0 && metadata->GetSumFreeSize() >= moveData.size) + { + // Check if realloc will make sense + if (prevFreeRegionSize >= minimalFreeRegion || + nextFreeRegionSize >= minimalFreeRegion || + moveData.size <= vectorState.avgFreeSize || + moveData.size <= vectorState.avgAllocSize) + { + VmaAllocationRequest request = {}; + if (metadata->CreateAllocationRequest( + moveData.size, + moveData.alignment, + false, + moveData.type, + VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT, + &request)) + { + if (metadata->GetAllocationOffset(request.allocHandle) < offset) + { + if (vector.CommitAllocationRequest( + request, + block, + moveData.alignment, + moveData.flags, + this, + moveData.type, + &moveData.move.dstTmpAllocation) == VK_SUCCESS) + { + m_Moves.push_back(moveData.move); + if (IncrementCounters(moveData.size)) + return true; + } + } + } + } + } + prevFreeRegionSize = nextFreeRegionSize; + } + } + + // No moves performed, update statistics to current vector state + if (startMoveCount == m_Moves.size() && !update) + { + vectorState.avgAllocSize = UINT64_MAX; + return ComputeDefragmentation_Balanced(vector, index, false); + } + return false; +} + +bool VmaDefragmentationContext_T::ComputeDefragmentation_Full(VmaBlockVector& vector) +{ + // Go over every allocation and try to fit it in previous blocks at lowest offsets, + // if not possible: realloc within single block to minimize offset (exclude offset == 0) + + for (size_t i = vector.GetBlockCount() - 1; i > m_ImmovableBlockCount; --i) + { + VmaDeviceMemoryBlock* block = vector.GetBlock(i); + VmaBlockMetadata* metadata = block->m_pMetadata; + + for (VmaAllocHandle handle = metadata->GetAllocationListBegin(); + handle != VK_NULL_HANDLE; + handle = metadata->GetNextAllocation(handle)) + { + MoveAllocationData moveData = GetMoveData(handle, metadata); + // Ignore newly created allocations by defragmentation algorithm + if (moveData.move.srcAllocation->GetUserData() == this) + continue; + switch 
(CheckCounters(moveData.move.srcAllocation->GetSize())) + { + case CounterStatus::Ignore: + continue; + case CounterStatus::End: + return true; + case CounterStatus::Pass: + break; + default: + VMA_ASSERT(0); + } + + // Check all previous blocks for free space + const size_t prevMoveCount = m_Moves.size(); + if (AllocInOtherBlock(0, i, moveData, vector)) + return true; + + // If no room found then realloc within block for lower offset + VkDeviceSize offset = moveData.move.srcAllocation->GetOffset(); + if (prevMoveCount == m_Moves.size() && offset != 0 && metadata->GetSumFreeSize() >= moveData.size) + { + VmaAllocationRequest request = {}; + if (metadata->CreateAllocationRequest( + moveData.size, + moveData.alignment, + false, + moveData.type, + VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT, + &request)) + { + if (metadata->GetAllocationOffset(request.allocHandle) < offset) + { + if (vector.CommitAllocationRequest( + request, + block, + moveData.alignment, + moveData.flags, + this, + moveData.type, + &moveData.move.dstTmpAllocation) == VK_SUCCESS) + { + m_Moves.push_back(moveData.move); + if (IncrementCounters(moveData.size)) + return true; + } + } + } + } + } + } + return false; +} + +bool VmaDefragmentationContext_T::ComputeDefragmentation_Extensive(VmaBlockVector& vector, size_t index) +{ + // First free single block, then populate it to the brim, then free another block, and so on + + // Fallback to previous algorithm since without granularity conflicts it can achieve max packing + if (vector.m_BufferImageGranularity == 1) + return ComputeDefragmentation_Full(vector); + + VMA_ASSERT(m_AlgorithmState != VMA_NULL); + + StateExtensive& vectorState = reinterpret_cast(m_AlgorithmState)[index]; + + bool texturePresent = false, bufferPresent = false, otherPresent = false; + switch (vectorState.operation) + { + case StateExtensive::Operation::Done: // Vector defragmented + return false; + case StateExtensive::Operation::FindFreeBlockBuffer: + case StateExtensive::Operation::FindFreeBlockTexture: + case StateExtensive::Operation::FindFreeBlockAll: + { + // No more blocks to free, just perform fast realloc and move to cleanup + if (vectorState.firstFreeBlock == 0) + { + vectorState.operation = StateExtensive::Operation::Cleanup; + return ComputeDefragmentation_Fast(vector); + } + + // No free blocks, have to clear last one + size_t last = (vectorState.firstFreeBlock == SIZE_MAX ? 
vector.GetBlockCount() : vectorState.firstFreeBlock) - 1; + VmaBlockMetadata* freeMetadata = vector.GetBlock(last)->m_pMetadata; + + const size_t prevMoveCount = m_Moves.size(); + for (VmaAllocHandle handle = freeMetadata->GetAllocationListBegin(); + handle != VK_NULL_HANDLE; + handle = freeMetadata->GetNextAllocation(handle)) + { + MoveAllocationData moveData = GetMoveData(handle, freeMetadata); + switch (CheckCounters(moveData.move.srcAllocation->GetSize())) + { + case CounterStatus::Ignore: + continue; + case CounterStatus::End: + return true; + case CounterStatus::Pass: + break; + default: + VMA_ASSERT(0); + } + + // Check all previous blocks for free space + if (AllocInOtherBlock(0, last, moveData, vector)) + { + // Full clear performed already + if (prevMoveCount != m_Moves.size() && freeMetadata->GetNextAllocation(handle) == VK_NULL_HANDLE) + vectorState.firstFreeBlock = last; + return true; + } + } + + if (prevMoveCount == m_Moves.size()) + { + // Cannot perform full clear, have to move data in other blocks around + if (last != 0) + { + for (size_t i = last - 1; i; --i) + { + if (ReallocWithinBlock(vector, vector.GetBlock(i))) + return true; + } + } + + if (prevMoveCount == m_Moves.size()) + { + // No possible reallocs within blocks, try to move them around fast + return ComputeDefragmentation_Fast(vector); + } + } + else + { + switch (vectorState.operation) + { + case StateExtensive::Operation::FindFreeBlockBuffer: + vectorState.operation = StateExtensive::Operation::MoveBuffers; + break; + case StateExtensive::Operation::FindFreeBlockTexture: + vectorState.operation = StateExtensive::Operation::MoveTextures; + break; + case StateExtensive::Operation::FindFreeBlockAll: + vectorState.operation = StateExtensive::Operation::MoveAll; + break; + default: + VMA_ASSERT(0); + vectorState.operation = StateExtensive::Operation::MoveTextures; + } + vectorState.firstFreeBlock = last; + // Nothing done, block found without reallocations, can perform another reallocs in same pass + return ComputeDefragmentation_Extensive(vector, index); + } + break; + } + case StateExtensive::Operation::MoveTextures: + { + if (MoveDataToFreeBlocks(VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL, vector, + vectorState.firstFreeBlock, texturePresent, bufferPresent, otherPresent)) + { + if (texturePresent) + { + vectorState.operation = StateExtensive::Operation::FindFreeBlockTexture; + return ComputeDefragmentation_Extensive(vector, index); + } + + if (!bufferPresent && !otherPresent) + { + vectorState.operation = StateExtensive::Operation::Cleanup; + break; + } + + // No more textures to move, check buffers + vectorState.operation = StateExtensive::Operation::MoveBuffers; + bufferPresent = false; + otherPresent = false; + } + else + break; + VMA_FALLTHROUGH; // Fallthrough + } + case StateExtensive::Operation::MoveBuffers: + { + if (MoveDataToFreeBlocks(VMA_SUBALLOCATION_TYPE_BUFFER, vector, + vectorState.firstFreeBlock, texturePresent, bufferPresent, otherPresent)) + { + if (bufferPresent) + { + vectorState.operation = StateExtensive::Operation::FindFreeBlockBuffer; + return ComputeDefragmentation_Extensive(vector, index); + } + + if (!otherPresent) + { + vectorState.operation = StateExtensive::Operation::Cleanup; + break; + } + + // No more buffers to move, check all others + vectorState.operation = StateExtensive::Operation::MoveAll; + otherPresent = false; + } + else + break; + VMA_FALLTHROUGH; // Fallthrough + } + case StateExtensive::Operation::MoveAll: + { + if (MoveDataToFreeBlocks(VMA_SUBALLOCATION_TYPE_FREE, 
vector, + vectorState.firstFreeBlock, texturePresent, bufferPresent, otherPresent)) + { + if (otherPresent) + { + vectorState.operation = StateExtensive::Operation::FindFreeBlockBuffer; + return ComputeDefragmentation_Extensive(vector, index); + } + // Everything moved + vectorState.operation = StateExtensive::Operation::Cleanup; + } + break; + } + case StateExtensive::Operation::Cleanup: + // Cleanup is handled below so that other operations may reuse the cleanup code. This case is here to prevent the unhandled enum value warning (C4062). + break; + } + + if (vectorState.operation == StateExtensive::Operation::Cleanup) + { + // All other work done, pack data in blocks even tighter if possible + const size_t prevMoveCount = m_Moves.size(); + for (size_t i = 0; i < vector.GetBlockCount(); ++i) + { + if (ReallocWithinBlock(vector, vector.GetBlock(i))) + return true; + } + + if (prevMoveCount == m_Moves.size()) + vectorState.operation = StateExtensive::Operation::Done; + } + return false; +} + +void VmaDefragmentationContext_T::UpdateVectorStatistics(VmaBlockVector& vector, StateBalanced& state) +{ + size_t allocCount = 0; + size_t freeCount = 0; + state.avgFreeSize = 0; + state.avgAllocSize = 0; + + for (size_t i = 0; i < vector.GetBlockCount(); ++i) + { + VmaBlockMetadata* metadata = vector.GetBlock(i)->m_pMetadata; + + allocCount += metadata->GetAllocationCount(); + freeCount += metadata->GetFreeRegionsCount(); + state.avgFreeSize += metadata->GetSumFreeSize(); + state.avgAllocSize += metadata->GetSize(); + } + + state.avgAllocSize = (state.avgAllocSize - state.avgFreeSize) / allocCount; + state.avgFreeSize /= freeCount; +} + +bool VmaDefragmentationContext_T::MoveDataToFreeBlocks(VmaSuballocationType currentType, + VmaBlockVector& vector, size_t firstFreeBlock, + bool& texturePresent, bool& bufferPresent, bool& otherPresent) +{ + const size_t prevMoveCount = m_Moves.size(); + for (size_t i = firstFreeBlock ; i;) + { + VmaDeviceMemoryBlock* block = vector.GetBlock(--i); + VmaBlockMetadata* metadata = block->m_pMetadata; + + for (VmaAllocHandle handle = metadata->GetAllocationListBegin(); + handle != VK_NULL_HANDLE; + handle = metadata->GetNextAllocation(handle)) + { + MoveAllocationData moveData = GetMoveData(handle, metadata); + // Ignore newly created allocations by defragmentation algorithm + if (moveData.move.srcAllocation->GetUserData() == this) + continue; + switch (CheckCounters(moveData.move.srcAllocation->GetSize())) + { + case CounterStatus::Ignore: + continue; + case CounterStatus::End: + return true; + case CounterStatus::Pass: + break; + default: + VMA_ASSERT(0); + } + + // Move only single type of resources at once + if (!VmaIsBufferImageGranularityConflict(moveData.type, currentType)) + { + // Try to fit allocation into free blocks + if (AllocInOtherBlock(firstFreeBlock, vector.GetBlockCount(), moveData, vector)) + return false; + } + + if (!VmaIsBufferImageGranularityConflict(moveData.type, VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL)) + texturePresent = true; + else if (!VmaIsBufferImageGranularityConflict(moveData.type, VMA_SUBALLOCATION_TYPE_BUFFER)) + bufferPresent = true; + else + otherPresent = true; + } + } + return prevMoveCount == m_Moves.size(); +} +#endif // _VMA_DEFRAGMENTATION_CONTEXT_FUNCTIONS + +#ifndef _VMA_POOL_T_FUNCTIONS +VmaPool_T::VmaPool_T( + VmaAllocator hAllocator, + const VmaPoolCreateInfo& createInfo, + VkDeviceSize preferredBlockSize) + : m_BlockVector( + hAllocator, + this, // hParentPool + createInfo.memoryTypeIndex, + createInfo.blockSize != 0 ? 
createInfo.blockSize : preferredBlockSize, + createInfo.minBlockCount, + createInfo.maxBlockCount, + (createInfo.flags& VMA_POOL_CREATE_IGNORE_BUFFER_IMAGE_GRANULARITY_BIT) != 0 ? 1 : hAllocator->GetBufferImageGranularity(), + createInfo.blockSize != 0, // explicitBlockSize + createInfo.flags & VMA_POOL_CREATE_ALGORITHM_MASK, // algorithm + createInfo.priority, + VMA_MAX(hAllocator->GetMemoryTypeMinAlignment(createInfo.memoryTypeIndex), createInfo.minAllocationAlignment), + createInfo.pMemoryAllocateNext), + m_Id(0), + m_Name(VMA_NULL) {} + +VmaPool_T::~VmaPool_T() +{ + VMA_ASSERT(m_PrevPool == VMA_NULL && m_NextPool == VMA_NULL); + + const VkAllocationCallbacks* allocs = m_BlockVector.GetAllocator()->GetAllocationCallbacks(); + VmaFreeString(allocs, m_Name); +} + +void VmaPool_T::SetName(const char* pName) +{ + const VkAllocationCallbacks* allocs = m_BlockVector.GetAllocator()->GetAllocationCallbacks(); + VmaFreeString(allocs, m_Name); + + if (pName != VMA_NULL) + { + m_Name = VmaCreateStringCopy(allocs, pName); + } + else + { + m_Name = VMA_NULL; + } +} +#endif // _VMA_POOL_T_FUNCTIONS + +#ifndef _VMA_ALLOCATOR_T_FUNCTIONS +VmaAllocator_T::VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo) : + m_UseMutex((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT) == 0), + m_VulkanApiVersion(pCreateInfo->vulkanApiVersion != 0 ? pCreateInfo->vulkanApiVersion : VK_API_VERSION_1_0), + m_UseKhrDedicatedAllocation((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT) != 0), + m_UseKhrBindMemory2((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT) != 0), + m_UseExtMemoryBudget((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT) != 0), + m_UseAmdDeviceCoherentMemory((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT) != 0), + m_UseKhrBufferDeviceAddress((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT) != 0), + m_UseExtMemoryPriority((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT) != 0), + m_UseKhrMaintenance4((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE4_BIT) != 0), + m_UseKhrMaintenance5((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT) != 0), + m_hDevice(pCreateInfo->device), + m_hInstance(pCreateInfo->instance), + m_AllocationCallbacksSpecified(pCreateInfo->pAllocationCallbacks != VMA_NULL), + m_AllocationCallbacks(pCreateInfo->pAllocationCallbacks ? + *pCreateInfo->pAllocationCallbacks : VmaEmptyAllocationCallbacks), + m_AllocationObjectAllocator(&m_AllocationCallbacks), + m_HeapSizeLimitMask(0), + m_DeviceMemoryCount(0), + m_PreferredLargeHeapBlockSize(0), + m_PhysicalDevice(pCreateInfo->physicalDevice), + m_GpuDefragmentationMemoryTypeBits(UINT32_MAX), + m_NextPoolId(0), + m_GlobalMemoryTypeBits(UINT32_MAX) +{ + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { + m_UseKhrDedicatedAllocation = false; + m_UseKhrBindMemory2 = false; + } + + if(VMA_DEBUG_DETECT_CORRUPTION) + { + // Needs to be multiply of uint32_t size because we are going to write VMA_CORRUPTION_DETECTION_MAGIC_VALUE to it. 
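/*
The assertion that follows enforces this: corruption detection writes a uint32_t magic
value into the margin around every allocation, so VMA_DEBUG_MARGIN must be a multiple of
sizeof(uint32_t). A minimal opt-in sketch (illustrative only; the macro values are
example choices, not defaults):

\code
#define VMA_DEBUG_MARGIN 16           // guard bytes before and after every allocation
#define VMA_DEBUG_DETECT_CORRUPTION 1 // fill margins with a magic value and validate them
#define VMA_IMPLEMENTATION
#include "vk_mem_alloc.h"

// Later, validate the margins in all HOST_VISIBLE memory types:
VkResult res = vmaCheckCorruption(allocator, UINT32_MAX); // UINT32_MAX = all memory types
\endcode
*/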
+ VMA_ASSERT(VMA_DEBUG_MARGIN % sizeof(uint32_t) == 0); + } + + VMA_ASSERT(pCreateInfo->physicalDevice && pCreateInfo->device && pCreateInfo->instance); + + if(m_VulkanApiVersion < VK_MAKE_VERSION(1, 1, 0)) + { +#if !(VMA_DEDICATED_ALLOCATION) + if((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT) != 0) + { + VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT set but required extensions are disabled by preprocessor macros."); + } +#endif +#if !(VMA_BIND_MEMORY2) + if((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT) != 0) + { + VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT set but required extension is disabled by preprocessor macros."); + } +#endif + } +#if !(VMA_MEMORY_BUDGET) + if((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT) != 0) + { + VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT set but required extension is disabled by preprocessor macros."); + } +#endif +#if !(VMA_BUFFER_DEVICE_ADDRESS) + if(m_UseKhrBufferDeviceAddress) + { + VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT is set but required extension or Vulkan 1.2 is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro."); + } +#endif +#if VMA_VULKAN_VERSION < 1003000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 3, 0)) + { + VMA_ASSERT(0 && "vulkanApiVersion >= VK_API_VERSION_1_3 but required Vulkan version is disabled by preprocessor macros."); + } +#endif +#if VMA_VULKAN_VERSION < 1002000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 2, 0)) + { + VMA_ASSERT(0 && "vulkanApiVersion >= VK_API_VERSION_1_2 but required Vulkan version is disabled by preprocessor macros."); + } +#endif +#if VMA_VULKAN_VERSION < 1001000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { + VMA_ASSERT(0 && "vulkanApiVersion >= VK_API_VERSION_1_1 but required Vulkan version is disabled by preprocessor macros."); + } +#endif +#if !(VMA_MEMORY_PRIORITY) + if(m_UseExtMemoryPriority) + { + VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT is set but required extension is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro."); + } +#endif +#if !(VMA_KHR_MAINTENANCE4) + if(m_UseKhrMaintenance4) + { + VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE4_BIT is set but required extension is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro."); + } +#endif +#if !(VMA_KHR_MAINTENANCE5) + if(m_UseKhrMaintenance5) + { + VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT is set but required extension is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro."); + } +#endif + + memset(&m_DeviceMemoryCallbacks, 0 ,sizeof(m_DeviceMemoryCallbacks)); + memset(&m_PhysicalDeviceProperties, 0, sizeof(m_PhysicalDeviceProperties)); + memset(&m_MemProps, 0, sizeof(m_MemProps)); + + memset(&m_pBlockVectors, 0, sizeof(m_pBlockVectors)); + memset(&m_VulkanFunctions, 0, sizeof(m_VulkanFunctions)); + +#if VMA_EXTERNAL_MEMORY + memset(&m_TypeExternalMemoryHandleTypes, 0, sizeof(m_TypeExternalMemoryHandleTypes)); +#endif // #if VMA_EXTERNAL_MEMORY + + if(pCreateInfo->pDeviceMemoryCallbacks != VMA_NULL) + { + m_DeviceMemoryCallbacks.pUserData = pCreateInfo->pDeviceMemoryCallbacks->pUserData; + m_DeviceMemoryCallbacks.pfnAllocate = pCreateInfo->pDeviceMemoryCallbacks->pfnAllocate; + m_DeviceMemoryCallbacks.pfnFree = 
pCreateInfo->pDeviceMemoryCallbacks->pfnFree; + } + + ImportVulkanFunctions(pCreateInfo->pVulkanFunctions); + + (*m_VulkanFunctions.vkGetPhysicalDeviceProperties)(m_PhysicalDevice, &m_PhysicalDeviceProperties); + (*m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties)(m_PhysicalDevice, &m_MemProps); + + VMA_ASSERT(VmaIsPow2(VMA_MIN_ALIGNMENT)); + VMA_ASSERT(VmaIsPow2(VMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY)); + VMA_ASSERT(VmaIsPow2(m_PhysicalDeviceProperties.limits.bufferImageGranularity)); + VMA_ASSERT(VmaIsPow2(m_PhysicalDeviceProperties.limits.nonCoherentAtomSize)); + + m_PreferredLargeHeapBlockSize = (pCreateInfo->preferredLargeHeapBlockSize != 0) ? + pCreateInfo->preferredLargeHeapBlockSize : static_cast(VMA_DEFAULT_LARGE_HEAP_BLOCK_SIZE); + + m_GlobalMemoryTypeBits = CalculateGlobalMemoryTypeBits(); + +#if VMA_EXTERNAL_MEMORY + if(pCreateInfo->pTypeExternalMemoryHandleTypes != VMA_NULL) + { + memcpy(m_TypeExternalMemoryHandleTypes, pCreateInfo->pTypeExternalMemoryHandleTypes, + sizeof(VkExternalMemoryHandleTypeFlagsKHR) * GetMemoryTypeCount()); + } +#endif // #if VMA_EXTERNAL_MEMORY + + if(pCreateInfo->pHeapSizeLimit != VMA_NULL) + { + for(uint32_t heapIndex = 0; heapIndex < GetMemoryHeapCount(); ++heapIndex) + { + const VkDeviceSize limit = pCreateInfo->pHeapSizeLimit[heapIndex]; + if(limit != VK_WHOLE_SIZE) + { + m_HeapSizeLimitMask |= 1u << heapIndex; + if(limit < m_MemProps.memoryHeaps[heapIndex].size) + { + m_MemProps.memoryHeaps[heapIndex].size = limit; + } + } + } + } + + for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) + { + // Create only supported types + if((m_GlobalMemoryTypeBits & (1u << memTypeIndex)) != 0) + { + const VkDeviceSize preferredBlockSize = CalcPreferredBlockSize(memTypeIndex); + m_pBlockVectors[memTypeIndex] = vma_new(this, VmaBlockVector)( + this, + VK_NULL_HANDLE, // hParentPool + memTypeIndex, + preferredBlockSize, + 0, + SIZE_MAX, + GetBufferImageGranularity(), + false, // explicitBlockSize + 0, // algorithm + 0.5f, // priority (0.5 is the default per Vulkan spec) + GetMemoryTypeMinAlignment(memTypeIndex), // minAllocationAlignment + VMA_NULL); // // pMemoryAllocateNext + // No need to call m_pBlockVectors[memTypeIndex][blockVectorTypeIndex]->CreateMinBlocks here, + // because minBlockCount is 0. 
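/*
For contrast with these default per-type block vectors (minBlockCount = 0, no explicit
block size), a custom pool drives the same VmaBlockVector machinery with caller-chosen
limits, and a nonzero minBlockCount does trigger CreateMinBlocks at pool creation. A
hedged sketch, assuming an existing `allocator` and a previously found `memTypeIndex`:

\code
VmaPoolCreateInfo poolInfo = {};
poolInfo.memoryTypeIndex = memTypeIndex;  // e.g. from vmaFindMemoryTypeIndex*
poolInfo.blockSize = 64ull * 1024 * 1024; // nonzero => explicitBlockSize in the pool ctor
poolInfo.minBlockCount = 1;               // one block allocated up front
poolInfo.maxBlockCount = 8;

VmaPool pool = VK_NULL_HANDLE;
VkResult res = vmaCreatePool(allocator, &poolInfo, &pool);
\endcode
*/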
+ } + } +} + +VkResult VmaAllocator_T::Init(const VmaAllocatorCreateInfo* pCreateInfo) +{ + VkResult res = VK_SUCCESS; + +#if VMA_MEMORY_BUDGET + if(m_UseExtMemoryBudget) + { + UpdateVulkanBudget(); + } +#endif // #if VMA_MEMORY_BUDGET + + return res; +} + +VmaAllocator_T::~VmaAllocator_T() +{ + VMA_ASSERT(m_Pools.IsEmpty()); + + for(size_t memTypeIndex = GetMemoryTypeCount(); memTypeIndex--; ) + { + vma_delete(this, m_pBlockVectors[memTypeIndex]); + } +} + +void VmaAllocator_T::ImportVulkanFunctions(const VmaVulkanFunctions* pVulkanFunctions) +{ +#if VMA_STATIC_VULKAN_FUNCTIONS == 1 + ImportVulkanFunctions_Static(); +#endif + + if(pVulkanFunctions != VMA_NULL) + { + ImportVulkanFunctions_Custom(pVulkanFunctions); + } + +#if VMA_DYNAMIC_VULKAN_FUNCTIONS == 1 + ImportVulkanFunctions_Dynamic(); +#endif + + ValidateVulkanFunctions(); +} + +#if VMA_STATIC_VULKAN_FUNCTIONS == 1 + +void VmaAllocator_T::ImportVulkanFunctions_Static() +{ + // Vulkan 1.0 + m_VulkanFunctions.vkGetInstanceProcAddr = (PFN_vkGetInstanceProcAddr)vkGetInstanceProcAddr; + m_VulkanFunctions.vkGetDeviceProcAddr = (PFN_vkGetDeviceProcAddr)vkGetDeviceProcAddr; + m_VulkanFunctions.vkGetPhysicalDeviceProperties = (PFN_vkGetPhysicalDeviceProperties)vkGetPhysicalDeviceProperties; + m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties = (PFN_vkGetPhysicalDeviceMemoryProperties)vkGetPhysicalDeviceMemoryProperties; + m_VulkanFunctions.vkAllocateMemory = (PFN_vkAllocateMemory)vkAllocateMemory; + m_VulkanFunctions.vkFreeMemory = (PFN_vkFreeMemory)vkFreeMemory; + m_VulkanFunctions.vkMapMemory = (PFN_vkMapMemory)vkMapMemory; + m_VulkanFunctions.vkUnmapMemory = (PFN_vkUnmapMemory)vkUnmapMemory; + m_VulkanFunctions.vkFlushMappedMemoryRanges = (PFN_vkFlushMappedMemoryRanges)vkFlushMappedMemoryRanges; + m_VulkanFunctions.vkInvalidateMappedMemoryRanges = (PFN_vkInvalidateMappedMemoryRanges)vkInvalidateMappedMemoryRanges; + m_VulkanFunctions.vkBindBufferMemory = (PFN_vkBindBufferMemory)vkBindBufferMemory; + m_VulkanFunctions.vkBindImageMemory = (PFN_vkBindImageMemory)vkBindImageMemory; + m_VulkanFunctions.vkGetBufferMemoryRequirements = (PFN_vkGetBufferMemoryRequirements)vkGetBufferMemoryRequirements; + m_VulkanFunctions.vkGetImageMemoryRequirements = (PFN_vkGetImageMemoryRequirements)vkGetImageMemoryRequirements; + m_VulkanFunctions.vkCreateBuffer = (PFN_vkCreateBuffer)vkCreateBuffer; + m_VulkanFunctions.vkDestroyBuffer = (PFN_vkDestroyBuffer)vkDestroyBuffer; + m_VulkanFunctions.vkCreateImage = (PFN_vkCreateImage)vkCreateImage; + m_VulkanFunctions.vkDestroyImage = (PFN_vkDestroyImage)vkDestroyImage; + m_VulkanFunctions.vkCmdCopyBuffer = (PFN_vkCmdCopyBuffer)vkCmdCopyBuffer; + + // Vulkan 1.1 +#if VMA_VULKAN_VERSION >= 1001000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { + m_VulkanFunctions.vkGetBufferMemoryRequirements2KHR = (PFN_vkGetBufferMemoryRequirements2)vkGetBufferMemoryRequirements2; + m_VulkanFunctions.vkGetImageMemoryRequirements2KHR = (PFN_vkGetImageMemoryRequirements2)vkGetImageMemoryRequirements2; + m_VulkanFunctions.vkBindBufferMemory2KHR = (PFN_vkBindBufferMemory2)vkBindBufferMemory2; + m_VulkanFunctions.vkBindImageMemory2KHR = (PFN_vkBindImageMemory2)vkBindImageMemory2; + } +#endif + +#if VMA_VULKAN_VERSION >= 1001000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { + m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties2KHR = (PFN_vkGetPhysicalDeviceMemoryProperties2)vkGetPhysicalDeviceMemoryProperties2; + } +#endif + +#if VMA_VULKAN_VERSION >= 1003000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 3, 0)) 
+ { + m_VulkanFunctions.vkGetDeviceBufferMemoryRequirements = (PFN_vkGetDeviceBufferMemoryRequirements)vkGetDeviceBufferMemoryRequirements; + m_VulkanFunctions.vkGetDeviceImageMemoryRequirements = (PFN_vkGetDeviceImageMemoryRequirements)vkGetDeviceImageMemoryRequirements; + } +#endif +} + +#endif // VMA_STATIC_VULKAN_FUNCTIONS == 1 + +void VmaAllocator_T::ImportVulkanFunctions_Custom(const VmaVulkanFunctions* pVulkanFunctions) +{ + VMA_ASSERT(pVulkanFunctions != VMA_NULL); + +#define VMA_COPY_IF_NOT_NULL(funcName) \ + if(pVulkanFunctions->funcName != VMA_NULL) m_VulkanFunctions.funcName = pVulkanFunctions->funcName; + + VMA_COPY_IF_NOT_NULL(vkGetInstanceProcAddr); + VMA_COPY_IF_NOT_NULL(vkGetDeviceProcAddr); + VMA_COPY_IF_NOT_NULL(vkGetPhysicalDeviceProperties); + VMA_COPY_IF_NOT_NULL(vkGetPhysicalDeviceMemoryProperties); + VMA_COPY_IF_NOT_NULL(vkAllocateMemory); + VMA_COPY_IF_NOT_NULL(vkFreeMemory); + VMA_COPY_IF_NOT_NULL(vkMapMemory); + VMA_COPY_IF_NOT_NULL(vkUnmapMemory); + VMA_COPY_IF_NOT_NULL(vkFlushMappedMemoryRanges); + VMA_COPY_IF_NOT_NULL(vkInvalidateMappedMemoryRanges); + VMA_COPY_IF_NOT_NULL(vkBindBufferMemory); + VMA_COPY_IF_NOT_NULL(vkBindImageMemory); + VMA_COPY_IF_NOT_NULL(vkGetBufferMemoryRequirements); + VMA_COPY_IF_NOT_NULL(vkGetImageMemoryRequirements); + VMA_COPY_IF_NOT_NULL(vkCreateBuffer); + VMA_COPY_IF_NOT_NULL(vkDestroyBuffer); + VMA_COPY_IF_NOT_NULL(vkCreateImage); + VMA_COPY_IF_NOT_NULL(vkDestroyImage); + VMA_COPY_IF_NOT_NULL(vkCmdCopyBuffer); + +#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 + VMA_COPY_IF_NOT_NULL(vkGetBufferMemoryRequirements2KHR); + VMA_COPY_IF_NOT_NULL(vkGetImageMemoryRequirements2KHR); +#endif + +#if VMA_BIND_MEMORY2 || VMA_VULKAN_VERSION >= 1001000 + VMA_COPY_IF_NOT_NULL(vkBindBufferMemory2KHR); + VMA_COPY_IF_NOT_NULL(vkBindImageMemory2KHR); +#endif + +#if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000 + VMA_COPY_IF_NOT_NULL(vkGetPhysicalDeviceMemoryProperties2KHR); +#endif + +#if VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 + VMA_COPY_IF_NOT_NULL(vkGetDeviceBufferMemoryRequirements); + VMA_COPY_IF_NOT_NULL(vkGetDeviceImageMemoryRequirements); +#endif + +#undef VMA_COPY_IF_NOT_NULL +} + +#if VMA_DYNAMIC_VULKAN_FUNCTIONS == 1 + +void VmaAllocator_T::ImportVulkanFunctions_Dynamic() +{ + VMA_ASSERT(m_VulkanFunctions.vkGetInstanceProcAddr && m_VulkanFunctions.vkGetDeviceProcAddr && + "To use VMA_DYNAMIC_VULKAN_FUNCTIONS in new versions of VMA you now have to pass " + "VmaVulkanFunctions::vkGetInstanceProcAddr and vkGetDeviceProcAddr as VmaAllocatorCreateInfo::pVulkanFunctions. 
" + "Other members can be null."); + +#define VMA_FETCH_INSTANCE_FUNC(memberName, functionPointerType, functionNameString) \ + if(m_VulkanFunctions.memberName == VMA_NULL) \ + m_VulkanFunctions.memberName = \ + (functionPointerType)m_VulkanFunctions.vkGetInstanceProcAddr(m_hInstance, functionNameString); +#define VMA_FETCH_DEVICE_FUNC(memberName, functionPointerType, functionNameString) \ + if(m_VulkanFunctions.memberName == VMA_NULL) \ + m_VulkanFunctions.memberName = \ + (functionPointerType)m_VulkanFunctions.vkGetDeviceProcAddr(m_hDevice, functionNameString); + + VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceProperties, PFN_vkGetPhysicalDeviceProperties, "vkGetPhysicalDeviceProperties"); + VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties, PFN_vkGetPhysicalDeviceMemoryProperties, "vkGetPhysicalDeviceMemoryProperties"); + VMA_FETCH_DEVICE_FUNC(vkAllocateMemory, PFN_vkAllocateMemory, "vkAllocateMemory"); + VMA_FETCH_DEVICE_FUNC(vkFreeMemory, PFN_vkFreeMemory, "vkFreeMemory"); + VMA_FETCH_DEVICE_FUNC(vkMapMemory, PFN_vkMapMemory, "vkMapMemory"); + VMA_FETCH_DEVICE_FUNC(vkUnmapMemory, PFN_vkUnmapMemory, "vkUnmapMemory"); + VMA_FETCH_DEVICE_FUNC(vkFlushMappedMemoryRanges, PFN_vkFlushMappedMemoryRanges, "vkFlushMappedMemoryRanges"); + VMA_FETCH_DEVICE_FUNC(vkInvalidateMappedMemoryRanges, PFN_vkInvalidateMappedMemoryRanges, "vkInvalidateMappedMemoryRanges"); + VMA_FETCH_DEVICE_FUNC(vkBindBufferMemory, PFN_vkBindBufferMemory, "vkBindBufferMemory"); + VMA_FETCH_DEVICE_FUNC(vkBindImageMemory, PFN_vkBindImageMemory, "vkBindImageMemory"); + VMA_FETCH_DEVICE_FUNC(vkGetBufferMemoryRequirements, PFN_vkGetBufferMemoryRequirements, "vkGetBufferMemoryRequirements"); + VMA_FETCH_DEVICE_FUNC(vkGetImageMemoryRequirements, PFN_vkGetImageMemoryRequirements, "vkGetImageMemoryRequirements"); + VMA_FETCH_DEVICE_FUNC(vkCreateBuffer, PFN_vkCreateBuffer, "vkCreateBuffer"); + VMA_FETCH_DEVICE_FUNC(vkDestroyBuffer, PFN_vkDestroyBuffer, "vkDestroyBuffer"); + VMA_FETCH_DEVICE_FUNC(vkCreateImage, PFN_vkCreateImage, "vkCreateImage"); + VMA_FETCH_DEVICE_FUNC(vkDestroyImage, PFN_vkDestroyImage, "vkDestroyImage"); + VMA_FETCH_DEVICE_FUNC(vkCmdCopyBuffer, PFN_vkCmdCopyBuffer, "vkCmdCopyBuffer"); + +#if VMA_VULKAN_VERSION >= 1001000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { + VMA_FETCH_DEVICE_FUNC(vkGetBufferMemoryRequirements2KHR, PFN_vkGetBufferMemoryRequirements2, "vkGetBufferMemoryRequirements2"); + VMA_FETCH_DEVICE_FUNC(vkGetImageMemoryRequirements2KHR, PFN_vkGetImageMemoryRequirements2, "vkGetImageMemoryRequirements2"); + VMA_FETCH_DEVICE_FUNC(vkBindBufferMemory2KHR, PFN_vkBindBufferMemory2, "vkBindBufferMemory2"); + VMA_FETCH_DEVICE_FUNC(vkBindImageMemory2KHR, PFN_vkBindImageMemory2, "vkBindImageMemory2"); + } +#endif + +#if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { + VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2"); + } + else if(m_UseExtMemoryBudget) + { + VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2KHR"); + } +#endif + +#if VMA_DEDICATED_ALLOCATION + if(m_UseKhrDedicatedAllocation) + { + VMA_FETCH_DEVICE_FUNC(vkGetBufferMemoryRequirements2KHR, PFN_vkGetBufferMemoryRequirements2KHR, "vkGetBufferMemoryRequirements2KHR"); + VMA_FETCH_DEVICE_FUNC(vkGetImageMemoryRequirements2KHR, PFN_vkGetImageMemoryRequirements2KHR, 
"vkGetImageMemoryRequirements2KHR"); + } +#endif + +#if VMA_BIND_MEMORY2 + if(m_UseKhrBindMemory2) + { + VMA_FETCH_DEVICE_FUNC(vkBindBufferMemory2KHR, PFN_vkBindBufferMemory2KHR, "vkBindBufferMemory2KHR"); + VMA_FETCH_DEVICE_FUNC(vkBindImageMemory2KHR, PFN_vkBindImageMemory2KHR, "vkBindImageMemory2KHR"); + } +#endif // #if VMA_BIND_MEMORY2 + +#if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { + VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2"); + } + else if(m_UseExtMemoryBudget) + { + VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2KHR"); + } +#endif // #if VMA_MEMORY_BUDGET + +#if VMA_VULKAN_VERSION >= 1003000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 3, 0)) + { + VMA_FETCH_DEVICE_FUNC(vkGetDeviceBufferMemoryRequirements, PFN_vkGetDeviceBufferMemoryRequirements, "vkGetDeviceBufferMemoryRequirements"); + VMA_FETCH_DEVICE_FUNC(vkGetDeviceImageMemoryRequirements, PFN_vkGetDeviceImageMemoryRequirements, "vkGetDeviceImageMemoryRequirements"); + } +#endif +#if VMA_KHR_MAINTENANCE4 + if(m_UseKhrMaintenance4) + { + VMA_FETCH_DEVICE_FUNC(vkGetDeviceBufferMemoryRequirements, PFN_vkGetDeviceBufferMemoryRequirementsKHR, "vkGetDeviceBufferMemoryRequirementsKHR"); + VMA_FETCH_DEVICE_FUNC(vkGetDeviceImageMemoryRequirements, PFN_vkGetDeviceImageMemoryRequirementsKHR, "vkGetDeviceImageMemoryRequirementsKHR"); + } +#endif + +#undef VMA_FETCH_DEVICE_FUNC +#undef VMA_FETCH_INSTANCE_FUNC +} + +#endif // VMA_DYNAMIC_VULKAN_FUNCTIONS == 1 + +void VmaAllocator_T::ValidateVulkanFunctions() +{ + VMA_ASSERT(m_VulkanFunctions.vkGetPhysicalDeviceProperties != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkAllocateMemory != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkFreeMemory != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkMapMemory != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkUnmapMemory != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkFlushMappedMemoryRanges != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkInvalidateMappedMemoryRanges != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkBindBufferMemory != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkBindImageMemory != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkGetBufferMemoryRequirements != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkGetImageMemoryRequirements != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkCreateBuffer != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkDestroyBuffer != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkCreateImage != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkDestroyImage != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkCmdCopyBuffer != VMA_NULL); + +#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0) || m_UseKhrDedicatedAllocation) + { + VMA_ASSERT(m_VulkanFunctions.vkGetBufferMemoryRequirements2KHR != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkGetImageMemoryRequirements2KHR != VMA_NULL); + } +#endif + +#if VMA_BIND_MEMORY2 || VMA_VULKAN_VERSION >= 1001000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0) || m_UseKhrBindMemory2) + { + VMA_ASSERT(m_VulkanFunctions.vkBindBufferMemory2KHR != VMA_NULL); + VMA_ASSERT(m_VulkanFunctions.vkBindImageMemory2KHR != VMA_NULL); + } +#endif + +#if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000 + 
if(m_UseExtMemoryBudget || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { + VMA_ASSERT(m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties2KHR != VMA_NULL); + } +#endif + + // Not validating these due to suspected driver bugs with these function + // pointers being null despite correct extension or Vulkan version is enabled. + // See issue #397. Their usage in VMA is optional anyway. + // + // VMA_ASSERT(m_VulkanFunctions.vkGetDeviceBufferMemoryRequirements != VMA_NULL); + // VMA_ASSERT(m_VulkanFunctions.vkGetDeviceImageMemoryRequirements != VMA_NULL); +} + +VkDeviceSize VmaAllocator_T::CalcPreferredBlockSize(uint32_t memTypeIndex) +{ + const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(memTypeIndex); + const VkDeviceSize heapSize = m_MemProps.memoryHeaps[heapIndex].size; + const bool isSmallHeap = heapSize <= VMA_SMALL_HEAP_MAX_SIZE; + return VmaAlignUp(isSmallHeap ? (heapSize / 8) : m_PreferredLargeHeapBlockSize, (VkDeviceSize)32); +} + +VkResult VmaAllocator_T::AllocateMemoryOfType( + VmaPool pool, + VkDeviceSize size, + VkDeviceSize alignment, + bool dedicatedPreferred, + VkBuffer dedicatedBuffer, + VkImage dedicatedImage, + VmaBufferImageUsage dedicatedBufferImageUsage, + const VmaAllocationCreateInfo& createInfo, + uint32_t memTypeIndex, + VmaSuballocationType suballocType, + VmaDedicatedAllocationList& dedicatedAllocations, + VmaBlockVector& blockVector, + size_t allocationCount, + VmaAllocation* pAllocations) +{ + VMA_ASSERT(pAllocations != VMA_NULL); + VMA_DEBUG_LOG_FORMAT(" AllocateMemory: MemoryTypeIndex=%" PRIu32 ", AllocationCount=%zu, Size=%" PRIu64, memTypeIndex, allocationCount, size); + + VmaAllocationCreateInfo finalCreateInfo = createInfo; + VkResult res = CalcMemTypeParams( + finalCreateInfo, + memTypeIndex, + size, + allocationCount); + if(res != VK_SUCCESS) + return res; + + if((finalCreateInfo.flags & VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT) != 0) + { + return AllocateDedicatedMemory( + pool, + size, + suballocType, + dedicatedAllocations, + memTypeIndex, + (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0, + (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0, + (finalCreateInfo.flags & + (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0, + (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT) != 0, + finalCreateInfo.pUserData, + finalCreateInfo.priority, + dedicatedBuffer, + dedicatedImage, + dedicatedBufferImageUsage, + allocationCount, + pAllocations, + blockVector.GetAllocationNextPtr()); + } + else + { + const bool canAllocateDedicated = + (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) == 0 && + (pool == VK_NULL_HANDLE || !blockVector.HasExplicitBlockSize()); + + if(canAllocateDedicated) + { + // Heuristics: Allocate dedicated memory if requested size if greater than half of preferred block size. + if(size > blockVector.GetPreferredBlockSize() / 2) + { + dedicatedPreferred = true; + } + // Protection against creating each allocation as dedicated when we reach or exceed heap size/budget, + // which can quickly deplete maxMemoryAllocationCount: Don't prefer dedicated allocations when above + // 3/4 of the maximum allocation count. 
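/*
A worked example of the two heuristics above (numbers are illustrative): with VMA's
default 256 MiB preferred block size on a large heap (unless overridden via
preferredLargeHeapBlockSize), any request larger than 128 MiB sets
dedicatedPreferred = true. If the device reports maxMemoryAllocationCount = 4096 (a
common value), the guard below flips dedicatedPreferred back to false once more than
3072 (= 4096 * 3 / 4) VkDeviceMemory objects are live, preserving the remaining
allocation count for block allocations that pack many resources per VkDeviceMemory.
*/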
+ if(m_PhysicalDeviceProperties.limits.maxMemoryAllocationCount < UINT32_MAX / 4 && + m_DeviceMemoryCount.load() > m_PhysicalDeviceProperties.limits.maxMemoryAllocationCount * 3 / 4) + { + dedicatedPreferred = false; + } + + if(dedicatedPreferred) + { + res = AllocateDedicatedMemory( + pool, + size, + suballocType, + dedicatedAllocations, + memTypeIndex, + (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0, + (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0, + (finalCreateInfo.flags & + (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0, + (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT) != 0, + finalCreateInfo.pUserData, + finalCreateInfo.priority, + dedicatedBuffer, + dedicatedImage, + dedicatedBufferImageUsage, + allocationCount, + pAllocations, + blockVector.GetAllocationNextPtr()); + if(res == VK_SUCCESS) + { + // Succeeded: AllocateDedicatedMemory function already filled pMemory, nothing more to do here. + VMA_DEBUG_LOG(" Allocated as DedicatedMemory"); + return VK_SUCCESS; + } + } + } + + res = blockVector.Allocate( + size, + alignment, + finalCreateInfo, + suballocType, + allocationCount, + pAllocations); + if(res == VK_SUCCESS) + return VK_SUCCESS; + + // Try dedicated memory. + if(canAllocateDedicated && !dedicatedPreferred) + { + res = AllocateDedicatedMemory( + pool, + size, + suballocType, + dedicatedAllocations, + memTypeIndex, + (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0, + (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0, + (finalCreateInfo.flags & + (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0, + (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT) != 0, + finalCreateInfo.pUserData, + finalCreateInfo.priority, + dedicatedBuffer, + dedicatedImage, + dedicatedBufferImageUsage, + allocationCount, + pAllocations, + blockVector.GetAllocationNextPtr()); + if(res == VK_SUCCESS) + { + // Succeeded: AllocateDedicatedMemory function already filled pMemory, nothing more to do here. + VMA_DEBUG_LOG(" Allocated as DedicatedMemory"); + return VK_SUCCESS; + } + } + // Everything failed: Return error code. 
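/*
The fallback order implemented above is: dedicated (if preferred) -> block vector ->
dedicated (last resort). A caller can force the first branch explicitly; a minimal,
hedged sketch (the buffer parameters are placeholders):

\code
VkBufferCreateInfo bufInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
bufInfo.size = 64ull * 1024 * 1024;
bufInfo.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;

VmaAllocationCreateInfo allocInfo = {};
allocInfo.usage = VMA_MEMORY_USAGE_AUTO;
allocInfo.flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; // skip block vectors entirely

VkBuffer buffer = VK_NULL_HANDLE;
VmaAllocation allocation = VK_NULL_HANDLE;
VkResult res = vmaCreateBuffer(allocator, &bufInfo, &allocInfo, &buffer, &allocation, nullptr);
\endcode
*/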
+ VMA_DEBUG_LOG(" vkAllocateMemory FAILED"); + return res; + } +} + +VkResult VmaAllocator_T::AllocateDedicatedMemory( + VmaPool pool, + VkDeviceSize size, + VmaSuballocationType suballocType, + VmaDedicatedAllocationList& dedicatedAllocations, + uint32_t memTypeIndex, + bool map, + bool isUserDataString, + bool isMappingAllowed, + bool canAliasMemory, + void* pUserData, + float priority, + VkBuffer dedicatedBuffer, + VkImage dedicatedImage, + VmaBufferImageUsage dedicatedBufferImageUsage, + size_t allocationCount, + VmaAllocation* pAllocations, + const void* pNextChain) +{ + VMA_ASSERT(allocationCount > 0 && pAllocations); + + VkMemoryAllocateInfo allocInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO }; + allocInfo.memoryTypeIndex = memTypeIndex; + allocInfo.allocationSize = size; + allocInfo.pNext = pNextChain; + +#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 + VkMemoryDedicatedAllocateInfoKHR dedicatedAllocInfo = { VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR }; + if(!canAliasMemory) + { + if(m_UseKhrDedicatedAllocation || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { + if(dedicatedBuffer != VK_NULL_HANDLE) + { + VMA_ASSERT(dedicatedImage == VK_NULL_HANDLE); + dedicatedAllocInfo.buffer = dedicatedBuffer; + VmaPnextChainPushFront(&allocInfo, &dedicatedAllocInfo); + } + else if(dedicatedImage != VK_NULL_HANDLE) + { + dedicatedAllocInfo.image = dedicatedImage; + VmaPnextChainPushFront(&allocInfo, &dedicatedAllocInfo); + } + } + } +#endif // #if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 + +#if VMA_BUFFER_DEVICE_ADDRESS + VkMemoryAllocateFlagsInfoKHR allocFlagsInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO_KHR }; + if(m_UseKhrBufferDeviceAddress) + { + bool canContainBufferWithDeviceAddress = true; + if(dedicatedBuffer != VK_NULL_HANDLE) + { + canContainBufferWithDeviceAddress = dedicatedBufferImageUsage == VmaBufferImageUsage::UNKNOWN || + dedicatedBufferImageUsage.Contains(VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_EXT); + } + else if(dedicatedImage != VK_NULL_HANDLE) + { + canContainBufferWithDeviceAddress = false; + } + if(canContainBufferWithDeviceAddress) + { + allocFlagsInfo.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR; + VmaPnextChainPushFront(&allocInfo, &allocFlagsInfo); + } + } +#endif // #if VMA_BUFFER_DEVICE_ADDRESS + +#if VMA_MEMORY_PRIORITY + VkMemoryPriorityAllocateInfoEXT priorityInfo = { VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT }; + if(m_UseExtMemoryPriority) + { + VMA_ASSERT(priority >= 0.f && priority <= 1.f); + priorityInfo.priority = priority; + VmaPnextChainPushFront(&allocInfo, &priorityInfo); + } +#endif // #if VMA_MEMORY_PRIORITY + +#if VMA_EXTERNAL_MEMORY + // Attach VkExportMemoryAllocateInfoKHR if necessary. 
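/*
The export handle types consumed here are supplied per memory type through
VmaAllocatorCreateInfo::pTypeExternalMemoryHandleTypes. A hedged configuration sketch,
assuming a POSIX target and a `memProps` struct filled by
vkGetPhysicalDeviceMemoryProperties:

\code
VkExternalMemoryHandleTypeFlagsKHR handleTypes[VK_MAX_MEMORY_TYPES] = {};
for (uint32_t i = 0; i < memProps.memoryTypeCount; ++i)
    handleTypes[i] = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;

VmaAllocatorCreateInfo allocatorInfo = {};
// ... physicalDevice, device, instance, etc. as usual ...
allocatorInfo.pTypeExternalMemoryHandleTypes = handleTypes;
\endcode
*/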
+ VkExportMemoryAllocateInfoKHR exportMemoryAllocInfo = { VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR }; + exportMemoryAllocInfo.handleTypes = GetExternalMemoryHandleTypeFlags(memTypeIndex); + if(exportMemoryAllocInfo.handleTypes != 0) + { + VmaPnextChainPushFront(&allocInfo, &exportMemoryAllocInfo); + } +#endif // #if VMA_EXTERNAL_MEMORY + + size_t allocIndex; + VkResult res = VK_SUCCESS; + for(allocIndex = 0; allocIndex < allocationCount; ++allocIndex) + { + res = AllocateDedicatedMemoryPage( + pool, + size, + suballocType, + memTypeIndex, + allocInfo, + map, + isUserDataString, + isMappingAllowed, + pUserData, + pAllocations + allocIndex); + if(res != VK_SUCCESS) + { + break; + } + } + + if(res == VK_SUCCESS) + { + for (allocIndex = 0; allocIndex < allocationCount; ++allocIndex) + { + dedicatedAllocations.Register(pAllocations[allocIndex]); + } + VMA_DEBUG_LOG_FORMAT(" Allocated DedicatedMemory Count=%zu, MemoryTypeIndex=#%" PRIu32, allocationCount, memTypeIndex); + } + else + { + // Free all already created allocations. + while(allocIndex--) + { + VmaAllocation currAlloc = pAllocations[allocIndex]; + VkDeviceMemory hMemory = currAlloc->GetMemory(); + + /* + There is no need to call this, because Vulkan spec allows to skip vkUnmapMemory + before vkFreeMemory. + + if(currAlloc->GetMappedData() != VMA_NULL) + { + (*m_VulkanFunctions.vkUnmapMemory)(m_hDevice, hMemory); + } + */ + + FreeVulkanMemory(memTypeIndex, currAlloc->GetSize(), hMemory); + m_Budget.RemoveAllocation(MemoryTypeIndexToHeapIndex(memTypeIndex), currAlloc->GetSize()); + m_AllocationObjectAllocator.Free(currAlloc); + } + + memset(pAllocations, 0, sizeof(VmaAllocation) * allocationCount); + } + + return res; +} + +VkResult VmaAllocator_T::AllocateDedicatedMemoryPage( + VmaPool pool, + VkDeviceSize size, + VmaSuballocationType suballocType, + uint32_t memTypeIndex, + const VkMemoryAllocateInfo& allocInfo, + bool map, + bool isUserDataString, + bool isMappingAllowed, + void* pUserData, + VmaAllocation* pAllocation) +{ + VkDeviceMemory hMemory = VK_NULL_HANDLE; + VkResult res = AllocateVulkanMemory(&allocInfo, &hMemory); + if(res < 0) + { + VMA_DEBUG_LOG(" vkAllocateMemory FAILED"); + return res; + } + + void* pMappedData = VMA_NULL; + if(map) + { + res = (*m_VulkanFunctions.vkMapMemory)( + m_hDevice, + hMemory, + 0, + VK_WHOLE_SIZE, + 0, + &pMappedData); + if(res < 0) + { + VMA_DEBUG_LOG(" vkMapMemory FAILED"); + FreeVulkanMemory(memTypeIndex, size, hMemory); + return res; + } + } + + *pAllocation = m_AllocationObjectAllocator.Allocate(isMappingAllowed); + (*pAllocation)->InitDedicatedAllocation(pool, memTypeIndex, hMemory, suballocType, pMappedData, size); + if (isUserDataString) + (*pAllocation)->SetName(this, (const char*)pUserData); + else + (*pAllocation)->SetUserData(this, pUserData); + m_Budget.AddAllocation(MemoryTypeIndexToHeapIndex(memTypeIndex), size); + if(VMA_DEBUG_INITIALIZE_ALLOCATIONS) + { + FillAllocation(*pAllocation, VMA_ALLOCATION_FILL_PATTERN_CREATED); + } + + return VK_SUCCESS; +} + +void VmaAllocator_T::GetBufferMemoryRequirements( + VkBuffer hBuffer, + VkMemoryRequirements& memReq, + bool& requiresDedicatedAllocation, + bool& prefersDedicatedAllocation) const +{ +#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 + if(m_UseKhrDedicatedAllocation || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { + VkBufferMemoryRequirementsInfo2KHR memReqInfo = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2_KHR }; + memReqInfo.buffer = hBuffer; + + VkMemoryDedicatedRequirementsKHR 
memDedicatedReq = { VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR }; + + VkMemoryRequirements2KHR memReq2 = { VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2_KHR }; + VmaPnextChainPushFront(&memReq2, &memDedicatedReq); + + (*m_VulkanFunctions.vkGetBufferMemoryRequirements2KHR)(m_hDevice, &memReqInfo, &memReq2); + + memReq = memReq2.memoryRequirements; + requiresDedicatedAllocation = (memDedicatedReq.requiresDedicatedAllocation != VK_FALSE); + prefersDedicatedAllocation = (memDedicatedReq.prefersDedicatedAllocation != VK_FALSE); + } + else +#endif // #if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 + { + (*m_VulkanFunctions.vkGetBufferMemoryRequirements)(m_hDevice, hBuffer, &memReq); + requiresDedicatedAllocation = false; + prefersDedicatedAllocation = false; + } +} + +void VmaAllocator_T::GetImageMemoryRequirements( + VkImage hImage, + VkMemoryRequirements& memReq, + bool& requiresDedicatedAllocation, + bool& prefersDedicatedAllocation) const +{ +#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 + if(m_UseKhrDedicatedAllocation || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { + VkImageMemoryRequirementsInfo2KHR memReqInfo = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2_KHR }; + memReqInfo.image = hImage; + + VkMemoryDedicatedRequirementsKHR memDedicatedReq = { VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR }; + + VkMemoryRequirements2KHR memReq2 = { VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2_KHR }; + VmaPnextChainPushFront(&memReq2, &memDedicatedReq); + + (*m_VulkanFunctions.vkGetImageMemoryRequirements2KHR)(m_hDevice, &memReqInfo, &memReq2); + + memReq = memReq2.memoryRequirements; + requiresDedicatedAllocation = (memDedicatedReq.requiresDedicatedAllocation != VK_FALSE); + prefersDedicatedAllocation = (memDedicatedReq.prefersDedicatedAllocation != VK_FALSE); + } + else +#endif // #if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 + { + (*m_VulkanFunctions.vkGetImageMemoryRequirements)(m_hDevice, hImage, &memReq); + requiresDedicatedAllocation = false; + prefersDedicatedAllocation = false; + } +} + +VkResult VmaAllocator_T::FindMemoryTypeIndex( + uint32_t memoryTypeBits, + const VmaAllocationCreateInfo* pAllocationCreateInfo, + VmaBufferImageUsage bufImgUsage, + uint32_t* pMemoryTypeIndex) const +{ + memoryTypeBits &= GetGlobalMemoryTypeBits(); + + if(pAllocationCreateInfo->memoryTypeBits != 0) + { + memoryTypeBits &= pAllocationCreateInfo->memoryTypeBits; + } + + VkMemoryPropertyFlags requiredFlags = 0, preferredFlags = 0, notPreferredFlags = 0; + if(!FindMemoryPreferences( + IsIntegratedGpu(), + *pAllocationCreateInfo, + bufImgUsage, + requiredFlags, preferredFlags, notPreferredFlags)) + { + return VK_ERROR_FEATURE_NOT_PRESENT; + } + + *pMemoryTypeIndex = UINT32_MAX; + uint32_t minCost = UINT32_MAX; + for(uint32_t memTypeIndex = 0, memTypeBit = 1; + memTypeIndex < GetMemoryTypeCount(); + ++memTypeIndex, memTypeBit <<= 1) + { + // This memory type is acceptable according to memoryTypeBits bitmask. + if((memTypeBit & memoryTypeBits) != 0) + { + const VkMemoryPropertyFlags currFlags = + m_MemProps.memoryTypes[memTypeIndex].propertyFlags; + // This memory type contains requiredFlags. + if((requiredFlags & ~currFlags) == 0) + { + // Calculate cost as number of bits from preferredFlags not present in this memory type. + uint32_t currCost = VMA_COUNT_BITS_SET(preferredFlags & ~currFlags) + + VMA_COUNT_BITS_SET(currFlags & notPreferredFlags); + // Remember memory type with lowest cost. 
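/*
A worked example of the cost metric computed above (hedged): with
preferredFlags = DEVICE_LOCAL and notPreferredFlags = HOST_VISIBLE, a type exposing only
DEVICE_LOCAL costs 0 and wins immediately, while a HOST_VISIBLE | HOST_COHERENT type
costs 2 (one missing preferred bit plus one not-preferred bit present). Applications
usually reach this search through the public helpers, e.g. (assuming `allocator` and a
filled `bufCreateInfo`):

\code
VmaAllocationCreateInfo allocCreateInfo = {};
allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;

uint32_t memTypeIndex = 0;
VkResult res = vmaFindMemoryTypeIndexForBufferInfo(
    allocator, &bufCreateInfo, &allocCreateInfo, &memTypeIndex);
\endcode
*/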
+ if(currCost < minCost) + { + *pMemoryTypeIndex = memTypeIndex; + if(currCost == 0) + { + return VK_SUCCESS; + } + minCost = currCost; + } + } + } + } + return (*pMemoryTypeIndex != UINT32_MAX) ? VK_SUCCESS : VK_ERROR_FEATURE_NOT_PRESENT; +} + +VkResult VmaAllocator_T::CalcMemTypeParams( + VmaAllocationCreateInfo& inoutCreateInfo, + uint32_t memTypeIndex, + VkDeviceSize size, + size_t allocationCount) +{ + // If memory type is not HOST_VISIBLE, disable MAPPED. + if((inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0 && + (m_MemProps.memoryTypes[memTypeIndex].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == 0) + { + inoutCreateInfo.flags &= ~VMA_ALLOCATION_CREATE_MAPPED_BIT; + } + + if((inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT) != 0 && + (inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT) != 0) + { + const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(memTypeIndex); + VmaBudget heapBudget = {}; + GetHeapBudgets(&heapBudget, heapIndex, 1); + if(heapBudget.usage + size * allocationCount > heapBudget.budget) + { + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + } + } + return VK_SUCCESS; +} + +VkResult VmaAllocator_T::CalcAllocationParams( + VmaAllocationCreateInfo& inoutCreateInfo, + bool dedicatedRequired, + bool dedicatedPreferred) +{ + VMA_ASSERT((inoutCreateInfo.flags & + (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != + (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT) && + "Specifying both flags VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT and VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT is incorrect."); + VMA_ASSERT((((inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT) == 0 || + (inoutCreateInfo.flags & (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0)) && + "Specifying VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT requires also VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT."); + if(inoutCreateInfo.usage == VMA_MEMORY_USAGE_AUTO || inoutCreateInfo.usage == VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE || inoutCreateInfo.usage == VMA_MEMORY_USAGE_AUTO_PREFER_HOST) + { + if((inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0) + { + VMA_ASSERT((inoutCreateInfo.flags & (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0 && + "When using VMA_ALLOCATION_CREATE_MAPPED_BIT and usage = VMA_MEMORY_USAGE_AUTO*, you must also specify VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT."); + } + } + + // If memory is lazily allocated, it should be always dedicated. 
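/*
A hedged example of the rule below: transient attachments (e.g. an MSAA color target
that can live entirely in tile memory) are the typical users of lazily allocated
memory, and they end up dedicated:

\code
VmaAllocationCreateInfo allocInfo = {};
allocInfo.usage = VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED; // implies DEDICATED_MEMORY_BIT here
\endcode
*/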
+ if(dedicatedRequired || + inoutCreateInfo.usage == VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED) + { + inoutCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; + } + + if(inoutCreateInfo.pool != VK_NULL_HANDLE) + { + if(inoutCreateInfo.pool->m_BlockVector.HasExplicitBlockSize() && + (inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT) != 0) + { + VMA_ASSERT(0 && "Specifying VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT while current custom pool doesn't support dedicated allocations."); + return VK_ERROR_FEATURE_NOT_PRESENT; + } + inoutCreateInfo.priority = inoutCreateInfo.pool->m_BlockVector.GetPriority(); + } + + if((inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT) != 0 && + (inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) != 0) + { + VMA_ASSERT(0 && "Specifying VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT together with VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT makes no sense."); + return VK_ERROR_FEATURE_NOT_PRESENT; + } + + if(VMA_DEBUG_ALWAYS_DEDICATED_MEMORY && + (inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) != 0) + { + inoutCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; + } + + // Non-auto USAGE values imply HOST_ACCESS flags. + // And so does VMA_MEMORY_USAGE_UNKNOWN because it is used with custom pools. + // Which specific flag is used doesn't matter. They change things only when used with VMA_MEMORY_USAGE_AUTO*. + // Otherwise they just protect from assert on mapping. + if(inoutCreateInfo.usage != VMA_MEMORY_USAGE_AUTO && + inoutCreateInfo.usage != VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE && + inoutCreateInfo.usage != VMA_MEMORY_USAGE_AUTO_PREFER_HOST) + { + if((inoutCreateInfo.flags & (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) == 0) + { + inoutCreateInfo.flags |= VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; + } + } + + return VK_SUCCESS; +} + +VkResult VmaAllocator_T::AllocateMemory( + const VkMemoryRequirements& vkMemReq, + bool requiresDedicatedAllocation, + bool prefersDedicatedAllocation, + VkBuffer dedicatedBuffer, + VkImage dedicatedImage, + VmaBufferImageUsage dedicatedBufferImageUsage, + const VmaAllocationCreateInfo& createInfo, + VmaSuballocationType suballocType, + size_t allocationCount, + VmaAllocation* pAllocations) +{ + memset(pAllocations, 0, sizeof(VmaAllocation) * allocationCount); + + VMA_ASSERT(VmaIsPow2(vkMemReq.alignment)); + + if(vkMemReq.size == 0) + { + return VK_ERROR_INITIALIZATION_FAILED; + } + + VmaAllocationCreateInfo createInfoFinal = createInfo; + VkResult res = CalcAllocationParams(createInfoFinal, requiresDedicatedAllocation, prefersDedicatedAllocation); + if(res != VK_SUCCESS) + return res; + + if(createInfoFinal.pool != VK_NULL_HANDLE) + { + VmaBlockVector& blockVector = createInfoFinal.pool->m_BlockVector; + return AllocateMemoryOfType( + createInfoFinal.pool, + vkMemReq.size, + vkMemReq.alignment, + prefersDedicatedAllocation, + dedicatedBuffer, + dedicatedImage, + dedicatedBufferImageUsage, + createInfoFinal, + blockVector.GetMemoryTypeIndex(), + suballocType, + createInfoFinal.pool->m_DedicatedAllocations, + blockVector, + allocationCount, + pAllocations); + } + else + { + // Bit mask of memory Vulkan types acceptable for this allocation. + uint32_t memoryTypeBits = vkMemReq.memoryTypeBits; + uint32_t memTypeIndex = UINT32_MAX; + res = FindMemoryTypeIndex(memoryTypeBits, &createInfoFinal, dedicatedBufferImageUsage, &memTypeIndex); + // Can't find any single memory type matching requirements. 
res is VK_ERROR_FEATURE_NOT_PRESENT. + if(res != VK_SUCCESS) + return res; + do + { + VmaBlockVector* blockVector = m_pBlockVectors[memTypeIndex]; + VMA_ASSERT(blockVector && "Trying to use unsupported memory type!"); + res = AllocateMemoryOfType( + VK_NULL_HANDLE, + vkMemReq.size, + vkMemReq.alignment, + requiresDedicatedAllocation || prefersDedicatedAllocation, + dedicatedBuffer, + dedicatedImage, + dedicatedBufferImageUsage, + createInfoFinal, + memTypeIndex, + suballocType, + m_DedicatedAllocations[memTypeIndex], + *blockVector, + allocationCount, + pAllocations); + // Allocation succeeded + if(res == VK_SUCCESS) + return VK_SUCCESS; + + // Remove old memTypeIndex from list of possibilities. + memoryTypeBits &= ~(1u << memTypeIndex); + // Find alternative memTypeIndex. + res = FindMemoryTypeIndex(memoryTypeBits, &createInfoFinal, dedicatedBufferImageUsage, &memTypeIndex); + } while(res == VK_SUCCESS); + + // No other matching memory type index could be found. + // Not returning res, which is VK_ERROR_FEATURE_NOT_PRESENT, because we already failed to allocate once. + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + } +} + +void VmaAllocator_T::FreeMemory( + size_t allocationCount, + const VmaAllocation* pAllocations) +{ + VMA_ASSERT(pAllocations); + + for(size_t allocIndex = allocationCount; allocIndex--; ) + { + VmaAllocation allocation = pAllocations[allocIndex]; + + if(allocation != VK_NULL_HANDLE) + { + if(VMA_DEBUG_INITIALIZE_ALLOCATIONS) + { + FillAllocation(allocation, VMA_ALLOCATION_FILL_PATTERN_DESTROYED); + } + + allocation->FreeName(this); + + switch(allocation->GetType()) + { + case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: + { + VmaBlockVector* pBlockVector = VMA_NULL; + VmaPool hPool = allocation->GetParentPool(); + if(hPool != VK_NULL_HANDLE) + { + pBlockVector = &hPool->m_BlockVector; + } + else + { + const uint32_t memTypeIndex = allocation->GetMemoryTypeIndex(); + pBlockVector = m_pBlockVectors[memTypeIndex]; + VMA_ASSERT(pBlockVector && "Trying to free memory of unsupported type!"); + } + pBlockVector->Free(allocation); + } + break; + case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: + FreeDedicatedMemory(allocation); + break; + default: + VMA_ASSERT(0); + } + } + } +} + +void VmaAllocator_T::CalculateStatistics(VmaTotalStatistics* pStats) +{ + // Initialize. + VmaClearDetailedStatistics(pStats->total); + for(uint32_t i = 0; i < VK_MAX_MEMORY_TYPES; ++i) + VmaClearDetailedStatistics(pStats->memoryType[i]); + for(uint32_t i = 0; i < VK_MAX_MEMORY_HEAPS; ++i) + VmaClearDetailedStatistics(pStats->memoryHeap[i]); + + // Process default pools. + for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) + { + VmaBlockVector* const pBlockVector = m_pBlockVectors[memTypeIndex]; + if (pBlockVector != VMA_NULL) + pBlockVector->AddDetailedStatistics(pStats->memoryType[memTypeIndex]); + } + + // Process custom pools. + { + VmaMutexLockRead lock(m_PoolsMutex, m_UseMutex); + for(VmaPool pool = m_Pools.Front(); pool != VMA_NULL; pool = m_Pools.GetNext(pool)) + { + VmaBlockVector& blockVector = pool->m_BlockVector; + const uint32_t memTypeIndex = blockVector.GetMemoryTypeIndex(); + blockVector.AddDetailedStatistics(pStats->memoryType[memTypeIndex]); + pool->m_DedicatedAllocations.AddDetailedStatistics(pStats->memoryType[memTypeIndex]); + } + } + + // Process dedicated allocations. 
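+    // The aggregation below is strictly hierarchical: per-type statistics (default
+    // pools + custom pools + dedicated allocations) are summed into per-heap
+    // statistics, which are then summed into pStats->total. Illustrative numbers
+    // (hypothetical): if types 0 and 2 both belong to heap 0 and carry 64 MB and
+    // 32 MB of blockBytes, heap 0 reports 96 MB and the total includes that 96 MB.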
+ for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) + { + m_DedicatedAllocations[memTypeIndex].AddDetailedStatistics(pStats->memoryType[memTypeIndex]); + } + + // Sum from memory types to memory heaps. + for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) + { + const uint32_t memHeapIndex = m_MemProps.memoryTypes[memTypeIndex].heapIndex; + VmaAddDetailedStatistics(pStats->memoryHeap[memHeapIndex], pStats->memoryType[memTypeIndex]); + } + + // Sum from memory heaps to total. + for(uint32_t memHeapIndex = 0; memHeapIndex < GetMemoryHeapCount(); ++memHeapIndex) + VmaAddDetailedStatistics(pStats->total, pStats->memoryHeap[memHeapIndex]); + + VMA_ASSERT(pStats->total.statistics.allocationCount == 0 || + pStats->total.allocationSizeMax >= pStats->total.allocationSizeMin); + VMA_ASSERT(pStats->total.unusedRangeCount == 0 || + pStats->total.unusedRangeSizeMax >= pStats->total.unusedRangeSizeMin); +} + +void VmaAllocator_T::GetHeapBudgets(VmaBudget* outBudgets, uint32_t firstHeap, uint32_t heapCount) +{ +#if VMA_MEMORY_BUDGET + if(m_UseExtMemoryBudget) + { + if(m_Budget.m_OperationsSinceBudgetFetch < 30) + { + VmaMutexLockRead lockRead(m_Budget.m_BudgetMutex, m_UseMutex); + for(uint32_t i = 0; i < heapCount; ++i, ++outBudgets) + { + const uint32_t heapIndex = firstHeap + i; + + outBudgets->statistics.blockCount = m_Budget.m_BlockCount[heapIndex]; + outBudgets->statistics.allocationCount = m_Budget.m_AllocationCount[heapIndex]; + outBudgets->statistics.blockBytes = m_Budget.m_BlockBytes[heapIndex]; + outBudgets->statistics.allocationBytes = m_Budget.m_AllocationBytes[heapIndex]; + + if(m_Budget.m_VulkanUsage[heapIndex] + outBudgets->statistics.blockBytes > m_Budget.m_BlockBytesAtBudgetFetch[heapIndex]) + { + outBudgets->usage = m_Budget.m_VulkanUsage[heapIndex] + + outBudgets->statistics.blockBytes - m_Budget.m_BlockBytesAtBudgetFetch[heapIndex]; + } + else + { + outBudgets->usage = 0; + } + + // Have to take MIN with heap size because explicit HeapSizeLimit is included in it. + outBudgets->budget = VMA_MIN( + m_Budget.m_VulkanBudget[heapIndex], m_MemProps.memoryHeaps[heapIndex].size); + } + } + else + { + UpdateVulkanBudget(); // Outside of mutex lock + GetHeapBudgets(outBudgets, firstHeap, heapCount); // Recursion + } + } + else +#endif + { + for(uint32_t i = 0; i < heapCount; ++i, ++outBudgets) + { + const uint32_t heapIndex = firstHeap + i; + + outBudgets->statistics.blockCount = m_Budget.m_BlockCount[heapIndex]; + outBudgets->statistics.allocationCount = m_Budget.m_AllocationCount[heapIndex]; + outBudgets->statistics.blockBytes = m_Budget.m_BlockBytes[heapIndex]; + outBudgets->statistics.allocationBytes = m_Budget.m_AllocationBytes[heapIndex]; + + outBudgets->usage = outBudgets->statistics.blockBytes; + outBudgets->budget = m_MemProps.memoryHeaps[heapIndex].size * 8 / 10; // 80% heuristics. 
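+            // Worked example (hypothetical device): for an 8192 MB heap this fallback
+            // path reports budget = 8192 * 8 / 10 = 6553 MB (integer division), and
+            // usage counts only bytes in blocks allocated by this allocator; memory
+            // used by the driver or by other processes is invisible here.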
+ } + } +} + +void VmaAllocator_T::GetAllocationInfo(VmaAllocation hAllocation, VmaAllocationInfo* pAllocationInfo) +{ + pAllocationInfo->memoryType = hAllocation->GetMemoryTypeIndex(); + pAllocationInfo->deviceMemory = hAllocation->GetMemory(); + pAllocationInfo->offset = hAllocation->GetOffset(); + pAllocationInfo->size = hAllocation->GetSize(); + pAllocationInfo->pMappedData = hAllocation->GetMappedData(); + pAllocationInfo->pUserData = hAllocation->GetUserData(); + pAllocationInfo->pName = hAllocation->GetName(); +} + +void VmaAllocator_T::GetAllocationInfo2(VmaAllocation hAllocation, VmaAllocationInfo2* pAllocationInfo) +{ + GetAllocationInfo(hAllocation, &pAllocationInfo->allocationInfo); + + switch (hAllocation->GetType()) + { + case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: + pAllocationInfo->blockSize = hAllocation->GetBlock()->m_pMetadata->GetSize(); + pAllocationInfo->dedicatedMemory = VK_FALSE; + break; + case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: + pAllocationInfo->blockSize = pAllocationInfo->allocationInfo.size; + pAllocationInfo->dedicatedMemory = VK_TRUE; + break; + default: + VMA_ASSERT(0); + } +} + +VkResult VmaAllocator_T::CreatePool(const VmaPoolCreateInfo* pCreateInfo, VmaPool* pPool) +{ + VMA_DEBUG_LOG_FORMAT(" CreatePool: MemoryTypeIndex=%" PRIu32 ", flags=%" PRIu32, pCreateInfo->memoryTypeIndex, pCreateInfo->flags); + + VmaPoolCreateInfo newCreateInfo = *pCreateInfo; + + // Protection against uninitialized new structure member. If garbage data are left there, this pointer dereference would crash. + if(pCreateInfo->pMemoryAllocateNext) + { + VMA_ASSERT(((const VkBaseInStructure*)pCreateInfo->pMemoryAllocateNext)->sType != 0); + } + + if(newCreateInfo.maxBlockCount == 0) + { + newCreateInfo.maxBlockCount = SIZE_MAX; + } + if(newCreateInfo.minBlockCount > newCreateInfo.maxBlockCount) + { + return VK_ERROR_INITIALIZATION_FAILED; + } + // Memory type index out of range or forbidden. + if(pCreateInfo->memoryTypeIndex >= GetMemoryTypeCount() || + ((1u << pCreateInfo->memoryTypeIndex) & m_GlobalMemoryTypeBits) == 0) + { + return VK_ERROR_FEATURE_NOT_PRESENT; + } + if(newCreateInfo.minAllocationAlignment > 0) + { + VMA_ASSERT(VmaIsPow2(newCreateInfo.minAllocationAlignment)); + } + + const VkDeviceSize preferredBlockSize = CalcPreferredBlockSize(newCreateInfo.memoryTypeIndex); + + *pPool = vma_new(this, VmaPool_T)(this, newCreateInfo, preferredBlockSize); + + VkResult res = (*pPool)->m_BlockVector.CreateMinBlocks(); + if(res != VK_SUCCESS) + { + vma_delete(this, *pPool); + *pPool = VMA_NULL; + return res; + } + + // Add to m_Pools. + { + VmaMutexLockWrite lock(m_PoolsMutex, m_UseMutex); + (*pPool)->SetId(m_NextPoolId++); + m_Pools.PushBack(*pPool); + } + + return VK_SUCCESS; +} + +void VmaAllocator_T::DestroyPool(VmaPool pool) +{ + // Remove from m_Pools. 
+ { + VmaMutexLockWrite lock(m_PoolsMutex, m_UseMutex); + m_Pools.Remove(pool); + } + + vma_delete(this, pool); +} + +void VmaAllocator_T::GetPoolStatistics(VmaPool pool, VmaStatistics* pPoolStats) +{ + VmaClearStatistics(*pPoolStats); + pool->m_BlockVector.AddStatistics(*pPoolStats); + pool->m_DedicatedAllocations.AddStatistics(*pPoolStats); +} + +void VmaAllocator_T::CalculatePoolStatistics(VmaPool pool, VmaDetailedStatistics* pPoolStats) +{ + VmaClearDetailedStatistics(*pPoolStats); + pool->m_BlockVector.AddDetailedStatistics(*pPoolStats); + pool->m_DedicatedAllocations.AddDetailedStatistics(*pPoolStats); +} + +void VmaAllocator_T::SetCurrentFrameIndex(uint32_t frameIndex) +{ + m_CurrentFrameIndex.store(frameIndex); + +#if VMA_MEMORY_BUDGET + if(m_UseExtMemoryBudget) + { + UpdateVulkanBudget(); + } +#endif // #if VMA_MEMORY_BUDGET +} + +VkResult VmaAllocator_T::CheckPoolCorruption(VmaPool hPool) +{ + return hPool->m_BlockVector.CheckCorruption(); +} + +VkResult VmaAllocator_T::CheckCorruption(uint32_t memoryTypeBits) +{ + VkResult finalRes = VK_ERROR_FEATURE_NOT_PRESENT; + + // Process default pools. + for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) + { + VmaBlockVector* const pBlockVector = m_pBlockVectors[memTypeIndex]; + if(pBlockVector != VMA_NULL) + { + VkResult localRes = pBlockVector->CheckCorruption(); + switch(localRes) + { + case VK_ERROR_FEATURE_NOT_PRESENT: + break; + case VK_SUCCESS: + finalRes = VK_SUCCESS; + break; + default: + return localRes; + } + } + } + + // Process custom pools. + { + VmaMutexLockRead lock(m_PoolsMutex, m_UseMutex); + for(VmaPool pool = m_Pools.Front(); pool != VMA_NULL; pool = m_Pools.GetNext(pool)) + { + if(((1u << pool->m_BlockVector.GetMemoryTypeIndex()) & memoryTypeBits) != 0) + { + VkResult localRes = pool->m_BlockVector.CheckCorruption(); + switch(localRes) + { + case VK_ERROR_FEATURE_NOT_PRESENT: + break; + case VK_SUCCESS: + finalRes = VK_SUCCESS; + break; + default: + return localRes; + } + } + } + } + + return finalRes; +} + +VkResult VmaAllocator_T::AllocateVulkanMemory(const VkMemoryAllocateInfo* pAllocateInfo, VkDeviceMemory* pMemory) +{ + AtomicTransactionalIncrement deviceMemoryCountIncrement; + const uint64_t prevDeviceMemoryCount = deviceMemoryCountIncrement.Increment(&m_DeviceMemoryCount); +#if VMA_DEBUG_DONT_EXCEED_MAX_MEMORY_ALLOCATION_COUNT + if(prevDeviceMemoryCount >= m_PhysicalDeviceProperties.limits.maxMemoryAllocationCount) + { + return VK_ERROR_TOO_MANY_OBJECTS; + } +#endif + + const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(pAllocateInfo->memoryTypeIndex); + + // HeapSizeLimit is in effect for this heap. + if((m_HeapSizeLimitMask & (1u << heapIndex)) != 0) + { + const VkDeviceSize heapSize = m_MemProps.memoryHeaps[heapIndex].size; + VkDeviceSize blockBytes = m_Budget.m_BlockBytes[heapIndex]; + for(;;) + { + const VkDeviceSize blockBytesAfterAllocation = blockBytes + pAllocateInfo->allocationSize; + if(blockBytesAfterAllocation > heapSize) + { + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + } + if(m_Budget.m_BlockBytes[heapIndex].compare_exchange_strong(blockBytes, blockBytesAfterAllocation)) + { + break; + } + } + } + else + { + m_Budget.m_BlockBytes[heapIndex] += pAllocateInfo->allocationSize; + } + ++m_Budget.m_BlockCount[heapIndex]; + + // VULKAN CALL vkAllocateMemory. 
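+    // Note: this heap's block count/bytes were charged above, *before* the Vulkan
+    // call, and are rolled back in the failure branch below, so the budget never
+    // undercounts allocations that are in flight on other threads.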
+ VkResult res = (*m_VulkanFunctions.vkAllocateMemory)(m_hDevice, pAllocateInfo, GetAllocationCallbacks(), pMemory); + + if(res == VK_SUCCESS) + { +#if VMA_MEMORY_BUDGET + ++m_Budget.m_OperationsSinceBudgetFetch; +#endif + + // Informative callback. + if(m_DeviceMemoryCallbacks.pfnAllocate != VMA_NULL) + { + (*m_DeviceMemoryCallbacks.pfnAllocate)(this, pAllocateInfo->memoryTypeIndex, *pMemory, pAllocateInfo->allocationSize, m_DeviceMemoryCallbacks.pUserData); + } + + deviceMemoryCountIncrement.Commit(); + } + else + { + --m_Budget.m_BlockCount[heapIndex]; + m_Budget.m_BlockBytes[heapIndex] -= pAllocateInfo->allocationSize; + } + + return res; +} + +void VmaAllocator_T::FreeVulkanMemory(uint32_t memoryType, VkDeviceSize size, VkDeviceMemory hMemory) +{ + // Informative callback. + if(m_DeviceMemoryCallbacks.pfnFree != VMA_NULL) + { + (*m_DeviceMemoryCallbacks.pfnFree)(this, memoryType, hMemory, size, m_DeviceMemoryCallbacks.pUserData); + } + + // VULKAN CALL vkFreeMemory. + (*m_VulkanFunctions.vkFreeMemory)(m_hDevice, hMemory, GetAllocationCallbacks()); + + const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(memoryType); + --m_Budget.m_BlockCount[heapIndex]; + m_Budget.m_BlockBytes[heapIndex] -= size; + + --m_DeviceMemoryCount; +} + +VkResult VmaAllocator_T::BindVulkanBuffer( + VkDeviceMemory memory, + VkDeviceSize memoryOffset, + VkBuffer buffer, + const void* pNext) +{ + if(pNext != VMA_NULL) + { +#if VMA_VULKAN_VERSION >= 1001000 || VMA_BIND_MEMORY2 + if((m_UseKhrBindMemory2 || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) && + m_VulkanFunctions.vkBindBufferMemory2KHR != VMA_NULL) + { + VkBindBufferMemoryInfoKHR bindBufferMemoryInfo = { VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR }; + bindBufferMemoryInfo.pNext = pNext; + bindBufferMemoryInfo.buffer = buffer; + bindBufferMemoryInfo.memory = memory; + bindBufferMemoryInfo.memoryOffset = memoryOffset; + return (*m_VulkanFunctions.vkBindBufferMemory2KHR)(m_hDevice, 1, &bindBufferMemoryInfo); + } + else +#endif // #if VMA_VULKAN_VERSION >= 1001000 || VMA_BIND_MEMORY2 + { + return VK_ERROR_EXTENSION_NOT_PRESENT; + } + } + else + { + return (*m_VulkanFunctions.vkBindBufferMemory)(m_hDevice, buffer, memory, memoryOffset); + } +} + +VkResult VmaAllocator_T::BindVulkanImage( + VkDeviceMemory memory, + VkDeviceSize memoryOffset, + VkImage image, + const void* pNext) +{ + if(pNext != VMA_NULL) + { +#if VMA_VULKAN_VERSION >= 1001000 || VMA_BIND_MEMORY2 + if((m_UseKhrBindMemory2 || m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) && + m_VulkanFunctions.vkBindImageMemory2KHR != VMA_NULL) + { + VkBindImageMemoryInfoKHR bindBufferMemoryInfo = { VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO_KHR }; + bindBufferMemoryInfo.pNext = pNext; + bindBufferMemoryInfo.image = image; + bindBufferMemoryInfo.memory = memory; + bindBufferMemoryInfo.memoryOffset = memoryOffset; + return (*m_VulkanFunctions.vkBindImageMemory2KHR)(m_hDevice, 1, &bindBufferMemoryInfo); + } + else +#endif // #if VMA_BIND_MEMORY2 + { + return VK_ERROR_EXTENSION_NOT_PRESENT; + } + } + else + { + return (*m_VulkanFunctions.vkBindImageMemory)(m_hDevice, image, memory, memoryOffset); + } +} + +VkResult VmaAllocator_T::Map(VmaAllocation hAllocation, void** ppData) +{ + switch(hAllocation->GetType()) + { + case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: + { + VmaDeviceMemoryBlock* const pBlock = hAllocation->GetBlock(); + char *pBytes = VMA_NULL; + VkResult res = pBlock->Map(this, 1, (void**)&pBytes); + if(res == VK_SUCCESS) + { + *ppData = pBytes + (ptrdiff_t)hAllocation->GetOffset(); + 
hAllocation->BlockAllocMap();
+            }
+            return res;
+        }
+    case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED:
+        return hAllocation->DedicatedAllocMap(this, ppData);
+    default:
+        VMA_ASSERT(0);
+        return VK_ERROR_MEMORY_MAP_FAILED;
+    }
+}
+
+void VmaAllocator_T::Unmap(VmaAllocation hAllocation)
+{
+    switch(hAllocation->GetType())
+    {
+    case VmaAllocation_T::ALLOCATION_TYPE_BLOCK:
+        {
+            VmaDeviceMemoryBlock* const pBlock = hAllocation->GetBlock();
+            hAllocation->BlockAllocUnmap();
+            pBlock->Unmap(this, 1);
+        }
+        break;
+    case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED:
+        hAllocation->DedicatedAllocUnmap(this);
+        break;
+    default:
+        VMA_ASSERT(0);
+    }
+}
+
+VkResult VmaAllocator_T::BindBufferMemory(
+    VmaAllocation hAllocation,
+    VkDeviceSize allocationLocalOffset,
+    VkBuffer hBuffer,
+    const void* pNext)
+{
+    VkResult res = VK_ERROR_UNKNOWN_COPY;
+    switch(hAllocation->GetType())
+    {
+    case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED:
+        res = BindVulkanBuffer(hAllocation->GetMemory(), allocationLocalOffset, hBuffer, pNext);
+        break;
+    case VmaAllocation_T::ALLOCATION_TYPE_BLOCK:
+    {
+        VmaDeviceMemoryBlock* const pBlock = hAllocation->GetBlock();
+        VMA_ASSERT(pBlock && "Binding buffer to allocation that doesn't belong to any block.");
+        res = pBlock->BindBufferMemory(this, hAllocation, allocationLocalOffset, hBuffer, pNext);
+        break;
+    }
+    default:
+        VMA_ASSERT(0);
+    }
+    return res;
+}
+
+VkResult VmaAllocator_T::BindImageMemory(
+    VmaAllocation hAllocation,
+    VkDeviceSize allocationLocalOffset,
+    VkImage hImage,
+    const void* pNext)
+{
+    VkResult res = VK_ERROR_UNKNOWN_COPY;
+    switch(hAllocation->GetType())
+    {
+    case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED:
+        res = BindVulkanImage(hAllocation->GetMemory(), allocationLocalOffset, hImage, pNext);
+        break;
+    case VmaAllocation_T::ALLOCATION_TYPE_BLOCK:
+    {
+        VmaDeviceMemoryBlock* pBlock = hAllocation->GetBlock();
+        VMA_ASSERT(pBlock && "Binding image to allocation that doesn't belong to any block.");
+        res = pBlock->BindImageMemory(this, hAllocation, allocationLocalOffset, hImage, pNext);
+        break;
+    }
+    default:
+        VMA_ASSERT(0);
+    }
+    return res;
+}
+
+VkResult VmaAllocator_T::FlushOrInvalidateAllocation(
+    VmaAllocation hAllocation,
+    VkDeviceSize offset, VkDeviceSize size,
+    VMA_CACHE_OPERATION op)
+{
+    VkResult res = VK_SUCCESS;
+
+    VkMappedMemoryRange memRange = {};
+    if(GetFlushOrInvalidateRange(hAllocation, offset, size, memRange))
+    {
+        switch(op)
+        {
+        case VMA_CACHE_FLUSH:
+            res = (*GetVulkanFunctions().vkFlushMappedMemoryRanges)(m_hDevice, 1, &memRange);
+            break;
+        case VMA_CACHE_INVALIDATE:
+            res = (*GetVulkanFunctions().vkInvalidateMappedMemoryRanges)(m_hDevice, 1, &memRange);
+            break;
+        default:
+            VMA_ASSERT(0);
+        }
+    }
+    // else: Just ignore this call.
+    return res;
+}
+
+VkResult VmaAllocator_T::FlushOrInvalidateAllocations(
+    uint32_t allocationCount,
+    const VmaAllocation* allocations,
+    const VkDeviceSize* offsets, const VkDeviceSize* sizes,
+    VMA_CACHE_OPERATION op)
+{
+    typedef VmaStlAllocator<VkMappedMemoryRange> RangeAllocator;
+    typedef VmaSmallVector<VkMappedMemoryRange, RangeAllocator, 16> RangeVector;
+    RangeVector ranges = RangeVector(RangeAllocator(GetAllocationCallbacks()));
+
+    for(uint32_t allocIndex = 0; allocIndex < allocationCount; ++allocIndex)
+    {
+        const VmaAllocation alloc = allocations[allocIndex];
+        const VkDeviceSize offset = offsets != VMA_NULL ? offsets[allocIndex] : 0;
+        const VkDeviceSize size = sizes != VMA_NULL ? 
sizes[allocIndex] : VK_WHOLE_SIZE; + VkMappedMemoryRange newRange; + if(GetFlushOrInvalidateRange(alloc, offset, size, newRange)) + { + ranges.push_back(newRange); + } + } + + VkResult res = VK_SUCCESS; + if(!ranges.empty()) + { + switch(op) + { + case VMA_CACHE_FLUSH: + res = (*GetVulkanFunctions().vkFlushMappedMemoryRanges)(m_hDevice, (uint32_t)ranges.size(), ranges.data()); + break; + case VMA_CACHE_INVALIDATE: + res = (*GetVulkanFunctions().vkInvalidateMappedMemoryRanges)(m_hDevice, (uint32_t)ranges.size(), ranges.data()); + break; + default: + VMA_ASSERT(0); + } + } + // else: Just ignore this call. + return res; +} + +VkResult VmaAllocator_T::CopyMemoryToAllocation( + const void* pSrcHostPointer, + VmaAllocation dstAllocation, + VkDeviceSize dstAllocationLocalOffset, + VkDeviceSize size) +{ + void* dstMappedData = VMA_NULL; + VkResult res = Map(dstAllocation, &dstMappedData); + if(res == VK_SUCCESS) + { + memcpy((char*)dstMappedData + dstAllocationLocalOffset, pSrcHostPointer, (size_t)size); + Unmap(dstAllocation); + res = FlushOrInvalidateAllocation(dstAllocation, dstAllocationLocalOffset, size, VMA_CACHE_FLUSH); + } + return res; +} + +VkResult VmaAllocator_T::CopyAllocationToMemory( + VmaAllocation srcAllocation, + VkDeviceSize srcAllocationLocalOffset, + void* pDstHostPointer, + VkDeviceSize size) +{ + void* srcMappedData = VMA_NULL; + VkResult res = Map(srcAllocation, &srcMappedData); + if(res == VK_SUCCESS) + { + res = FlushOrInvalidateAllocation(srcAllocation, srcAllocationLocalOffset, size, VMA_CACHE_INVALIDATE); + if(res == VK_SUCCESS) + { + memcpy(pDstHostPointer, (const char*)srcMappedData + srcAllocationLocalOffset, (size_t)size); + Unmap(srcAllocation); + } + } + return res; +} + +void VmaAllocator_T::FreeDedicatedMemory(const VmaAllocation allocation) +{ + VMA_ASSERT(allocation && allocation->GetType() == VmaAllocation_T::ALLOCATION_TYPE_DEDICATED); + + const uint32_t memTypeIndex = allocation->GetMemoryTypeIndex(); + VmaPool parentPool = allocation->GetParentPool(); + if(parentPool == VK_NULL_HANDLE) + { + // Default pool + m_DedicatedAllocations[memTypeIndex].Unregister(allocation); + } + else + { + // Custom pool + parentPool->m_DedicatedAllocations.Unregister(allocation); + } + + VkDeviceMemory hMemory = allocation->GetMemory(); + + /* + There is no need to call this, because Vulkan spec allows to skip vkUnmapMemory + before vkFreeMemory. + + if(allocation->GetMappedData() != VMA_NULL) + { + (*m_VulkanFunctions.vkUnmapMemory)(m_hDevice, hMemory); + } + */ + + FreeVulkanMemory(memTypeIndex, allocation->GetSize(), hMemory); + + m_Budget.RemoveAllocation(MemoryTypeIndexToHeapIndex(allocation->GetMemoryTypeIndex()), allocation->GetSize()); + m_AllocationObjectAllocator.Free(allocation); + + VMA_DEBUG_LOG_FORMAT(" Freed DedicatedMemory MemoryTypeIndex=%" PRIu32, memTypeIndex); +} + +uint32_t VmaAllocator_T::CalculateGpuDefragmentationMemoryTypeBits() const +{ + VkBufferCreateInfo dummyBufCreateInfo; + VmaFillGpuDefragmentationBufferCreateInfo(dummyBufCreateInfo); + + uint32_t memoryTypeBits = 0; + + // Create buffer. + VkBuffer buf = VK_NULL_HANDLE; + VkResult res = (*GetVulkanFunctions().vkCreateBuffer)( + m_hDevice, &dummyBufCreateInfo, GetAllocationCallbacks(), &buf); + if(res == VK_SUCCESS) + { + // Query for supported memory types. + VkMemoryRequirements memReq; + (*GetVulkanFunctions().vkGetBufferMemoryRequirements)(m_hDevice, buf, &memReq); + memoryTypeBits = memReq.memoryTypeBits; + + // Destroy buffer. 
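+        // The dummy buffer never gets memory bound; it exists only so that
+        // vkGetBufferMemoryRequirements() above can report which memory types a
+        // typical defragmentation staging buffer may use on this implementation.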
+ (*GetVulkanFunctions().vkDestroyBuffer)(m_hDevice, buf, GetAllocationCallbacks()); + } + + return memoryTypeBits; +} + +uint32_t VmaAllocator_T::CalculateGlobalMemoryTypeBits() const +{ + // Make sure memory information is already fetched. + VMA_ASSERT(GetMemoryTypeCount() > 0); + + uint32_t memoryTypeBits = UINT32_MAX; + + if(!m_UseAmdDeviceCoherentMemory) + { + // Exclude memory types that have VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD. + for(uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) + { + if((m_MemProps.memoryTypes[memTypeIndex].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY) != 0) + { + memoryTypeBits &= ~(1u << memTypeIndex); + } + } + } + + return memoryTypeBits; +} + +bool VmaAllocator_T::GetFlushOrInvalidateRange( + VmaAllocation allocation, + VkDeviceSize offset, VkDeviceSize size, + VkMappedMemoryRange& outRange) const +{ + const uint32_t memTypeIndex = allocation->GetMemoryTypeIndex(); + if(size > 0 && IsMemoryTypeNonCoherent(memTypeIndex)) + { + const VkDeviceSize nonCoherentAtomSize = m_PhysicalDeviceProperties.limits.nonCoherentAtomSize; + const VkDeviceSize allocationSize = allocation->GetSize(); + VMA_ASSERT(offset <= allocationSize); + + outRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; + outRange.pNext = VMA_NULL; + outRange.memory = allocation->GetMemory(); + + switch(allocation->GetType()) + { + case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: + outRange.offset = VmaAlignDown(offset, nonCoherentAtomSize); + if(size == VK_WHOLE_SIZE) + { + outRange.size = allocationSize - outRange.offset; + } + else + { + VMA_ASSERT(offset + size <= allocationSize); + outRange.size = VMA_MIN( + VmaAlignUp(size + (offset - outRange.offset), nonCoherentAtomSize), + allocationSize - outRange.offset); + } + break; + case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: + { + // 1. Still within this allocation. + outRange.offset = VmaAlignDown(offset, nonCoherentAtomSize); + if(size == VK_WHOLE_SIZE) + { + size = allocationSize - offset; + } + else + { + VMA_ASSERT(offset + size <= allocationSize); + } + outRange.size = VmaAlignUp(size + (offset - outRange.offset), nonCoherentAtomSize); + + // 2. Adjust to whole block. 
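+            // Worked example (hypothetical numbers): with nonCoherentAtomSize = 64,
+            // offset = 100 and size = 200, step 1 yields
+            // outRange.offset = VmaAlignDown(100, 64) = 64 and
+            // outRange.size = VmaAlignUp(200 + (100 - 64), 64) = 256; step 2 below
+            // then shifts the range by the allocation's offset inside the block and
+            // clamps it to the block size.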
+ const VkDeviceSize allocationOffset = allocation->GetOffset(); + VMA_ASSERT(allocationOffset % nonCoherentAtomSize == 0); + const VkDeviceSize blockSize = allocation->GetBlock()->m_pMetadata->GetSize(); + outRange.offset += allocationOffset; + outRange.size = VMA_MIN(outRange.size, blockSize - outRange.offset); + + break; + } + default: + VMA_ASSERT(0); + } + return true; + } + return false; +} + +#if VMA_MEMORY_BUDGET +void VmaAllocator_T::UpdateVulkanBudget() +{ + VMA_ASSERT(m_UseExtMemoryBudget); + + VkPhysicalDeviceMemoryProperties2KHR memProps = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2_KHR }; + + VkPhysicalDeviceMemoryBudgetPropertiesEXT budgetProps = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT }; + VmaPnextChainPushFront(&memProps, &budgetProps); + + GetVulkanFunctions().vkGetPhysicalDeviceMemoryProperties2KHR(m_PhysicalDevice, &memProps); + + { + VmaMutexLockWrite lockWrite(m_Budget.m_BudgetMutex, m_UseMutex); + + for(uint32_t heapIndex = 0; heapIndex < GetMemoryHeapCount(); ++heapIndex) + { + m_Budget.m_VulkanUsage[heapIndex] = budgetProps.heapUsage[heapIndex]; + m_Budget.m_VulkanBudget[heapIndex] = budgetProps.heapBudget[heapIndex]; + m_Budget.m_BlockBytesAtBudgetFetch[heapIndex] = m_Budget.m_BlockBytes[heapIndex].load(); + + // Some bugged drivers return the budget incorrectly, e.g. 0 or much bigger than heap size. + if(m_Budget.m_VulkanBudget[heapIndex] == 0) + { + m_Budget.m_VulkanBudget[heapIndex] = m_MemProps.memoryHeaps[heapIndex].size * 8 / 10; // 80% heuristics. + } + else if(m_Budget.m_VulkanBudget[heapIndex] > m_MemProps.memoryHeaps[heapIndex].size) + { + m_Budget.m_VulkanBudget[heapIndex] = m_MemProps.memoryHeaps[heapIndex].size; + } + if(m_Budget.m_VulkanUsage[heapIndex] == 0 && m_Budget.m_BlockBytesAtBudgetFetch[heapIndex] > 0) + { + m_Budget.m_VulkanUsage[heapIndex] = m_Budget.m_BlockBytesAtBudgetFetch[heapIndex]; + } + } + m_Budget.m_OperationsSinceBudgetFetch = 0; + } +} +#endif // VMA_MEMORY_BUDGET + +void VmaAllocator_T::FillAllocation(const VmaAllocation hAllocation, uint8_t pattern) +{ + if(VMA_DEBUG_INITIALIZE_ALLOCATIONS && + hAllocation->IsMappingAllowed() && + (m_MemProps.memoryTypes[hAllocation->GetMemoryTypeIndex()].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0) + { + void* pData = VMA_NULL; + VkResult res = Map(hAllocation, &pData); + if(res == VK_SUCCESS) + { + memset(pData, (int)pattern, (size_t)hAllocation->GetSize()); + FlushOrInvalidateAllocation(hAllocation, 0, VK_WHOLE_SIZE, VMA_CACHE_FLUSH); + Unmap(hAllocation); + } + else + { + VMA_ASSERT(0 && "VMA_DEBUG_INITIALIZE_ALLOCATIONS is enabled, but couldn't map memory to fill allocation."); + } + } +} + +uint32_t VmaAllocator_T::GetGpuDefragmentationMemoryTypeBits() +{ + uint32_t memoryTypeBits = m_GpuDefragmentationMemoryTypeBits.load(); + if(memoryTypeBits == UINT32_MAX) + { + memoryTypeBits = CalculateGpuDefragmentationMemoryTypeBits(); + m_GpuDefragmentationMemoryTypeBits.store(memoryTypeBits); + } + return memoryTypeBits; +} + +#if VMA_STATS_STRING_ENABLED +void VmaAllocator_T::PrintDetailedMap(VmaJsonWriter& json) +{ + json.WriteString("DefaultPools"); + json.BeginObject(); + { + for (uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) + { + VmaBlockVector* pBlockVector = m_pBlockVectors[memTypeIndex]; + VmaDedicatedAllocationList& dedicatedAllocList = m_DedicatedAllocations[memTypeIndex]; + if (pBlockVector != VMA_NULL) + { + json.BeginString("Type "); + json.ContinueString(memTypeIndex); + json.EndString(); + 
json.BeginObject(); + { + json.WriteString("PreferredBlockSize"); + json.WriteNumber(pBlockVector->GetPreferredBlockSize()); + + json.WriteString("Blocks"); + pBlockVector->PrintDetailedMap(json); + + json.WriteString("DedicatedAllocations"); + dedicatedAllocList.BuildStatsString(json); + } + json.EndObject(); + } + } + } + json.EndObject(); + + json.WriteString("CustomPools"); + json.BeginObject(); + { + VmaMutexLockRead lock(m_PoolsMutex, m_UseMutex); + if (!m_Pools.IsEmpty()) + { + for (uint32_t memTypeIndex = 0; memTypeIndex < GetMemoryTypeCount(); ++memTypeIndex) + { + bool displayType = true; + size_t index = 0; + for (VmaPool pool = m_Pools.Front(); pool != VMA_NULL; pool = m_Pools.GetNext(pool)) + { + VmaBlockVector& blockVector = pool->m_BlockVector; + if (blockVector.GetMemoryTypeIndex() == memTypeIndex) + { + if (displayType) + { + json.BeginString("Type "); + json.ContinueString(memTypeIndex); + json.EndString(); + json.BeginArray(); + displayType = false; + } + + json.BeginObject(); + { + json.WriteString("Name"); + json.BeginString(); + json.ContinueString((uint64_t)index++); + if (pool->GetName()) + { + json.ContinueString(" - "); + json.ContinueString(pool->GetName()); + } + json.EndString(); + + json.WriteString("PreferredBlockSize"); + json.WriteNumber(blockVector.GetPreferredBlockSize()); + + json.WriteString("Blocks"); + blockVector.PrintDetailedMap(json); + + json.WriteString("DedicatedAllocations"); + pool->m_DedicatedAllocations.BuildStatsString(json); + } + json.EndObject(); + } + } + + if (!displayType) + json.EndArray(); + } + } + } + json.EndObject(); +} +#endif // VMA_STATS_STRING_ENABLED +#endif // _VMA_ALLOCATOR_T_FUNCTIONS + + +#ifndef _VMA_PUBLIC_INTERFACE +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAllocator( + const VmaAllocatorCreateInfo* pCreateInfo, + VmaAllocator* pAllocator) +{ + VMA_ASSERT(pCreateInfo && pAllocator); + VMA_ASSERT(pCreateInfo->vulkanApiVersion == 0 || + (VK_VERSION_MAJOR(pCreateInfo->vulkanApiVersion) == 1 && VK_VERSION_MINOR(pCreateInfo->vulkanApiVersion) <= 3)); + VMA_DEBUG_LOG("vmaCreateAllocator"); + *pAllocator = vma_new(pCreateInfo->pAllocationCallbacks, VmaAllocator_T)(pCreateInfo); + VkResult result = (*pAllocator)->Init(pCreateInfo); + if(result < 0) + { + vma_delete(pCreateInfo->pAllocationCallbacks, *pAllocator); + *pAllocator = VK_NULL_HANDLE; + } + return result; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyAllocator( + VmaAllocator allocator) +{ + if(allocator != VK_NULL_HANDLE) + { + VMA_DEBUG_LOG("vmaDestroyAllocator"); + VkAllocationCallbacks allocationCallbacks = allocator->m_AllocationCallbacks; // Have to copy the callbacks when destroying. 
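+        // The copy is required because vma_delete() below destroys the object that
+        // owns m_AllocationCallbacks; freeing through a pointer into the destroyed
+        // allocator would be a use-after-free.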
+        vma_delete(&allocationCallbacks, allocator);
+    }
+}
+
+VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocatorInfo(VmaAllocator allocator, VmaAllocatorInfo* pAllocatorInfo)
+{
+    VMA_ASSERT(allocator && pAllocatorInfo);
+    pAllocatorInfo->instance = allocator->m_hInstance;
+    pAllocatorInfo->physicalDevice = allocator->GetPhysicalDevice();
+    pAllocatorInfo->device = allocator->m_hDevice;
+}
+
+VMA_CALL_PRE void VMA_CALL_POST vmaGetPhysicalDeviceProperties(
+    VmaAllocator allocator,
+    const VkPhysicalDeviceProperties **ppPhysicalDeviceProperties)
+{
+    VMA_ASSERT(allocator && ppPhysicalDeviceProperties);
+    *ppPhysicalDeviceProperties = &allocator->m_PhysicalDeviceProperties;
+}
+
+VMA_CALL_PRE void VMA_CALL_POST vmaGetMemoryProperties(
+    VmaAllocator allocator,
+    const VkPhysicalDeviceMemoryProperties** ppPhysicalDeviceMemoryProperties)
+{
+    VMA_ASSERT(allocator && ppPhysicalDeviceMemoryProperties);
+    *ppPhysicalDeviceMemoryProperties = &allocator->m_MemProps;
+}
+
+VMA_CALL_PRE void VMA_CALL_POST vmaGetMemoryTypeProperties(
+    VmaAllocator allocator,
+    uint32_t memoryTypeIndex,
+    VkMemoryPropertyFlags* pFlags)
+{
+    VMA_ASSERT(allocator && pFlags);
+    VMA_ASSERT(memoryTypeIndex < allocator->GetMemoryTypeCount());
+    *pFlags = allocator->m_MemProps.memoryTypes[memoryTypeIndex].propertyFlags;
+}
+
+VMA_CALL_PRE void VMA_CALL_POST vmaSetCurrentFrameIndex(
+    VmaAllocator allocator,
+    uint32_t frameIndex)
+{
+    VMA_ASSERT(allocator);
+
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+    allocator->SetCurrentFrameIndex(frameIndex);
+}
+
+VMA_CALL_PRE void VMA_CALL_POST vmaCalculateStatistics(
+    VmaAllocator allocator,
+    VmaTotalStatistics* pStats)
+{
+    VMA_ASSERT(allocator && pStats);
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+    allocator->CalculateStatistics(pStats);
+}
+
+VMA_CALL_PRE void VMA_CALL_POST vmaGetHeapBudgets(
+    VmaAllocator allocator,
+    VmaBudget* pBudgets)
+{
+    VMA_ASSERT(allocator && pBudgets);
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+    allocator->GetHeapBudgets(pBudgets, 0, allocator->GetMemoryHeapCount());
+}
+
+#if VMA_STATS_STRING_ENABLED
+
+VMA_CALL_PRE void VMA_CALL_POST vmaBuildStatsString(
+    VmaAllocator allocator,
+    char** ppStatsString,
+    VkBool32 detailedMap)
+{
+    VMA_ASSERT(allocator && ppStatsString);
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+    VmaStringBuilder sb(allocator->GetAllocationCallbacks());
+    {
+        VmaBudget budgets[VK_MAX_MEMORY_HEAPS];
+        allocator->GetHeapBudgets(budgets, 0, allocator->GetMemoryHeapCount());
+
+        VmaTotalStatistics stats;
+        allocator->CalculateStatistics(&stats);
+
+        VmaJsonWriter json(allocator->GetAllocationCallbacks(), sb);
+        json.BeginObject();
+        {
+            json.WriteString("General");
+            json.BeginObject();
+            {
+                const VkPhysicalDeviceProperties& deviceProperties = allocator->m_PhysicalDeviceProperties;
+                const VkPhysicalDeviceMemoryProperties& memoryProperties = allocator->m_MemProps;
+
+                json.WriteString("API");
+                json.WriteString("Vulkan");
+
+                json.WriteString("apiVersion");
+                json.BeginString();
+                json.ContinueString(VK_VERSION_MAJOR(deviceProperties.apiVersion));
+                json.ContinueString(".");
+                json.ContinueString(VK_VERSION_MINOR(deviceProperties.apiVersion));
+                json.ContinueString(".");
+                json.ContinueString(VK_VERSION_PATCH(deviceProperties.apiVersion));
+                json.EndString();
+
+                json.WriteString("GPU");
+                json.WriteString(deviceProperties.deviceName);
+                json.WriteString("deviceType");
+                json.WriteNumber(static_cast<uint32_t>(deviceProperties.deviceType));
+
+                json.WriteString("maxMemoryAllocationCount");
+                json.WriteNumber(deviceProperties.limits.maxMemoryAllocationCount);
+
json.WriteString("bufferImageGranularity"); + json.WriteNumber(deviceProperties.limits.bufferImageGranularity); + json.WriteString("nonCoherentAtomSize"); + json.WriteNumber(deviceProperties.limits.nonCoherentAtomSize); + + json.WriteString("memoryHeapCount"); + json.WriteNumber(memoryProperties.memoryHeapCount); + json.WriteString("memoryTypeCount"); + json.WriteNumber(memoryProperties.memoryTypeCount); + } + json.EndObject(); + } + { + json.WriteString("Total"); + VmaPrintDetailedStatistics(json, stats.total); + } + { + json.WriteString("MemoryInfo"); + json.BeginObject(); + { + for (uint32_t heapIndex = 0; heapIndex < allocator->GetMemoryHeapCount(); ++heapIndex) + { + json.BeginString("Heap "); + json.ContinueString(heapIndex); + json.EndString(); + json.BeginObject(); + { + const VkMemoryHeap& heapInfo = allocator->m_MemProps.memoryHeaps[heapIndex]; + json.WriteString("Flags"); + json.BeginArray(true); + { + if (heapInfo.flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) + json.WriteString("DEVICE_LOCAL"); + #if VMA_VULKAN_VERSION >= 1001000 + if (heapInfo.flags & VK_MEMORY_HEAP_MULTI_INSTANCE_BIT) + json.WriteString("MULTI_INSTANCE"); + #endif + + VkMemoryHeapFlags flags = heapInfo.flags & + ~(VK_MEMORY_HEAP_DEVICE_LOCAL_BIT + #if VMA_VULKAN_VERSION >= 1001000 + | VK_MEMORY_HEAP_MULTI_INSTANCE_BIT + #endif + ); + if (flags != 0) + json.WriteNumber(flags); + } + json.EndArray(); + + json.WriteString("Size"); + json.WriteNumber(heapInfo.size); + + json.WriteString("Budget"); + json.BeginObject(); + { + json.WriteString("BudgetBytes"); + json.WriteNumber(budgets[heapIndex].budget); + json.WriteString("UsageBytes"); + json.WriteNumber(budgets[heapIndex].usage); + } + json.EndObject(); + + json.WriteString("Stats"); + VmaPrintDetailedStatistics(json, stats.memoryHeap[heapIndex]); + + json.WriteString("MemoryPools"); + json.BeginObject(); + { + for (uint32_t typeIndex = 0; typeIndex < allocator->GetMemoryTypeCount(); ++typeIndex) + { + if (allocator->MemoryTypeIndexToHeapIndex(typeIndex) == heapIndex) + { + json.BeginString("Type "); + json.ContinueString(typeIndex); + json.EndString(); + json.BeginObject(); + { + json.WriteString("Flags"); + json.BeginArray(true); + { + VkMemoryPropertyFlags flags = allocator->m_MemProps.memoryTypes[typeIndex].propertyFlags; + if (flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) + json.WriteString("DEVICE_LOCAL"); + if (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + json.WriteString("HOST_VISIBLE"); + if (flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) + json.WriteString("HOST_COHERENT"); + if (flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) + json.WriteString("HOST_CACHED"); + if (flags & VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT) + json.WriteString("LAZILY_ALLOCATED"); + #if VMA_VULKAN_VERSION >= 1001000 + if (flags & VK_MEMORY_PROPERTY_PROTECTED_BIT) + json.WriteString("PROTECTED"); + #endif + #if VK_AMD_device_coherent_memory + if (flags & VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY) + json.WriteString("DEVICE_COHERENT_AMD"); + if (flags & VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY) + json.WriteString("DEVICE_UNCACHED_AMD"); + #endif + + flags &= ~(VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT + #if VMA_VULKAN_VERSION >= 1001000 + | VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT + #endif + #if VK_AMD_device_coherent_memory + | VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY + | VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY + #endif + | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT + | VK_MEMORY_PROPERTY_HOST_CACHED_BIT); + if (flags != 0) + 
json.WriteNumber(flags); + } + json.EndArray(); + + json.WriteString("Stats"); + VmaPrintDetailedStatistics(json, stats.memoryType[typeIndex]); + } + json.EndObject(); + } + } + + } + json.EndObject(); + } + json.EndObject(); + } + } + json.EndObject(); + } + + if (detailedMap == VK_TRUE) + allocator->PrintDetailedMap(json); + + json.EndObject(); + } + + *ppStatsString = VmaCreateStringCopy(allocator->GetAllocationCallbacks(), sb.GetData(), sb.GetLength()); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaFreeStatsString( + VmaAllocator allocator, + char* pStatsString) +{ + if(pStatsString != VMA_NULL) + { + VMA_ASSERT(allocator); + VmaFreeString(allocator->GetAllocationCallbacks(), pStatsString); + } +} + +#endif // VMA_STATS_STRING_ENABLED + +/* +This function is not protected by any mutex because it just reads immutable data. +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndex( + VmaAllocator allocator, + uint32_t memoryTypeBits, + const VmaAllocationCreateInfo* pAllocationCreateInfo, + uint32_t* pMemoryTypeIndex) +{ + VMA_ASSERT(allocator != VK_NULL_HANDLE); + VMA_ASSERT(pAllocationCreateInfo != VMA_NULL); + VMA_ASSERT(pMemoryTypeIndex != VMA_NULL); + + return allocator->FindMemoryTypeIndex(memoryTypeBits, pAllocationCreateInfo, VmaBufferImageUsage::UNKNOWN, pMemoryTypeIndex); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForBufferInfo( + VmaAllocator allocator, + const VkBufferCreateInfo* pBufferCreateInfo, + const VmaAllocationCreateInfo* pAllocationCreateInfo, + uint32_t* pMemoryTypeIndex) +{ + VMA_ASSERT(allocator != VK_NULL_HANDLE); + VMA_ASSERT(pBufferCreateInfo != VMA_NULL); + VMA_ASSERT(pAllocationCreateInfo != VMA_NULL); + VMA_ASSERT(pMemoryTypeIndex != VMA_NULL); + + const VkDevice hDev = allocator->m_hDevice; + const VmaVulkanFunctions* funcs = &allocator->GetVulkanFunctions(); + VkResult res; + +#if VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 + if(funcs->vkGetDeviceBufferMemoryRequirements) + { + // Can query straight from VkBufferCreateInfo :) + VkDeviceBufferMemoryRequirementsKHR devBufMemReq = {VK_STRUCTURE_TYPE_DEVICE_BUFFER_MEMORY_REQUIREMENTS_KHR}; + devBufMemReq.pCreateInfo = pBufferCreateInfo; + + VkMemoryRequirements2 memReq = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2}; + (*funcs->vkGetDeviceBufferMemoryRequirements)(hDev, &devBufMemReq, &memReq); + + res = allocator->FindMemoryTypeIndex( + memReq.memoryRequirements.memoryTypeBits, pAllocationCreateInfo, + VmaBufferImageUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5), pMemoryTypeIndex); + } + else +#endif // VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 + { + // Must create a dummy buffer to query :( + VkBuffer hBuffer = VK_NULL_HANDLE; + res = funcs->vkCreateBuffer( + hDev, pBufferCreateInfo, allocator->GetAllocationCallbacks(), &hBuffer); + if(res == VK_SUCCESS) + { + VkMemoryRequirements memReq = {}; + funcs->vkGetBufferMemoryRequirements(hDev, hBuffer, &memReq); + + res = allocator->FindMemoryTypeIndex( + memReq.memoryTypeBits, pAllocationCreateInfo, + VmaBufferImageUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5), pMemoryTypeIndex); + + funcs->vkDestroyBuffer( + hDev, hBuffer, allocator->GetAllocationCallbacks()); + } + } + return res; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForImageInfo( + VmaAllocator allocator, + const VkImageCreateInfo* pImageCreateInfo, + const VmaAllocationCreateInfo* pAllocationCreateInfo, + uint32_t* pMemoryTypeIndex) +{ + VMA_ASSERT(allocator != VK_NULL_HANDLE); + VMA_ASSERT(pImageCreateInfo != 
VMA_NULL); + VMA_ASSERT(pAllocationCreateInfo != VMA_NULL); + VMA_ASSERT(pMemoryTypeIndex != VMA_NULL); + + const VkDevice hDev = allocator->m_hDevice; + const VmaVulkanFunctions* funcs = &allocator->GetVulkanFunctions(); + VkResult res; + +#if VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 + if(funcs->vkGetDeviceImageMemoryRequirements) + { + // Can query straight from VkImageCreateInfo :) + VkDeviceImageMemoryRequirementsKHR devImgMemReq = {VK_STRUCTURE_TYPE_DEVICE_IMAGE_MEMORY_REQUIREMENTS_KHR}; + devImgMemReq.pCreateInfo = pImageCreateInfo; + VMA_ASSERT(pImageCreateInfo->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT_COPY && (pImageCreateInfo->flags & VK_IMAGE_CREATE_DISJOINT_BIT_COPY) == 0 && + "Cannot use this VkImageCreateInfo with vmaFindMemoryTypeIndexForImageInfo as I don't know what to pass as VkDeviceImageMemoryRequirements::planeAspect."); + + VkMemoryRequirements2 memReq = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2}; + (*funcs->vkGetDeviceImageMemoryRequirements)(hDev, &devImgMemReq, &memReq); + + res = allocator->FindMemoryTypeIndex( + memReq.memoryRequirements.memoryTypeBits, pAllocationCreateInfo, + VmaBufferImageUsage(*pImageCreateInfo), pMemoryTypeIndex); + } + else +#endif // VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 + { + // Must create a dummy image to query :( + VkImage hImage = VK_NULL_HANDLE; + res = funcs->vkCreateImage( + hDev, pImageCreateInfo, allocator->GetAllocationCallbacks(), &hImage); + if(res == VK_SUCCESS) + { + VkMemoryRequirements memReq = {}; + funcs->vkGetImageMemoryRequirements(hDev, hImage, &memReq); + + res = allocator->FindMemoryTypeIndex( + memReq.memoryTypeBits, pAllocationCreateInfo, + VmaBufferImageUsage(*pImageCreateInfo), pMemoryTypeIndex); + + funcs->vkDestroyImage( + hDev, hImage, allocator->GetAllocationCallbacks()); + } + } + return res; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreatePool( + VmaAllocator allocator, + const VmaPoolCreateInfo* pCreateInfo, + VmaPool* pPool) +{ + VMA_ASSERT(allocator && pCreateInfo && pPool); + + VMA_DEBUG_LOG("vmaCreatePool"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->CreatePool(pCreateInfo, pPool); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyPool( + VmaAllocator allocator, + VmaPool pool) +{ + VMA_ASSERT(allocator); + + if(pool == VK_NULL_HANDLE) + { + return; + } + + VMA_DEBUG_LOG("vmaDestroyPool"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + allocator->DestroyPool(pool); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolStatistics( + VmaAllocator allocator, + VmaPool pool, + VmaStatistics* pPoolStats) +{ + VMA_ASSERT(allocator && pool && pPoolStats); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + allocator->GetPoolStatistics(pool, pPoolStats); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaCalculatePoolStatistics( + VmaAllocator allocator, + VmaPool pool, + VmaDetailedStatistics* pPoolStats) +{ + VMA_ASSERT(allocator && pool && pPoolStats); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + allocator->CalculatePoolStatistics(pool, pPoolStats); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckPoolCorruption(VmaAllocator allocator, VmaPool pool) +{ + VMA_ASSERT(allocator && pool); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + VMA_DEBUG_LOG("vmaCheckPoolCorruption"); + + return allocator->CheckPoolCorruption(pool); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetPoolName( + VmaAllocator allocator, + VmaPool pool, + const char** ppName) +{ + VMA_ASSERT(allocator && pool && ppName); + + VMA_DEBUG_LOG("vmaGetPoolName"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + *ppName = pool->GetName(); +} + +VMA_CALL_PRE void VMA_CALL_POST 
vmaSetPoolName( + VmaAllocator allocator, + VmaPool pool, + const char* pName) +{ + VMA_ASSERT(allocator && pool); + + VMA_DEBUG_LOG("vmaSetPoolName"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + pool->SetName(pName); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemory( + VmaAllocator allocator, + const VkMemoryRequirements* pVkMemoryRequirements, + const VmaAllocationCreateInfo* pCreateInfo, + VmaAllocation* pAllocation, + VmaAllocationInfo* pAllocationInfo) +{ + VMA_ASSERT(allocator && pVkMemoryRequirements && pCreateInfo && pAllocation); + + VMA_DEBUG_LOG("vmaAllocateMemory"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + VkResult result = allocator->AllocateMemory( + *pVkMemoryRequirements, + false, // requiresDedicatedAllocation + false, // prefersDedicatedAllocation + VK_NULL_HANDLE, // dedicatedBuffer + VK_NULL_HANDLE, // dedicatedImage + VmaBufferImageUsage::UNKNOWN, // dedicatedBufferImageUsage + *pCreateInfo, + VMA_SUBALLOCATION_TYPE_UNKNOWN, + 1, // allocationCount + pAllocation); + + if(pAllocationInfo != VMA_NULL && result == VK_SUCCESS) + { + allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); + } + + return result; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryPages( + VmaAllocator allocator, + const VkMemoryRequirements* pVkMemoryRequirements, + const VmaAllocationCreateInfo* pCreateInfo, + size_t allocationCount, + VmaAllocation* pAllocations, + VmaAllocationInfo* pAllocationInfo) +{ + if(allocationCount == 0) + { + return VK_SUCCESS; + } + + VMA_ASSERT(allocator && pVkMemoryRequirements && pCreateInfo && pAllocations); + + VMA_DEBUG_LOG("vmaAllocateMemoryPages"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + VkResult result = allocator->AllocateMemory( + *pVkMemoryRequirements, + false, // requiresDedicatedAllocation + false, // prefersDedicatedAllocation + VK_NULL_HANDLE, // dedicatedBuffer + VK_NULL_HANDLE, // dedicatedImage + VmaBufferImageUsage::UNKNOWN, // dedicatedBufferImageUsage + *pCreateInfo, + VMA_SUBALLOCATION_TYPE_UNKNOWN, + allocationCount, + pAllocations); + + if(pAllocationInfo != VMA_NULL && result == VK_SUCCESS) + { + for(size_t i = 0; i < allocationCount; ++i) + { + allocator->GetAllocationInfo(pAllocations[i], pAllocationInfo + i); + } + } + + return result; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForBuffer( + VmaAllocator allocator, + VkBuffer buffer, + const VmaAllocationCreateInfo* pCreateInfo, + VmaAllocation* pAllocation, + VmaAllocationInfo* pAllocationInfo) +{ + VMA_ASSERT(allocator && buffer != VK_NULL_HANDLE && pCreateInfo && pAllocation); + + VMA_DEBUG_LOG("vmaAllocateMemoryForBuffer"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + VkMemoryRequirements vkMemReq = {}; + bool requiresDedicatedAllocation = false; + bool prefersDedicatedAllocation = false; + allocator->GetBufferMemoryRequirements(buffer, vkMemReq, + requiresDedicatedAllocation, + prefersDedicatedAllocation); + + VkResult result = allocator->AllocateMemory( + vkMemReq, + requiresDedicatedAllocation, + prefersDedicatedAllocation, + buffer, // dedicatedBuffer + VK_NULL_HANDLE, // dedicatedImage + VmaBufferImageUsage::UNKNOWN, // dedicatedBufferImageUsage + *pCreateInfo, + VMA_SUBALLOCATION_TYPE_BUFFER, + 1, // allocationCount + pAllocation); + + if(pAllocationInfo && result == VK_SUCCESS) + { + allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); + } + + return result; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForImage( + VmaAllocator allocator, + VkImage image, + const VmaAllocationCreateInfo* pCreateInfo, + VmaAllocation* pAllocation, + 
VmaAllocationInfo* pAllocationInfo) +{ + VMA_ASSERT(allocator && image != VK_NULL_HANDLE && pCreateInfo && pAllocation); + + VMA_DEBUG_LOG("vmaAllocateMemoryForImage"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + VkMemoryRequirements vkMemReq = {}; + bool requiresDedicatedAllocation = false; + bool prefersDedicatedAllocation = false; + allocator->GetImageMemoryRequirements(image, vkMemReq, + requiresDedicatedAllocation, prefersDedicatedAllocation); + + VkResult result = allocator->AllocateMemory( + vkMemReq, + requiresDedicatedAllocation, + prefersDedicatedAllocation, + VK_NULL_HANDLE, // dedicatedBuffer + image, // dedicatedImage + VmaBufferImageUsage::UNKNOWN, // dedicatedBufferImageUsage + *pCreateInfo, + VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN, + 1, // allocationCount + pAllocation); + + if(pAllocationInfo && result == VK_SUCCESS) + { + allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); + } + + return result; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaFreeMemory( + VmaAllocator allocator, + VmaAllocation allocation) +{ + VMA_ASSERT(allocator); + + if(allocation == VK_NULL_HANDLE) + { + return; + } + + VMA_DEBUG_LOG("vmaFreeMemory"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + allocator->FreeMemory( + 1, // allocationCount + &allocation); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaFreeMemoryPages( + VmaAllocator allocator, + size_t allocationCount, + const VmaAllocation* pAllocations) +{ + if(allocationCount == 0) + { + return; + } + + VMA_ASSERT(allocator); + + VMA_DEBUG_LOG("vmaFreeMemoryPages"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + allocator->FreeMemory(allocationCount, pAllocations); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationInfo( + VmaAllocator allocator, + VmaAllocation allocation, + VmaAllocationInfo* pAllocationInfo) +{ + VMA_ASSERT(allocator && allocation && pAllocationInfo); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + allocator->GetAllocationInfo(allocation, pAllocationInfo); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationInfo2( + VmaAllocator allocator, + VmaAllocation allocation, + VmaAllocationInfo2* pAllocationInfo) +{ + VMA_ASSERT(allocator && allocation && pAllocationInfo); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + allocator->GetAllocationInfo2(allocation, pAllocationInfo); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationUserData( + VmaAllocator allocator, + VmaAllocation allocation, + void* pUserData) +{ + VMA_ASSERT(allocator && allocation); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + allocation->SetUserData(allocator, pUserData); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationName( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + const char* VMA_NULLABLE pName) +{ + allocation->SetName(allocator, pName); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationMemoryProperties( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VkMemoryPropertyFlags* VMA_NOT_NULL pFlags) +{ + VMA_ASSERT(allocator && allocation && pFlags); + const uint32_t memTypeIndex = allocation->GetMemoryTypeIndex(); + *pFlags = allocator->m_MemProps.memoryTypes[memTypeIndex].propertyFlags; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaMapMemory( + VmaAllocator allocator, + VmaAllocation allocation, + void** ppData) +{ + VMA_ASSERT(allocator && allocation && ppData); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->Map(allocation, ppData); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaUnmapMemory( + VmaAllocator allocator, + VmaAllocation allocation) +{ + VMA_ASSERT(allocator && allocation); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + 
allocator->Unmap(allocation); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFlushAllocation( + VmaAllocator allocator, + VmaAllocation allocation, + VkDeviceSize offset, + VkDeviceSize size) +{ + VMA_ASSERT(allocator && allocation); + + VMA_DEBUG_LOG("vmaFlushAllocation"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->FlushOrInvalidateAllocation(allocation, offset, size, VMA_CACHE_FLUSH); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaInvalidateAllocation( + VmaAllocator allocator, + VmaAllocation allocation, + VkDeviceSize offset, + VkDeviceSize size) +{ + VMA_ASSERT(allocator && allocation); + + VMA_DEBUG_LOG("vmaInvalidateAllocation"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->FlushOrInvalidateAllocation(allocation, offset, size, VMA_CACHE_INVALIDATE); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaFlushAllocations( + VmaAllocator allocator, + uint32_t allocationCount, + const VmaAllocation* allocations, + const VkDeviceSize* offsets, + const VkDeviceSize* sizes) +{ + VMA_ASSERT(allocator); + + if(allocationCount == 0) + { + return VK_SUCCESS; + } + + VMA_ASSERT(allocations); + + VMA_DEBUG_LOG("vmaFlushAllocations"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->FlushOrInvalidateAllocations(allocationCount, allocations, offsets, sizes, VMA_CACHE_FLUSH); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaInvalidateAllocations( + VmaAllocator allocator, + uint32_t allocationCount, + const VmaAllocation* allocations, + const VkDeviceSize* offsets, + const VkDeviceSize* sizes) +{ + VMA_ASSERT(allocator); + + if(allocationCount == 0) + { + return VK_SUCCESS; + } + + VMA_ASSERT(allocations); + + VMA_DEBUG_LOG("vmaInvalidateAllocations"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->FlushOrInvalidateAllocations(allocationCount, allocations, offsets, sizes, VMA_CACHE_INVALIDATE); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCopyMemoryToAllocation( + VmaAllocator allocator, + const void* pSrcHostPointer, + VmaAllocation dstAllocation, + VkDeviceSize dstAllocationLocalOffset, + VkDeviceSize size) +{ + VMA_ASSERT(allocator && pSrcHostPointer && dstAllocation); + + if(size == 0) + { + return VK_SUCCESS; + } + + VMA_DEBUG_LOG("vmaCopyMemoryToAllocation"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->CopyMemoryToAllocation(pSrcHostPointer, dstAllocation, dstAllocationLocalOffset, size); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCopyAllocationToMemory( + VmaAllocator allocator, + VmaAllocation srcAllocation, + VkDeviceSize srcAllocationLocalOffset, + void* pDstHostPointer, + VkDeviceSize size) +{ + VMA_ASSERT(allocator && srcAllocation && pDstHostPointer); + + if(size == 0) + { + return VK_SUCCESS; + } + + VMA_DEBUG_LOG("vmaCopyAllocationToMemory"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->CopyAllocationToMemory(srcAllocation, srcAllocationLocalOffset, pDstHostPointer, size); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckCorruption( + VmaAllocator allocator, + uint32_t memoryTypeBits) +{ + VMA_ASSERT(allocator); + + VMA_DEBUG_LOG("vmaCheckCorruption"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->CheckCorruption(memoryTypeBits); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentation( + VmaAllocator allocator, + const VmaDefragmentationInfo* pInfo, + VmaDefragmentationContext* pContext) +{ + VMA_ASSERT(allocator && pInfo && pContext); + + VMA_DEBUG_LOG("vmaBeginDefragmentation"); + + if (pInfo->pool != VMA_NULL) + { + // Check if run on supported algorithms + if (pInfo->pool->m_BlockVector.GetAlgorithm() & 
VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT) + return VK_ERROR_FEATURE_NOT_PRESENT; + } + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + *pContext = vma_new(allocator, VmaDefragmentationContext_T)(allocator, *pInfo); + return VK_SUCCESS; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaEndDefragmentation( + VmaAllocator allocator, + VmaDefragmentationContext context, + VmaDefragmentationStats* pStats) +{ + VMA_ASSERT(allocator && context); + + VMA_DEBUG_LOG("vmaEndDefragmentation"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + if (pStats) + context->GetStats(*pStats); + vma_delete(allocator, context); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBeginDefragmentationPass( + VmaAllocator VMA_NOT_NULL allocator, + VmaDefragmentationContext VMA_NOT_NULL context, + VmaDefragmentationPassMoveInfo* VMA_NOT_NULL pPassInfo) +{ + VMA_ASSERT(context && pPassInfo); + + VMA_DEBUG_LOG("vmaBeginDefragmentationPass"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return context->DefragmentPassBegin(*pPassInfo); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaEndDefragmentationPass( + VmaAllocator VMA_NOT_NULL allocator, + VmaDefragmentationContext VMA_NOT_NULL context, + VmaDefragmentationPassMoveInfo* VMA_NOT_NULL pPassInfo) +{ + VMA_ASSERT(context && pPassInfo); + + VMA_DEBUG_LOG("vmaEndDefragmentationPass"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return context->DefragmentPassEnd(*pPassInfo); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindBufferMemory( + VmaAllocator allocator, + VmaAllocation allocation, + VkBuffer buffer) +{ + VMA_ASSERT(allocator && allocation && buffer); + + VMA_DEBUG_LOG("vmaBindBufferMemory"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->BindBufferMemory(allocation, 0, buffer, VMA_NULL); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindBufferMemory2( + VmaAllocator allocator, + VmaAllocation allocation, + VkDeviceSize allocationLocalOffset, + VkBuffer buffer, + const void* pNext) +{ + VMA_ASSERT(allocator && allocation && buffer); + + VMA_DEBUG_LOG("vmaBindBufferMemory2"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->BindBufferMemory(allocation, allocationLocalOffset, buffer, pNext); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindImageMemory( + VmaAllocator allocator, + VmaAllocation allocation, + VkImage image) +{ + VMA_ASSERT(allocator && allocation && image); + + VMA_DEBUG_LOG("vmaBindImageMemory"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->BindImageMemory(allocation, 0, image, VMA_NULL); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindImageMemory2( + VmaAllocator allocator, + VmaAllocation allocation, + VkDeviceSize allocationLocalOffset, + VkImage image, + const void* pNext) +{ + VMA_ASSERT(allocator && allocation && image); + + VMA_DEBUG_LOG("vmaBindImageMemory2"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->BindImageMemory(allocation, allocationLocalOffset, image, pNext); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBuffer( + VmaAllocator allocator, + const VkBufferCreateInfo* pBufferCreateInfo, + const VmaAllocationCreateInfo* pAllocationCreateInfo, + VkBuffer* pBuffer, + VmaAllocation* pAllocation, + VmaAllocationInfo* pAllocationInfo) +{ + VMA_ASSERT(allocator && pBufferCreateInfo && pAllocationCreateInfo && pBuffer && pAllocation); + + if(pBufferCreateInfo->size == 0) + { + return VK_ERROR_INITIALIZATION_FAILED; + } + if((pBufferCreateInfo->usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_COPY) != 0 && + !allocator->m_UseKhrBufferDeviceAddress) + { + VMA_ASSERT(0 && "Creating a buffer with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT is not valid if 
VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT was not used."); + return VK_ERROR_INITIALIZATION_FAILED; + } + + VMA_DEBUG_LOG("vmaCreateBuffer"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + *pBuffer = VK_NULL_HANDLE; + *pAllocation = VK_NULL_HANDLE; + + // 1. Create VkBuffer. + VkResult res = (*allocator->GetVulkanFunctions().vkCreateBuffer)( + allocator->m_hDevice, + pBufferCreateInfo, + allocator->GetAllocationCallbacks(), + pBuffer); + if(res >= 0) + { + // 2. vkGetBufferMemoryRequirements. + VkMemoryRequirements vkMemReq = {}; + bool requiresDedicatedAllocation = false; + bool prefersDedicatedAllocation = false; + allocator->GetBufferMemoryRequirements(*pBuffer, vkMemReq, + requiresDedicatedAllocation, prefersDedicatedAllocation); + + // 3. Allocate memory using allocator. + res = allocator->AllocateMemory( + vkMemReq, + requiresDedicatedAllocation, + prefersDedicatedAllocation, + *pBuffer, // dedicatedBuffer + VK_NULL_HANDLE, // dedicatedImage + VmaBufferImageUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5), // dedicatedBufferImageUsage + *pAllocationCreateInfo, + VMA_SUBALLOCATION_TYPE_BUFFER, + 1, // allocationCount + pAllocation); + + if(res >= 0) + { + // 3. Bind buffer with memory. + if((pAllocationCreateInfo->flags & VMA_ALLOCATION_CREATE_DONT_BIND_BIT) == 0) + { + res = allocator->BindBufferMemory(*pAllocation, 0, *pBuffer, VMA_NULL); + } + if(res >= 0) + { + // All steps succeeded. + #if VMA_STATS_STRING_ENABLED + (*pAllocation)->InitBufferUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5); + #endif + if(pAllocationInfo != VMA_NULL) + { + allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); + } + + return VK_SUCCESS; + } + allocator->FreeMemory( + 1, // allocationCount + pAllocation); + *pAllocation = VK_NULL_HANDLE; + (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, *pBuffer, allocator->GetAllocationCallbacks()); + *pBuffer = VK_NULL_HANDLE; + return res; + } + (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, *pBuffer, allocator->GetAllocationCallbacks()); + *pBuffer = VK_NULL_HANDLE; + return res; + } + return res; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBufferWithAlignment( + VmaAllocator allocator, + const VkBufferCreateInfo* pBufferCreateInfo, + const VmaAllocationCreateInfo* pAllocationCreateInfo, + VkDeviceSize minAlignment, + VkBuffer* pBuffer, + VmaAllocation* pAllocation, + VmaAllocationInfo* pAllocationInfo) +{ + VMA_ASSERT(allocator && pBufferCreateInfo && pAllocationCreateInfo && VmaIsPow2(minAlignment) && pBuffer && pAllocation); + + if(pBufferCreateInfo->size == 0) + { + return VK_ERROR_INITIALIZATION_FAILED; + } + if((pBufferCreateInfo->usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_COPY) != 0 && + !allocator->m_UseKhrBufferDeviceAddress) + { + VMA_ASSERT(0 && "Creating a buffer with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT is not valid if VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT was not used."); + return VK_ERROR_INITIALIZATION_FAILED; + } + + VMA_DEBUG_LOG("vmaCreateBufferWithAlignment"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + *pBuffer = VK_NULL_HANDLE; + *pAllocation = VK_NULL_HANDLE; + + // 1. Create VkBuffer. + VkResult res = (*allocator->GetVulkanFunctions().vkCreateBuffer)( + allocator->m_hDevice, + pBufferCreateInfo, + allocator->GetAllocationCallbacks(), + pBuffer); + if(res >= 0) + { + // 2. vkGetBufferMemoryRequirements. 
+ VkMemoryRequirements vkMemReq = {}; + bool requiresDedicatedAllocation = false; + bool prefersDedicatedAllocation = false; + allocator->GetBufferMemoryRequirements(*pBuffer, vkMemReq, + requiresDedicatedAllocation, prefersDedicatedAllocation); + + // 2a. Include minAlignment + vkMemReq.alignment = VMA_MAX(vkMemReq.alignment, minAlignment); + + // 3. Allocate memory using allocator. + res = allocator->AllocateMemory( + vkMemReq, + requiresDedicatedAllocation, + prefersDedicatedAllocation, + *pBuffer, // dedicatedBuffer + VK_NULL_HANDLE, // dedicatedImage + VmaBufferImageUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5), // dedicatedBufferImageUsage + *pAllocationCreateInfo, + VMA_SUBALLOCATION_TYPE_BUFFER, + 1, // allocationCount + pAllocation); + + if(res >= 0) + { + // 3. Bind buffer with memory. + if((pAllocationCreateInfo->flags & VMA_ALLOCATION_CREATE_DONT_BIND_BIT) == 0) + { + res = allocator->BindBufferMemory(*pAllocation, 0, *pBuffer, VMA_NULL); + } + if(res >= 0) + { + // All steps succeeded. + #if VMA_STATS_STRING_ENABLED + (*pAllocation)->InitBufferUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5); + #endif + if(pAllocationInfo != VMA_NULL) + { + allocator->GetAllocationInfo(*pAllocation, pAllocationInfo); + } + + return VK_SUCCESS; + } + allocator->FreeMemory( + 1, // allocationCount + pAllocation); + *pAllocation = VK_NULL_HANDLE; + (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, *pBuffer, allocator->GetAllocationCallbacks()); + *pBuffer = VK_NULL_HANDLE; + return res; + } + (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, *pBuffer, allocator->GetAllocationCallbacks()); + *pBuffer = VK_NULL_HANDLE; + return res; + } + return res; +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingBuffer( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, + VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer) +{ + return vmaCreateAliasingBuffer2(allocator, allocation, 0, pBufferCreateInfo, pBuffer); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingBuffer2( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VkDeviceSize allocationLocalOffset, + const VkBufferCreateInfo* VMA_NOT_NULL pBufferCreateInfo, + VkBuffer VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pBuffer) +{ + VMA_ASSERT(allocator && pBufferCreateInfo && pBuffer && allocation); + VMA_ASSERT(allocationLocalOffset + pBufferCreateInfo->size <= allocation->GetSize()); + + VMA_DEBUG_LOG("vmaCreateAliasingBuffer2"); + + *pBuffer = VK_NULL_HANDLE; + + if (pBufferCreateInfo->size == 0) + { + return VK_ERROR_INITIALIZATION_FAILED; + } + if ((pBufferCreateInfo->usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_COPY) != 0 && + !allocator->m_UseKhrBufferDeviceAddress) + { + VMA_ASSERT(0 && "Creating a buffer with VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT is not valid if VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT was not used."); + return VK_ERROR_INITIALIZATION_FAILED; + } + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + // 1. Create VkBuffer. + VkResult res = (*allocator->GetVulkanFunctions().vkCreateBuffer)( + allocator->m_hDevice, + pBufferCreateInfo, + allocator->GetAllocationCallbacks(), + pBuffer); + if (res >= 0) + { + // 2. Bind buffer with memory. 
+ res = allocator->BindBufferMemory(allocation, allocationLocalOffset, *pBuffer, VMA_NULL); + if (res >= 0) + { + return VK_SUCCESS; + } + (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, *pBuffer, allocator->GetAllocationCallbacks()); + } + return res; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyBuffer( + VmaAllocator allocator, + VkBuffer buffer, + VmaAllocation allocation) +{ + VMA_ASSERT(allocator); + + if(buffer == VK_NULL_HANDLE && allocation == VK_NULL_HANDLE) + { + return; + } + + VMA_DEBUG_LOG("vmaDestroyBuffer"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + if(buffer != VK_NULL_HANDLE) + { + (*allocator->GetVulkanFunctions().vkDestroyBuffer)(allocator->m_hDevice, buffer, allocator->GetAllocationCallbacks()); + } + + if(allocation != VK_NULL_HANDLE) + { + allocator->FreeMemory( + 1, // allocationCount + &allocation); + } +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateImage( + VmaAllocator allocator, + const VkImageCreateInfo* pImageCreateInfo, + const VmaAllocationCreateInfo* pAllocationCreateInfo, + VkImage* pImage, + VmaAllocation* pAllocation, + VmaAllocationInfo* pAllocationInfo) +{ + VMA_ASSERT(allocator && pImageCreateInfo && pAllocationCreateInfo && pImage && pAllocation); + + if(pImageCreateInfo->extent.width == 0 || + pImageCreateInfo->extent.height == 0 || + pImageCreateInfo->extent.depth == 0 || + pImageCreateInfo->mipLevels == 0 || + pImageCreateInfo->arrayLayers == 0) + { + return VK_ERROR_INITIALIZATION_FAILED; + } + + VMA_DEBUG_LOG("vmaCreateImage"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + *pImage = VK_NULL_HANDLE; + *pAllocation = VK_NULL_HANDLE; + + // 1. Create VkImage. + VkResult res = (*allocator->GetVulkanFunctions().vkCreateImage)( + allocator->m_hDevice, + pImageCreateInfo, + allocator->GetAllocationCallbacks(), + pImage); + if(res >= 0) + { + VmaSuballocationType suballocType = pImageCreateInfo->tiling == VK_IMAGE_TILING_OPTIMAL ? + VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL : + VMA_SUBALLOCATION_TYPE_IMAGE_LINEAR; + + // 2. Allocate memory using allocator. + VkMemoryRequirements vkMemReq = {}; + bool requiresDedicatedAllocation = false; + bool prefersDedicatedAllocation = false; + allocator->GetImageMemoryRequirements(*pImage, vkMemReq, + requiresDedicatedAllocation, prefersDedicatedAllocation); + + res = allocator->AllocateMemory( + vkMemReq, + requiresDedicatedAllocation, + prefersDedicatedAllocation, + VK_NULL_HANDLE, // dedicatedBuffer + *pImage, // dedicatedImage + VmaBufferImageUsage(*pImageCreateInfo), // dedicatedBufferImageUsage + *pAllocationCreateInfo, + suballocType, + 1, // allocationCount + pAllocation); + + if(res >= 0) + { + // 3. Bind image with memory. + if((pAllocationCreateInfo->flags & VMA_ALLOCATION_CREATE_DONT_BIND_BIT) == 0) + { + res = allocator->BindImageMemory(*pAllocation, 0, *pImage, VMA_NULL); + } + if(res >= 0) + { + // All steps succeeded. 
+            #if VMA_STATS_STRING_ENABLED
+                (*pAllocation)->InitImageUsage(*pImageCreateInfo);
+            #endif
+            if(pAllocationInfo != VMA_NULL)
+            {
+                allocator->GetAllocationInfo(*pAllocation, pAllocationInfo);
+            }
+
+            return VK_SUCCESS;
+        }
+        allocator->FreeMemory(
+            1, // allocationCount
+            pAllocation);
+        *pAllocation = VK_NULL_HANDLE;
+        (*allocator->GetVulkanFunctions().vkDestroyImage)(allocator->m_hDevice, *pImage, allocator->GetAllocationCallbacks());
+        *pImage = VK_NULL_HANDLE;
+        return res;
+    }
+    (*allocator->GetVulkanFunctions().vkDestroyImage)(allocator->m_hDevice, *pImage, allocator->GetAllocationCallbacks());
+    *pImage = VK_NULL_HANDLE;
+    return res;
+    }
+    return res;
+}
+
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingImage(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation,
+    const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo,
+    VkImage VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pImage)
+{
+    return vmaCreateAliasingImage2(allocator, allocation, 0, pImageCreateInfo, pImage);
+}
+
+VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateAliasingImage2(
+    VmaAllocator VMA_NOT_NULL allocator,
+    VmaAllocation VMA_NOT_NULL allocation,
+    VkDeviceSize allocationLocalOffset,
+    const VkImageCreateInfo* VMA_NOT_NULL pImageCreateInfo,
+    VkImage VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pImage)
+{
+    VMA_ASSERT(allocator && pImageCreateInfo && pImage && allocation);
+
+    *pImage = VK_NULL_HANDLE;
+
+    VMA_DEBUG_LOG("vmaCreateAliasingImage2");
+
+    if (pImageCreateInfo->extent.width == 0 ||
+        pImageCreateInfo->extent.height == 0 ||
+        pImageCreateInfo->extent.depth == 0 ||
+        pImageCreateInfo->mipLevels == 0 ||
+        pImageCreateInfo->arrayLayers == 0)
+    {
+        return VK_ERROR_INITIALIZATION_FAILED;
+    }
+
+    VMA_DEBUG_GLOBAL_MUTEX_LOCK
+
+    // 1. Create VkImage.
+    VkResult res = (*allocator->GetVulkanFunctions().vkCreateImage)(
+        allocator->m_hDevice,
+        pImageCreateInfo,
+        allocator->GetAllocationCallbacks(),
+        pImage);
+    if (res >= 0)
+    {
+        // 2. Bind image with memory.
+ res = allocator->BindImageMemory(allocation, allocationLocalOffset, *pImage, VMA_NULL); + if (res >= 0) + { + return VK_SUCCESS; + } + (*allocator->GetVulkanFunctions().vkDestroyImage)(allocator->m_hDevice, *pImage, allocator->GetAllocationCallbacks()); + } + return res; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyImage( + VmaAllocator VMA_NOT_NULL allocator, + VkImage VMA_NULLABLE_NON_DISPATCHABLE image, + VmaAllocation VMA_NULLABLE allocation) +{ + VMA_ASSERT(allocator); + + if(image == VK_NULL_HANDLE && allocation == VK_NULL_HANDLE) + { + return; + } + + VMA_DEBUG_LOG("vmaDestroyImage"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + if(image != VK_NULL_HANDLE) + { + (*allocator->GetVulkanFunctions().vkDestroyImage)(allocator->m_hDevice, image, allocator->GetAllocationCallbacks()); + } + if(allocation != VK_NULL_HANDLE) + { + allocator->FreeMemory( + 1, // allocationCount + &allocation); + } +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateVirtualBlock( + const VmaVirtualBlockCreateInfo* VMA_NOT_NULL pCreateInfo, + VmaVirtualBlock VMA_NULLABLE * VMA_NOT_NULL pVirtualBlock) +{ + VMA_ASSERT(pCreateInfo && pVirtualBlock); + VMA_ASSERT(pCreateInfo->size > 0); + VMA_DEBUG_LOG("vmaCreateVirtualBlock"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + *pVirtualBlock = vma_new(pCreateInfo->pAllocationCallbacks, VmaVirtualBlock_T)(*pCreateInfo); + VkResult res = (*pVirtualBlock)->Init(); + if(res < 0) + { + vma_delete(pCreateInfo->pAllocationCallbacks, *pVirtualBlock); + *pVirtualBlock = VK_NULL_HANDLE; + } + return res; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaDestroyVirtualBlock(VmaVirtualBlock VMA_NULLABLE virtualBlock) +{ + if(virtualBlock != VK_NULL_HANDLE) + { + VMA_DEBUG_LOG("vmaDestroyVirtualBlock"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + VkAllocationCallbacks allocationCallbacks = virtualBlock->m_AllocationCallbacks; // Have to copy the callbacks when destroying. + vma_delete(&allocationCallbacks, virtualBlock); + } +} + +VMA_CALL_PRE VkBool32 VMA_CALL_POST vmaIsVirtualBlockEmpty(VmaVirtualBlock VMA_NOT_NULL virtualBlock) +{ + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE); + VMA_DEBUG_LOG("vmaIsVirtualBlockEmpty"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + return virtualBlock->IsEmpty() ? 
VK_TRUE : VK_FALSE; +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetVirtualAllocationInfo(VmaVirtualBlock VMA_NOT_NULL virtualBlock, + VmaVirtualAllocation VMA_NOT_NULL_NON_DISPATCHABLE allocation, VmaVirtualAllocationInfo* VMA_NOT_NULL pVirtualAllocInfo) +{ + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && pVirtualAllocInfo != VMA_NULL); + VMA_DEBUG_LOG("vmaGetVirtualAllocationInfo"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + virtualBlock->GetAllocationInfo(allocation, *pVirtualAllocInfo); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaVirtualAllocate(VmaVirtualBlock VMA_NOT_NULL virtualBlock, + const VmaVirtualAllocationCreateInfo* VMA_NOT_NULL pCreateInfo, VmaVirtualAllocation VMA_NULLABLE_NON_DISPATCHABLE* VMA_NOT_NULL pAllocation, + VkDeviceSize* VMA_NULLABLE pOffset) +{ + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && pCreateInfo != VMA_NULL && pAllocation != VMA_NULL); + VMA_DEBUG_LOG("vmaVirtualAllocate"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + return virtualBlock->Allocate(*pCreateInfo, *pAllocation, pOffset); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaVirtualFree(VmaVirtualBlock VMA_NOT_NULL virtualBlock, VmaVirtualAllocation VMA_NULLABLE_NON_DISPATCHABLE allocation) +{ + if(allocation != VK_NULL_HANDLE) + { + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE); + VMA_DEBUG_LOG("vmaVirtualFree"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + virtualBlock->Free(allocation); + } +} + +VMA_CALL_PRE void VMA_CALL_POST vmaClearVirtualBlock(VmaVirtualBlock VMA_NOT_NULL virtualBlock) +{ + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE); + VMA_DEBUG_LOG("vmaClearVirtualBlock"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + virtualBlock->Clear(); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaSetVirtualAllocationUserData(VmaVirtualBlock VMA_NOT_NULL virtualBlock, + VmaVirtualAllocation VMA_NOT_NULL_NON_DISPATCHABLE allocation, void* VMA_NULLABLE pUserData) +{ + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE); + VMA_DEBUG_LOG("vmaSetVirtualAllocationUserData"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + virtualBlock->SetAllocationUserData(allocation, pUserData); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaGetVirtualBlockStatistics(VmaVirtualBlock VMA_NOT_NULL virtualBlock, + VmaStatistics* VMA_NOT_NULL pStats) +{ + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && pStats != VMA_NULL); + VMA_DEBUG_LOG("vmaGetVirtualBlockStatistics"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + virtualBlock->GetStatistics(*pStats); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaCalculateVirtualBlockStatistics(VmaVirtualBlock VMA_NOT_NULL virtualBlock, + VmaDetailedStatistics* VMA_NOT_NULL pStats) +{ + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && pStats != VMA_NULL); + VMA_DEBUG_LOG("vmaCalculateVirtualBlockStatistics"); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + virtualBlock->CalculateDetailedStatistics(*pStats); +} + +#if VMA_STATS_STRING_ENABLED + +VMA_CALL_PRE void VMA_CALL_POST vmaBuildVirtualBlockStatsString(VmaVirtualBlock VMA_NOT_NULL virtualBlock, + char* VMA_NULLABLE * VMA_NOT_NULL ppStatsString, VkBool32 detailedMap) +{ + VMA_ASSERT(virtualBlock != VK_NULL_HANDLE && ppStatsString != VMA_NULL); + VMA_DEBUG_GLOBAL_MUTEX_LOCK; + const VkAllocationCallbacks* allocationCallbacks = virtualBlock->GetAllocationCallbacks(); + VmaStringBuilder sb(allocationCallbacks); + virtualBlock->BuildStatsString(detailedMap != VK_FALSE, sb); + *ppStatsString = VmaCreateStringCopy(allocationCallbacks, sb.GetData(), sb.GetLength()); +} + +VMA_CALL_PRE void VMA_CALL_POST vmaFreeVirtualBlockStatsString(VmaVirtualBlock VMA_NOT_NULL virtualBlock, + char* VMA_NULLABLE pStatsString) +{ + if(pStatsString != VMA_NULL) + { + VMA_ASSERT(virtualBlock != 
VK_NULL_HANDLE);
+        VMA_DEBUG_GLOBAL_MUTEX_LOCK;
+        VmaFreeString(virtualBlock->GetAllocationCallbacks(), pStatsString);
+    }
+}
+#endif // VMA_STATS_STRING_ENABLED
+#endif // _VMA_PUBLIC_INTERFACE
+#endif // VMA_IMPLEMENTATION
+
+/**
+\page quick_start Quick start
+
+\section quick_start_project_setup Project setup
+
+Vulkan Memory Allocator comes in the form of an "stb-style" single header file.
+While you can pull the entire repository, e.g. as a Git submodule, and a CMake script is provided,
+you don't need to build it as a separate library project.
+You can add the file "vk_mem_alloc.h" directly to your project and submit it to your code repository next to your other source files.
+
+"Single header" doesn't mean that everything is contained in C/C++ declarations,
+like it tends to be in the case of inline functions or C++ templates.
+It means that the implementation is bundled with the interface in a single file and needs to be extracted using a preprocessor macro.
+If you don't do it properly, it will result in linker errors.
+
+To do it properly:
+
+-# Include "vk_mem_alloc.h" in each CPP file where you want to use the library.
+   This includes declarations of all members of the library.
+-# In exactly one CPP file define the following macro before this include.
+   It also enables internal definitions.
+
+\code
+#define VMA_IMPLEMENTATION
+#include "vk_mem_alloc.h"
+\endcode
+
+It may be a good idea to create a dedicated CPP file just for this purpose, e.g. "VmaUsage.cpp".
+
+This library includes the header `<vulkan/vulkan.h>`, which in turn
+includes `<windows.h>` on Windows. If you need some specific macros defined
+before including these headers (like `WIN32_LEAN_AND_MEAN` or
+`WINVER` for Windows, `VK_USE_PLATFORM_WIN32_KHR` for Vulkan), you must define
+them before every `#include` of this library.
+It may be a good idea to create a dedicated header file for this purpose, e.g. "VmaUsage.h",
+that will be included in other source files instead of the VMA header directly.
+
+This library is written in C++, but has a C-compatible interface.
+Thus, you can include and use "vk_mem_alloc.h" in C or C++ code, but the full
+implementation with the `VMA_IMPLEMENTATION` macro must be compiled as C++, NOT as C.
+Some features of C++14 are used and required. Features of C++20 are used optionally when available.
+Some headers of the standard C and C++ library are used, but STL containers, RTTI, and C++ exceptions are not used.
+
+
+\section quick_start_initialization Initialization
+
+VMA offers a library interface in a style similar to Vulkan, with object handles like #VmaAllocation,
+structures describing parameters of objects to be created like #VmaAllocationCreateInfo,
+and error codes returned from functions using the `VkResult` type.
+
+The first and the main object that needs to be created is #VmaAllocator.
+It represents the initialization of the entire library.
+Only one such object should be created per `VkDevice`.
+You should create it at program startup, after `VkDevice` was created, and before any device memory allocation is made.
+It must be destroyed before `VkDevice` is destroyed.
+
+At program startup:
+
+-# Initialize Vulkan to have `VkInstance`, `VkPhysicalDevice`, and `VkDevice` objects.
+-# Fill the VmaAllocatorCreateInfo structure and call vmaCreateAllocator() to create the #VmaAllocator object.
+
+Only the members `physicalDevice`, `device`, and `instance` are required.
+However, you should inform the library which Vulkan version you use by setting
+VmaAllocatorCreateInfo::vulkanApiVersion and which extensions you enabled
+by setting VmaAllocatorCreateInfo::flags.
+Otherwise, VMA would use only features of Vulkan 1.0 core with no extensions.
+See below for details.
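+
+Before diving into those details, a minimal initialization - a sketch assuming you link statically
+with the Vulkan loader and stay on the Vulkan 1.0 defaults, with `instance`, `physicalDevice`,
+and `device` created by your own setup code - can be as short as:
+
+\code
+VmaAllocatorCreateInfo allocatorCreateInfo = {};
+allocatorCreateInfo.physicalDevice = physicalDevice;
+allocatorCreateInfo.device = device;
+allocatorCreateInfo.instance = instance;
+
+VmaAllocator allocator;
+VkResult res = vmaCreateAllocator(&allocatorCreateInfo, &allocator);
+// Check res...
+\endcode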
+
+\subsection quick_start_initialization_selecting_vulkan_version Selecting Vulkan version
+
+VMA supports Vulkan versions down to 1.0, for backward compatibility.
+If you want to use a higher version, you need to inform the library about it.
+This is a two-step process.
+
+Step 1: Compile time. By default, VMA compiles with code supporting the highest
+Vulkan version found in the included `<vulkan/vulkan.h>` that is also supported by the library.
+If this is OK, you don't need to do anything.
+However, if you want to compile VMA as if only some lower Vulkan version was available,
+define the macro `VMA_VULKAN_VERSION` before every `#include "vk_mem_alloc.h"`.
+It should have a decimal numeric value in the form ABBBCCC, where A = major, BBB = minor, CCC = patch Vulkan version.
+For example, to compile against Vulkan 1.2:
+
+\code
+#define VMA_VULKAN_VERSION 1002000 // Vulkan 1.2
+#include "vk_mem_alloc.h"
+\endcode
+
+Step 2: Runtime. Even when compiled with a higher Vulkan version available,
+VMA can use only features of a lower version, which is configurable during creation of the #VmaAllocator object.
+By default, only Vulkan 1.0 is used.
+To initialize the allocator with support for a higher Vulkan version, you need to set the member
+VmaAllocatorCreateInfo::vulkanApiVersion to an appropriate value, e.g. using constants like `VK_API_VERSION_1_2`.
+See the code sample below.
+
+\subsection quick_start_initialization_importing_vulkan_functions Importing Vulkan functions
+
+You may need to configure the importing of Vulkan functions. There are 3 ways to do this:
+
+-# **If you link with the Vulkan static library** (e.g. "vulkan-1.lib" on Windows):
+   - You don't need to do anything.
+   - VMA will use these, as the macro `VMA_STATIC_VULKAN_FUNCTIONS` is defined to 1 by default.
+-# **If you want VMA to fetch pointers to Vulkan functions dynamically** using `vkGetInstanceProcAddr`,
+   `vkGetDeviceProcAddr` (this is the option presented in the example below):
+   - Define `VMA_STATIC_VULKAN_FUNCTIONS` to 0, `VMA_DYNAMIC_VULKAN_FUNCTIONS` to 1.
+   - Provide pointers to these two functions via VmaVulkanFunctions::vkGetInstanceProcAddr,
+     VmaVulkanFunctions::vkGetDeviceProcAddr.
+   - The library will fetch pointers to all other functions it needs internally.
+-# **If you fetch pointers to all Vulkan functions in a custom way**, e.g. using some loader like
+   [Volk](https://github.com/zeux/volk):
+   - Define `VMA_STATIC_VULKAN_FUNCTIONS` and `VMA_DYNAMIC_VULKAN_FUNCTIONS` to 0.
+   - Pass these pointers via the structure #VmaVulkanFunctions.
+
+\subsection quick_start_initialization_enabling_extensions Enabling extensions
+
+VMA can automatically use the following Vulkan extensions.
+If you found them available on the selected physical device and you enabled them
+while creating the `VkInstance` / `VkDevice` objects, inform VMA about their availability
+by setting the appropriate flags in VmaAllocatorCreateInfo::flags.
+
+Vulkan extension              | VMA flag
+------------------------------|-----------------------------------------------------
+VK_KHR_dedicated_allocation   | #VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT
+VK_KHR_bind_memory2           | #VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT
+VK_KHR_maintenance4           | #VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE4_BIT
+VK_KHR_maintenance5           | #VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT
+VK_EXT_memory_budget          | #VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT
+VK_KHR_buffer_device_address  | #VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT
+VK_EXT_memory_priority        | #VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT
+VK_AMD_device_coherent_memory | #VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT
+
+Example with fetching pointers to Vulkan functions dynamically:
+
+\code
+#define VMA_STATIC_VULKAN_FUNCTIONS 0
+#define VMA_DYNAMIC_VULKAN_FUNCTIONS 1
+#include "vk_mem_alloc.h"
+
+...
+
+VmaVulkanFunctions vulkanFunctions = {};
+vulkanFunctions.vkGetInstanceProcAddr = &vkGetInstanceProcAddr;
+vulkanFunctions.vkGetDeviceProcAddr = &vkGetDeviceProcAddr;
+
+VmaAllocatorCreateInfo allocatorCreateInfo = {};
+allocatorCreateInfo.flags = VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT;
+allocatorCreateInfo.vulkanApiVersion = VK_API_VERSION_1_2;
+allocatorCreateInfo.physicalDevice = physicalDevice;
+allocatorCreateInfo.device = device;
+allocatorCreateInfo.instance = instance;
+allocatorCreateInfo.pVulkanFunctions = &vulkanFunctions;
+
+VmaAllocator allocator;
+vmaCreateAllocator(&allocatorCreateInfo, &allocator);
+
+// Entire program...
+
+// At the end, don't forget to:
+vmaDestroyAllocator(allocator);
+\endcode
+
+
+\subsection quick_start_initialization_other_config Other configuration options
+
+There are additional configuration options available through preprocessor macros that you can define
+before including the VMA header, and through parameters passed in #VmaAllocatorCreateInfo.
+They include the possibility to use your own callbacks for host memory allocations (`VkAllocationCallbacks`),
+callbacks for device memory allocations (instead of `vkAllocateMemory`, `vkFreeMemory`),
+or your custom `VMA_ASSERT` macro, among others.
+For more information, see: @ref configuration.
+
+
+\section quick_start_resource_allocation Resource allocation
+
+When you want to create a buffer or image:
+
+-# Fill the `VkBufferCreateInfo` / `VkImageCreateInfo` structure.
+-# Fill the VmaAllocationCreateInfo structure.
+-# Call vmaCreateBuffer() / vmaCreateImage() to get a `VkBuffer`/`VkImage` with memory
+   already allocated and bound to it, plus a #VmaAllocation object that represents its underlying memory.
+
+\code
+VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufferInfo.size = 65536;
+bufferInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+
+VmaAllocationCreateInfo allocInfo = {};
+allocInfo.usage = VMA_MEMORY_USAGE_AUTO;
+
+VkBuffer buffer;
+VmaAllocation allocation;
+vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr);
+\endcode
+
+Don't forget to destroy your buffer and allocation objects when no longer needed:
+
+\code
+vmaDestroyBuffer(allocator, buffer, allocation);
+\endcode
+
+If you need to map the buffer, you must set the flag
+#VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT
+in VmaAllocationCreateInfo::flags (see the sketch below).
+There are many additional parameters that can control the choice of memory type to be used for the allocation
+and other features.
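+
+For illustration only - a minimal sketch of such a mappable buffer, reusing the `allocator` and
+`bufferInfo` from the example above (the chapters referenced below cover this topic in full):
+
+\code
+VmaAllocationCreateInfo mappableAllocInfo = {};
+mappableAllocInfo.usage = VMA_MEMORY_USAGE_AUTO;
+// With VMA_MEMORY_USAGE_AUTO*, one of the host access flags is required for mapping:
+mappableAllocInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
+
+VkBuffer mappableBuffer;
+VmaAllocation mappableAllocation;
+vmaCreateBuffer(allocator, &bufferInfo, &mappableAllocInfo, &mappableBuffer, &mappableAllocation, nullptr);
+\endcode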
+For more information, see documentation chapters: @ref choosing_memory_type, @ref memory_mapping.
+
+
+\page choosing_memory_type Choosing memory type
+
+Physical devices in Vulkan support various combinations of memory heaps and
+types. Helping you choose the correct and optimal memory type for your specific
+resource is one of the key features of this library. You can use it by filling
+the appropriate members of the VmaAllocationCreateInfo structure, as described below.
+You can also combine multiple methods.
+
+-# If you just want to find the memory type index that meets your requirements, you
+   can use one of the functions: vmaFindMemoryTypeIndexForBufferInfo(),
+   vmaFindMemoryTypeIndexForImageInfo(), vmaFindMemoryTypeIndex().
+-# If you want to allocate a region of device memory without association with any
+   specific image or buffer, you can use the function vmaAllocateMemory(). Usage of
+   this function is not recommended and usually not needed.
+   The vmaAllocateMemoryPages() function is also provided for creating multiple allocations at once,
+   which may be useful for sparse binding.
+-# If you already have a buffer or an image created, want to allocate memory
+   for it, and will then bind it yourself, you can use the functions
+   vmaAllocateMemoryForBuffer(), vmaAllocateMemoryForImage().
+   For binding you should use the functions vmaBindBufferMemory(), vmaBindImageMemory()
+   or their extended versions: vmaBindBufferMemory2(), vmaBindImageMemory2().
+-# If you want to create a buffer or an image, allocate memory for it, and bind
+   them together, all in one call, you can use the functions vmaCreateBuffer(),
+   vmaCreateImage().
+   This is the easiest and recommended way to use this library!
+
+When using method 3 or 4, the library internally queries Vulkan for the memory types
+supported by that buffer or image (function `vkGetBufferMemoryRequirements()`)
+and uses only one of these types.
+
+If no memory type can be found that meets all the requirements, these functions
+return `VK_ERROR_FEATURE_NOT_PRESENT`.
+
+You can leave the VmaAllocationCreateInfo structure completely filled with zeros.
+It means no requirements are specified for the memory type.
+It is valid, although not very useful.
+
+\section choosing_memory_type_usage Usage
+
+The easiest way to specify memory requirements is to fill the member
+VmaAllocationCreateInfo::usage using one of the values of the enum #VmaMemoryUsage.
+It defines high-level, common usage types.
+Since version 3 of the library, it is recommended to use #VMA_MEMORY_USAGE_AUTO to let it select the best memory type for your resource automatically.
+
+For example, if you want to create a uniform buffer that will be filled using
+transfer only once or infrequently and then used for rendering every frame, you can
+do it using the following code. The buffer will most likely end up in a memory type with
+`VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT` to be fast to access by the GPU device.
+
+\code
+VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufferInfo.size = 65536;
+bufferInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+
+VmaAllocationCreateInfo allocInfo = {};
+allocInfo.usage = VMA_MEMORY_USAGE_AUTO;
+
+VkBuffer buffer;
+VmaAllocation allocation;
+vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr);
+\endcode
+
+If you have a preference for putting the resource in GPU (device) memory or CPU (host) memory,
+on systems with a discrete graphics card that keeps these memories separate, you can use
+#VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE or #VMA_MEMORY_USAGE_AUTO_PREFER_HOST.
+
+When using `VMA_MEMORY_USAGE_AUTO*` and you want to map the allocated memory,
+you also need to specify one of the host access flags:
+#VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT.
+This helps the library decide on a preferred memory type and ensure it has `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT`
+so you can map it.
+
+For example, a staging buffer that will be filled via a mapped pointer and then
+used as a source of transfer to the buffer described previously can be created like this.
+It will likely end up in a memory type that is `HOST_VISIBLE` and `HOST_COHERENT`
+but not `HOST_CACHED` (meaning uncached, write-combined) and not `DEVICE_LOCAL` (meaning system RAM).
+
+\code
+VkBufferCreateInfo stagingBufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+stagingBufferInfo.size = 65536;
+stagingBufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
+
+VmaAllocationCreateInfo stagingAllocInfo = {};
+stagingAllocInfo.usage = VMA_MEMORY_USAGE_AUTO;
+stagingAllocInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
+
+VkBuffer stagingBuffer;
+VmaAllocation stagingAllocation;
+vmaCreateBuffer(allocator, &stagingBufferInfo, &stagingAllocInfo, &stagingBuffer, &stagingAllocation, nullptr);
+\endcode
+
+For more examples of creating different kinds of resources, see chapter \ref usage_patterns.
+See also: @ref memory_mapping.
+
+Usage values `VMA_MEMORY_USAGE_AUTO*` are legal to use only when the library knows
+about the resource being created by having `VkBufferCreateInfo` / `VkImageCreateInfo` passed,
+so they work with functions like vmaCreateBuffer(), vmaCreateImage(), vmaFindMemoryTypeIndexForBufferInfo() etc.
+If you allocate raw memory using the function vmaAllocateMemory(), you have to use other means of selecting
+a memory type, as described below.
+
+\note
+Old usage values (`VMA_MEMORY_USAGE_GPU_ONLY`, `VMA_MEMORY_USAGE_CPU_ONLY`,
+`VMA_MEMORY_USAGE_CPU_TO_GPU`, `VMA_MEMORY_USAGE_GPU_TO_CPU`, `VMA_MEMORY_USAGE_CPU_COPY`)
+are still available and work the same way as in previous versions of the library
+for backward compatibility, but they are deprecated.
+
+\section choosing_memory_type_required_preferred_flags Required and preferred flags
+
+You can specify more detailed requirements by filling the members
+VmaAllocationCreateInfo::requiredFlags and VmaAllocationCreateInfo::preferredFlags
+with a combination of bits from the enum `VkMemoryPropertyFlags`.
+For example,
+if you want to create a buffer that will be persistently mapped on the host (so it
+must be `HOST_VISIBLE`) and preferably will also be `HOST_COHERENT` and `HOST_CACHED`,
+use the following code:
+
+\code
+VmaAllocationCreateInfo allocInfo = {};
+allocInfo.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+allocInfo.preferredFlags = VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
+allocInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT;
+
+VkBuffer buffer;
+VmaAllocation allocation;
+vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr);
+\endcode
+
+A memory type is chosen that has all the required flags and as many preferred
+flags set as possible.
+
+The value passed in VmaAllocationCreateInfo::usage is internally converted to a set of required and preferred flags,
+plus some extra "magic" (heuristics).
+
+\section choosing_memory_type_explicit_memory_types Explicit memory types
+
+If you inspected the memory types available on the physical device and you have
+a preference for memory types that you want to use, you can fill the member
+VmaAllocationCreateInfo::memoryTypeBits. It is a bit mask, where each bit set
+means that a memory type with that index is allowed to be used for the
+allocation. The special value 0, just like `UINT32_MAX`, means there are no
+restrictions on the memory type index.
+
+Please note that this member is NOT just a memory type index.
+Still, you can use it to choose just one specific memory type.
+For example, if you already determined that your buffer should be created in
+memory type 2, use the following code:
+
+\code
+uint32_t memoryTypeIndex = 2;
+
+VmaAllocationCreateInfo allocInfo = {};
+allocInfo.memoryTypeBits = 1u << memoryTypeIndex;
+
+VkBuffer buffer;
+VmaAllocation allocation;
+vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr);
+\endcode
+
+You can also use this parameter to exclude some memory types.
+If you inspect the memory heaps and types available on the current physical device and
+you determine that for some reason you don't want to use a specific memory type for the allocation,
+you can enable automatic memory type selection but exclude a certain memory type or types
+by setting all bits of `memoryTypeBits` to 1 except the ones you choose.
+
+\code
+// ...
+uint32_t excludedMemoryTypeIndex = 2;
+VmaAllocationCreateInfo allocInfo = {};
+allocInfo.usage = VMA_MEMORY_USAGE_AUTO;
+allocInfo.memoryTypeBits = ~(1u << excludedMemoryTypeIndex);
+// ...
+\endcode
+
+
+\section choosing_memory_type_custom_memory_pools Custom memory pools
+
+If you allocate from a custom memory pool, all the ways of specifying memory
+requirements described above are not applicable, and the aforementioned members
+of the VmaAllocationCreateInfo structure are ignored. The memory type is selected
+explicitly when creating the pool and then used to make all the allocations from
+that pool. For further details, see \ref custom_memory_pools.
+
+\section choosing_memory_type_dedicated_allocations Dedicated allocations
+
+Memory for allocations is reserved out of a larger block of `VkDeviceMemory`
+allocated from Vulkan internally. That is the main feature of this whole library.
+You can still request a separate memory block to be created for an allocation,
+just like you would do in a trivial solution without using any allocator.
+In that case, a buffer or image is always bound to that memory at offset 0.
+This is called a "dedicated allocation".
+You can explicitly request it by using the flag #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT
+(see the sketch after the list below).
+The library can also internally decide to use a dedicated allocation in some cases, e.g.:
+
+- When the size of the allocation is large.
+- When the [VK_KHR_dedicated_allocation](@ref vk_khr_dedicated_allocation) extension is enabled
+  and it reports that a dedicated allocation is required or recommended for the resource.
+- When allocation of the next big memory block fails due to not enough device memory,
+  but allocation with the exact requested size succeeds.
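+
+A minimal sketch of such an explicit request - `imageInfo` is assumed to be a filled
+`VkImageCreateInfo` for e.g. a large render target:
+
+\code
+VmaAllocationCreateInfo dedicatedAllocInfo = {};
+dedicatedAllocInfo.usage = VMA_MEMORY_USAGE_AUTO;
+// Request a separate VkDeviceMemory block just for this resource:
+dedicatedAllocInfo.flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
+
+VkImage image;
+VmaAllocation allocation;
+vmaCreateImage(allocator, &imageInfo, &dedicatedAllocInfo, &image, &allocation, nullptr);
+\endcode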
+
+
+\page memory_mapping Memory mapping
+
+To "map memory" in Vulkan means to obtain a CPU pointer to `VkDeviceMemory`,
+to be able to read from it or write to it in CPU code.
+Mapping is possible only for memory allocated from a memory type that has
+the `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` flag.
+Functions `vkMapMemory()`, `vkUnmapMemory()` are designed for this purpose.
+You can use them directly with memory allocated by this library,
+but it is not recommended because of the following issue:
+Mapping the same `VkDeviceMemory` block multiple times is illegal - only one mapping at a time is allowed.
+This includes mapping disjoint regions. Mapping is not reference-counted internally by Vulkan.
+It is also not thread-safe.
+Because of this, Vulkan Memory Allocator provides the following facilities:
+
+\note If you want to be able to map an allocation, you need to specify one of the flags
+#VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT
+in VmaAllocationCreateInfo::flags. These flags are required for an allocation to be mappable
+when using #VMA_MEMORY_USAGE_AUTO or other `VMA_MEMORY_USAGE_AUTO*` enum values.
+For other usage values they are ignored and every such allocation made in a `HOST_VISIBLE` memory type is mappable,
+but these flags can still be used for consistency.
+
+\section memory_mapping_copy_functions Copy functions
+
+The easiest way to copy data from a host pointer to an allocation is to use the convenience function vmaCopyMemoryToAllocation().
+It automatically maps the Vulkan memory temporarily (if not already mapped), performs a `memcpy`,
+and calls `vkFlushMappedMemoryRanges` (if required - if the memory type is not `HOST_COHERENT`).
+
+It is also the safest one, because using `memcpy` avoids the risk of accidentally introducing memory reads
+(e.g. by doing `pMappedVectors[i] += v`), which may be very slow on memory types that are not `HOST_CACHED`.
+
+\code
+struct ConstantBuffer
+{
+    ...
+};
+ConstantBuffer constantBufferData = ...
+
+VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufCreateInfo.size = sizeof(ConstantBuffer);
+bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
+
+VkBuffer buf;
+VmaAllocation alloc;
+vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, nullptr);
+
+vmaCopyMemoryToAllocation(allocator, &constantBufferData, alloc, 0, sizeof(ConstantBuffer));
+\endcode
+
+Copying in the other direction - from an allocation to a host pointer - can be performed the same way using the function vmaCopyAllocationToMemory().
+
+\section memory_mapping_mapping_functions Mapping functions
+
+The library provides the following functions for mapping of a specific allocation: vmaMapMemory(), vmaUnmapMemory().
+They are safer and more convenient to use than the standard Vulkan functions.
+You can map an allocation multiple times simultaneously - mapping is reference-counted internally.
+You can also map different allocations simultaneously regardless of whether they use the same `VkDeviceMemory` block.
+The way it is implemented is that the library always maps the entire memory block, not just the region of the allocation.
+For further details, see the description of the vmaMapMemory() function.
+Example:
+
+\code
+// Having these objects initialized:
+struct ConstantBuffer
+{
+    ...
+};
+ConstantBuffer constantBufferData = ...
+
+VmaAllocator allocator = ...
+VkBuffer constantBuffer = ...
+VmaAllocation constantBufferAllocation = ...
+
+// You can map and fill your buffer using the following code:
+
+void* mappedData;
+vmaMapMemory(allocator, constantBufferAllocation, &mappedData);
+memcpy(mappedData, &constantBufferData, sizeof(constantBufferData));
+vmaUnmapMemory(allocator, constantBufferAllocation);
+\endcode
+
+When mapping, you may see a warning from the Vulkan validation layer similar to this one:
+
+Mapping an image with layout VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL can result in undefined behavior if this memory is used by the device. Only GENERAL or PREINITIALIZED should be used.
+
+It happens because the library maps the entire `VkDeviceMemory` block, where different
+types of images and buffers may end up together, especially on GPUs with unified memory like Intel.
+You can safely ignore it if you are sure you access only memory of the intended
+object that you wanted to map.
+
+
+\section memory_mapping_persistently_mapped_memory Persistently mapped memory
+
+Keeping your memory persistently mapped is generally OK in Vulkan.
+You don't need to unmap it before using its data on the GPU.
+The library provides a special feature designed for that:
+Allocations made with the #VMA_ALLOCATION_CREATE_MAPPED_BIT flag set in
+VmaAllocationCreateInfo::flags stay mapped all the time,
+so you can just access the CPU pointer to the memory at any time
+without a need to call any "map" or "unmap" function.
+Example:
+
+\code
+VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufCreateInfo.size = sizeof(ConstantBuffer);
+bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
+    VMA_ALLOCATION_CREATE_MAPPED_BIT;
+
+VkBuffer buf;
+VmaAllocation alloc;
+VmaAllocationInfo allocInfo;
+vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
+
+// Buffer is already mapped. You can access its memory.
+memcpy(allocInfo.pMappedData, &constantBufferData, sizeof(constantBufferData));
+\endcode
+
+\note #VMA_ALLOCATION_CREATE_MAPPED_BIT by itself doesn't guarantee that the allocation will end up
+in a mappable memory type.
+For this, you need to also specify #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or
+#VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT.
+#VMA_ALLOCATION_CREATE_MAPPED_BIT only guarantees that if the memory is `HOST_VISIBLE`, the allocation will be mapped on creation.
+For an example of how to make use of this fact, see section \ref usage_patterns_advanced_data_uploading.
+
+\section memory_mapping_cache_control Cache flush and invalidate
+
+Memory in Vulkan doesn't need to be unmapped before using it on the GPU,
+but unless a memory type has the `VK_MEMORY_PROPERTY_HOST_COHERENT_BIT` flag set,
+you need to manually **invalidate** the cache before reading from a mapped pointer
+and **flush** the cache after writing to a mapped pointer.
+Map/unmap operations don't do that automatically.
+Vulkan provides the functions `vkFlushMappedMemoryRanges()` and
+`vkInvalidateMappedMemoryRanges()` for this purpose, but this library provides more convenient
+functions that refer to a given allocation object: vmaFlushAllocation(),
+vmaInvalidateAllocation(),
+or multiple objects at once: vmaFlushAllocations(), vmaInvalidateAllocations().
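+
+For example, a flush after a host write might look like the following sketch,
+reusing `buf`, `alloc`, and `allocInfo` from the persistently mapped buffer above:
+
+\code
+// Write through the persistently mapped pointer...
+memcpy(allocInfo.pMappedData, &constantBufferData, sizeof(constantBufferData));
+
+// ...then flush the written range. If the memory type happens to be
+// HOST_COHERENT, the call is still valid and simply does nothing.
+vmaFlushAllocation(allocator, alloc, 0, VK_WHOLE_SIZE);
+\endcode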
+
+Regions of memory specified for flush/invalidate must be aligned to
+`VkPhysicalDeviceLimits::nonCoherentAtomSize`. This is automatically ensured by the library.
+In any memory type that is `HOST_VISIBLE` but not `HOST_COHERENT`, all allocations
+within blocks are aligned to this value, so their offsets are always multiples of
+`nonCoherentAtomSize` and two different allocations never share the same "line" of this size.
+
+Also, Windows drivers from all 3 PC GPU vendors (AMD, Intel, NVIDIA)
+currently provide the `HOST_COHERENT` flag on all memory types that are
+`HOST_VISIBLE`, so on PC you may not need to bother.
+
+
+\page staying_within_budget Staying within budget
+
+When developing a graphics-intensive game or program, it is important to avoid allocating
+more GPU memory than is physically available. When the memory is over-committed,
+various bad things can happen, depending on the specific GPU, graphics driver, and
+operating system:
+
+- It may just work without any problems.
+- The application may slow down because some memory blocks are moved to system RAM
+  and the GPU has to access them through the PCI Express bus.
+- A new allocation may take a very long time to complete, even a few seconds, and possibly
+  freeze the entire system.
+- The new allocation may fail with `VK_ERROR_OUT_OF_DEVICE_MEMORY`.
+- It may even result in a GPU crash (TDR), observed as `VK_ERROR_DEVICE_LOST`
+  returned somewhere later.
+
+\section staying_within_budget_querying_for_budget Querying for budget
+
+To query for current memory usage and available budget, use the function vmaGetHeapBudgets().
+The returned structure #VmaBudget contains quantities expressed in bytes, per Vulkan memory heap.
+
+Please note that this function returns different information and works faster than
+vmaCalculateStatistics(). vmaGetHeapBudgets() can be called every frame or even before every
+allocation, while vmaCalculateStatistics() is intended to be used rarely,
+only to obtain statistical information, e.g. for debugging purposes.
+
+It is recommended to use the VK_EXT_memory_budget device extension to obtain information
+about the budget from the Vulkan device. VMA is able to use this extension automatically.
+When not enabled, the allocator behaves the same way, but then it estimates current usage
+and available budget based on its internal information and Vulkan memory heap sizes,
+which may be less precise. In order to use this extension:
+
+1. Make sure the extension VK_EXT_memory_budget and the extension VK_KHR_get_physical_device_properties2
+   required by it are available, and enable them. Please note that the first is a device
+   extension and the second is an instance extension!
+2. Use the flag #VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT when creating the #VmaAllocator object.
+3. Make sure to call vmaSetCurrentFrameIndex() every frame. The budget is queried from
+   Vulkan inside of it to avoid the overhead of querying it with every allocation.
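+
+With the allocator set up this way, the query itself might look like this sketch
+(vmaGetMemoryProperties() is used here only to learn the heap count):
+
+\code
+const VkPhysicalDeviceMemoryProperties* memProps = nullptr;
+vmaGetMemoryProperties(allocator, &memProps);
+
+VmaBudget budgets[VK_MAX_MEMORY_HEAPS];
+vmaGetHeapBudgets(allocator, budgets);
+
+for(uint32_t heapIndex = 0; heapIndex < memProps->memoryHeapCount; ++heapIndex)
+{
+    // usage = bytes currently allocated by this process in this heap,
+    // budget = estimated bytes available to it, as reported by the driver.
+    printf("Heap %u: usage %llu / budget %llu bytes\n", heapIndex,
+        (unsigned long long)budgets[heapIndex].usage,
+        (unsigned long long)budgets[heapIndex].budget);
+}
+\endcode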
+
+\section staying_within_budget_controlling_memory_usage Controlling memory usage
+
+There are many ways in which you can try to stay within the budget.
+
+First, when making a new allocation requires allocating a new memory block, the library
+tries not to exceed the budget automatically. If a block with the default recommended size
+(e.g. 256 MB) would go over budget, a smaller block is allocated, possibly even
+dedicated memory for just this resource.
+
+If the size of the requested resource plus current memory usage is more than the
+budget, by default the library still tries to create it, leaving it to the Vulkan
+implementation whether the allocation succeeds or fails. You can change this behavior
+by using the #VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT flag. With it, the allocation is
+not made if it would exceed the budget or if the budget is already exceeded.
+VMA then tries to make the allocation from the next eligible Vulkan memory type.
+If all of them fail, the call fails with `VK_ERROR_OUT_OF_DEVICE_MEMORY`.
+An example usage pattern may be to pass the #VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT flag
+when creating resources that are not essential for the application (e.g. the texture
+of a specific object) and not to pass it when creating critically important resources
+(e.g. render targets).
+
+On AMD graphics cards there is a custom vendor extension available: VK_AMD_memory_overallocation_behavior,
+which allows controlling the behavior of the Vulkan implementation in out-of-memory cases -
+whether it should fail with an error code or still allow the allocation.
+Usage of this extension involves only passing an extra structure on Vulkan device creation,
+so it is out of scope of this library.
+
+Finally, you can also use the #VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT flag to make sure
+a new allocation is created only when it fits inside one of the existing memory blocks.
+If it would require allocating a new block, it fails instead with `VK_ERROR_OUT_OF_DEVICE_MEMORY`.
+This also ensures that the function call is very fast because it never goes to Vulkan
+to obtain a new block.
+
+\note Creating \ref custom_memory_pools with VmaPoolCreateInfo::minBlockCount
+set to more than 0 will currently try to allocate memory blocks without checking whether they
+fit within budget.
+
+
+\page resource_aliasing Resource aliasing (overlap)
+
+New explicit graphics APIs (Vulkan and Direct3D 12), thanks to manual memory
+management, give an opportunity to alias (overlap) multiple resources in the
+same region of memory - a feature not available in the old APIs (Direct3D 11, OpenGL).
+It can be useful to save video memory, but it must be used with caution.
+
+For example, if you know the flow of your whole render frame in advance, know that you
+are going to use some intermediate textures or buffers only during a small range of render passes,
+and know that these ranges don't overlap in time, you can bind these resources to
+the same place in memory, even if they have completely different parameters (width, height, format etc.).
+
+![Resource aliasing (overlap)](../gfx/Aliasing.png)
+
+Such a scenario is possible using VMA, but you need to create your images manually.
+Then you need to calculate the parameters of an allocation to be made using the formula:
+
+- allocation size = max(size of each image)
+- allocation alignment = max(alignment of each image)
+- allocation memoryTypeBits = bitwise AND(memoryTypeBits of each image)
+
+The following example shows two different images bound to the same place in memory,
+allocated to fit the largest of them.
+
+\code
+// A 512x512 texture to be sampled.
+VkImageCreateInfo img1CreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
+img1CreateInfo.imageType = VK_IMAGE_TYPE_2D;
+img1CreateInfo.extent.width = 512;
+img1CreateInfo.extent.height = 512;
+img1CreateInfo.extent.depth = 1;
+img1CreateInfo.mipLevels = 10;
+img1CreateInfo.arrayLayers = 1;
+img1CreateInfo.format = VK_FORMAT_R8G8B8A8_SRGB;
+img1CreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
+img1CreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+img1CreateInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
+img1CreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
+
+// A full screen texture to be used as color attachment.
+VkImageCreateInfo img2CreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
+img2CreateInfo.imageType = VK_IMAGE_TYPE_2D;
+img2CreateInfo.extent.width = 1920;
+img2CreateInfo.extent.height = 1080;
+img2CreateInfo.extent.depth = 1;
+img2CreateInfo.mipLevels = 1;
+img2CreateInfo.arrayLayers = 1;
+img2CreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
+img2CreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
+img2CreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+img2CreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+img2CreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
+
+VkImage img1;
+res = vkCreateImage(device, &img1CreateInfo, nullptr, &img1);
+VkImage img2;
+res = vkCreateImage(device, &img2CreateInfo, nullptr, &img2);
+
+VkMemoryRequirements img1MemReq;
+vkGetImageMemoryRequirements(device, img1, &img1MemReq);
+VkMemoryRequirements img2MemReq;
+vkGetImageMemoryRequirements(device, img2, &img2MemReq);
+
+VkMemoryRequirements finalMemReq = {};
+finalMemReq.size = std::max(img1MemReq.size, img2MemReq.size);
+finalMemReq.alignment = std::max(img1MemReq.alignment, img2MemReq.alignment);
+finalMemReq.memoryTypeBits = img1MemReq.memoryTypeBits & img2MemReq.memoryTypeBits;
+// Validate that finalMemReq.memoryTypeBits != 0 - otherwise the images cannot share memory.
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+
+VmaAllocation alloc;
+res = vmaAllocateMemory(allocator, &finalMemReq, &allocCreateInfo, &alloc, nullptr);
+
+res = vmaBindImageMemory(allocator, alloc, img1);
+res = vmaBindImageMemory(allocator, alloc, img2);
+
+// You can use img1, img2 here, but not at the same time!
+
+vmaFreeMemory(allocator, alloc);
+vkDestroyImage(device, img2, nullptr);
+vkDestroyImage(device, img1, nullptr);
+\endcode
+
+VMA also provides convenience functions that create a buffer or image and bind it to memory
+represented by an existing #VmaAllocation:
+vmaCreateAliasingBuffer(), vmaCreateAliasingBuffer2(),
+vmaCreateAliasingImage(), vmaCreateAliasingImage2().
+Versions with "2" offer an additional parameter `allocationLocalOffset`.
+
+Remember that using resources that alias in memory requires proper synchronization.
+You need to issue a memory barrier to make sure commands that use `img1` and `img2`
+don't overlap on the GPU timeline.
+You also need to treat a resource after aliasing as uninitialized - containing garbage data.
+For example, if you use `img1` and then want to use `img2`, you need to issue
+an image memory barrier for `img2` with `oldLayout` = `VK_IMAGE_LAYOUT_UNDEFINED`,
+as sketched below.
+
+Additional considerations:
+
+- Vulkan also allows interpreting the contents of memory between aliasing resources consistently in some cases.
+  See chapter 11.8. "Memory Aliasing" of the Vulkan specification or the `VK_IMAGE_CREATE_ALIAS_BIT` flag.
+- You can create a more complex layout where different images and buffers are bound
+  at different offsets inside one large allocation. For example, one can imagine
+  a big texture used in some render passes, aliasing with a set of many small buffers
+  used in some further passes. To bind a resource at a non-zero offset in an allocation,
+  use vmaBindBufferMemory2() / vmaBindImageMemory2().
+- Before allocating memory for the resources you want to alias, check the `memoryTypeBits`
+  returned in the memory requirements of each resource to make sure the bits overlap.
+  Some GPUs may expose multiple memory types suitable e.g. only for buffers or
+  images with `COLOR_ATTACHMENT` usage, so the sets of memory types supported by your
+  resources may be disjoint. Aliasing them is not possible in that case.
+
+
+\page custom_memory_pools Custom memory pools
+
+A memory pool contains a number of `VkDeviceMemory` blocks.
+The library automatically creates and manages a default pool for each memory type available on the device.
+A default memory pool automatically grows in size.
+The size of allocated blocks is also variable and managed automatically.
+You are using the default pools whenever you leave VmaAllocationCreateInfo::pool = null.
+
+You can create a custom pool and allocate memory out of it.
+It can be useful if you want to:
+
+- Keep a certain kind of allocations separate from others.
+- Enforce a particular, fixed size of Vulkan memory blocks.
+- Limit the maximum amount of Vulkan memory allocated for that pool.
+- Reserve a minimum or fixed amount of Vulkan memory always preallocated for that pool.
+- Use extra parameters for a set of your allocations that are available in #VmaPoolCreateInfo but not in
+  #VmaAllocationCreateInfo - e.g., custom minimum alignment, custom `pNext` chain.
+- Perform defragmentation on a specific subset of your allocations.
+
+To use custom memory pools:
+
+-# Fill the VmaPoolCreateInfo structure.
+-# Call vmaCreatePool() to obtain a #VmaPool handle.
+-# When making an allocation, set VmaAllocationCreateInfo::pool to this handle.
+   You don't need to specify any other parameters of this structure, like `usage`.
+
+Example:
+
+\code
+// Find memoryTypeIndex for the pool.
+VkBufferCreateInfo sampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+sampleBufCreateInfo.size = 0x10000; // Doesn't matter.
+sampleBufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+
+VmaAllocationCreateInfo sampleAllocCreateInfo = {};
+sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+
+uint32_t memTypeIndex;
+VkResult res = vmaFindMemoryTypeIndexForBufferInfo(allocator,
+    &sampleBufCreateInfo, &sampleAllocCreateInfo, &memTypeIndex);
+// Check res...
+
+// Create a pool that can have at most 2 blocks, 128 MiB each.
+VmaPoolCreateInfo poolCreateInfo = {};
+poolCreateInfo.memoryTypeIndex = memTypeIndex;
+poolCreateInfo.blockSize = 128ull * 1024 * 1024;
+poolCreateInfo.maxBlockCount = 2;
+
+VmaPool pool;
+res = vmaCreatePool(allocator, &poolCreateInfo, &pool);
+// Check res...
+
+// Allocate a buffer out of it.
+VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufCreateInfo.size = 1024;
+bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.pool = pool;
+
+VkBuffer buf;
+VmaAllocation alloc;
+res = vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, nullptr);
+// Check res...
+\endcode
+
+You have to free all allocations made from this pool before destroying it.
+
+\code
+vmaDestroyBuffer(allocator, buf, alloc);
+vmaDestroyPool(allocator, pool);
+\endcode
+
+New versions of this library support creating dedicated allocations in custom pools.
+It is supported only when VmaPoolCreateInfo::blockSize = 0.
+To use this feature, set VmaAllocationCreateInfo::pool to the pointer to your custom pool and
+VmaAllocationCreateInfo::flags to #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT.
+
+
+\section custom_memory_pools_MemTypeIndex Choosing memory type index
+
+When creating a pool, you must explicitly specify the memory type index.
+To find the one suitable for your buffers or images, you can use the helper functions
+vmaFindMemoryTypeIndexForBufferInfo(), vmaFindMemoryTypeIndexForImageInfo().
+You need to provide structures with example parameters of buffers or images
+that you are going to create in that pool.
+
+\code
+VkBufferCreateInfo exampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+exampleBufCreateInfo.size = 1024; // Doesn't matter.
+exampleBufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+
+uint32_t memTypeIndex;
+vmaFindMemoryTypeIndexForBufferInfo(allocator, &exampleBufCreateInfo, &allocCreateInfo, &memTypeIndex);
+
+VmaPoolCreateInfo poolCreateInfo = {};
+poolCreateInfo.memoryTypeIndex = memTypeIndex;
+// ...
+\endcode
+
+When creating buffers/images allocated in that pool, provide the following parameters:
+
+- `VkBufferCreateInfo`: Prefer to pass the same parameters as above.
+  Otherwise you risk creating resources in a memory type that is not suitable for them, which may result in undefined behavior.
+  Using different `VK_BUFFER_USAGE_` flags may work, but you shouldn't create images in a pool intended for buffers
+  or the other way around.
+- VmaAllocationCreateInfo: You don't need to pass the same parameters. Fill only the `pool` member.
+  Other members are ignored anyway.
+
+
+\section custom_memory_pools_when_not_use When not to use custom pools
+
+Custom pools are commonly overused by VMA users.
+While it may feel natural to keep some logical groups of resources separate in memory,
+in most cases it does more harm than good.
+Using a custom pool shouldn't be your first choice.
+Instead, please make all allocations from default pools first and only use custom pools
+if you can prove and measure that it is beneficial in some way,
+e.g. it results in lower memory usage, better performance, etc.
+
+Using custom pools has disadvantages:
+
+- Each pool has its own collection of `VkDeviceMemory` blocks.
+  Some of them may be partially or even completely empty.
+  Spreading allocations across multiple pools increases the amount of wasted (allocated but unbound) memory.
+- You must manually choose a specific memory type to be used by a custom pool (set as VmaPoolCreateInfo::memoryTypeIndex).
+  When using default pools, the best memory type for each of your allocations can be selected automatically
+  using a carefully designed algorithm that works across all kinds of GPUs.
+- If an allocation from a custom pool at a specific memory type fails, the entire allocation operation returns failure.
+  When using default pools, VMA tries another compatible memory type.
+- If you set VmaPoolCreateInfo::blockSize != 0, each memory block has the same size,
+  while default pools start with small blocks and allocate each next block larger and larger,
+  up to the preferred block size.
+
+Many of the common concerns can be addressed in a different way than using custom pools:
+
+- If you want to keep your allocations of certain size (small versus large) or certain lifetime (transient versus long lived)
+  separate, you likely don't need to.
+  VMA uses a high quality allocation algorithm that manages memory well in various cases.
+  Please measure and check if using custom pools provides a benefit.
+- If you want to keep your images and buffers separate, you don't need to.
+  VMA respects the `bufferImageGranularity` limit automatically.
+- If you want to keep your mapped and not mapped allocations separate, you don't need to.
+  VMA respects the `nonCoherentAtomSize` limit automatically.
+  It also maps only those `VkDeviceMemory` blocks that need to map any allocation.
+  It even tries to keep mappable and non-mappable allocations in separate blocks to minimize the amount of mapped memory.
+- If you want to choose a custom size for the default memory block, you can set it globally instead
+  using VmaAllocatorCreateInfo::preferredLargeHeapBlockSize.
+- If you want to select a specific memory type for your allocation,
+  you can set VmaAllocationCreateInfo::memoryTypeBits to `(1u << myMemoryTypeIndex)` instead.
+- If you need to create a buffer with a certain minimum alignment, you can still do it
+  using default pools with the dedicated function vmaCreateBufferWithAlignment().
+
+
+\section linear_algorithm Linear allocation algorithm
+
+Each Vulkan memory block managed by this library has accompanying metadata that
+keeps track of used and unused regions. By default, the metadata structure and
+algorithm tries to find the best place for new allocations among free regions to
+optimize memory usage. This way you can allocate and free objects in any order.
+
+![Default allocation algorithm](../gfx/Linear_allocator_1_algo_default.png)
+
+Sometimes there is a need to use a simpler, linear allocation algorithm. You can
+create a custom pool that uses this algorithm by adding the flag
+#VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT to VmaPoolCreateInfo::flags while creating the
+#VmaPool object. Then an alternative metadata management is used. It always
+creates new allocations after the last one and doesn't reuse free regions left after
+allocations freed in the middle. This results in better allocation performance and
+less memory consumed by metadata.
+
+![Linear allocation algorithm](../gfx/Linear_allocator_2_algo_linear.png)
+
+With this one flag, you can create a custom pool that can be used in many ways:
+free-at-once, stack, double stack, and ring buffer. See below for details, and a short
+creation sketch right after this paragraph.
+You don't need to specify explicitly which of these options you are going to use - it is detected automatically.
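+A minimal sketch of creating such a pool, reusing a `memTypeIndex` found as shown in
+\ref custom_memory_pools_MemTypeIndex (the block size is an arbitrary example value):
+
+\code
+VmaPoolCreateInfo poolCreateInfo = {};
+poolCreateInfo.memoryTypeIndex = memTypeIndex;
+poolCreateInfo.flags = VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT;
+poolCreateInfo.blockSize = 16ull * 1024 * 1024; // 16 MiB.
+poolCreateInfo.maxBlockCount = 1; // Required for double stack and ring buffer usage.
+
+VmaPool linearPool;
+VkResult res = vmaCreatePool(allocator, &poolCreateInfo, &linearPool);
+// Check res...
+\endcode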
+\subsection linear_algorithm_free_at_once Free-at-once
+
+In a pool that uses the linear algorithm, you still need to free all the allocations
+individually, e.g. by using vmaFreeMemory() or vmaDestroyBuffer(). You can free
+them in any order. New allocations are always made after the last one - free space
+in the middle is not reused. However, when you release all the allocations and
+the pool becomes empty, allocation starts from the beginning again. This way you
+can use the linear algorithm to speed up creation of allocations that you are going
+to release all at once.
+
+![Free-at-once](../gfx/Linear_allocator_3_free_at_once.png)
+
+This mode is also available for pools created with a VmaPoolCreateInfo::maxBlockCount
+value that allows multiple memory blocks.
+
+\subsection linear_algorithm_stack Stack
+
+When you free an allocation that was created last, its space can be reused.
+Thanks to this, if you always release allocations in the order opposite to their
+creation (LIFO - Last In First Out), you can achieve the behavior of a stack.
+
+![Stack](../gfx/Linear_allocator_4_stack.png)
+
+This mode is also available for pools created with a VmaPoolCreateInfo::maxBlockCount
+value that allows multiple memory blocks.
+
+\subsection linear_algorithm_double_stack Double stack
+
+The space reserved by a custom pool with the linear algorithm may be used by two
+stacks:
+
+- The first, default one, growing up from offset 0.
+- The second, "upper" one, growing down from the end towards lower offsets.
+
+To make an allocation from the upper stack, add the flag #VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT
+to VmaAllocationCreateInfo::flags.
+
+![Double stack](../gfx/Linear_allocator_7_double_stack.png)
+
+Double stack is available only in pools with one memory block -
+VmaPoolCreateInfo::maxBlockCount must be 1. Otherwise, behavior is undefined.
+
+When the two stacks' ends meet so there is not enough space between them for a
+new allocation, the allocation fails with the usual
+`VK_ERROR_OUT_OF_DEVICE_MEMORY` error.
+
+\subsection linear_algorithm_ring_buffer Ring buffer
+
+When you free some allocations from the beginning and there is not enough free space
+for a new one at the end of a pool, the allocator's "cursor" wraps around to the
+beginning and starts allocating there. Thanks to this, if you always release
+allocations in the same order as you created them (FIFO - First In First Out),
+you can achieve the behavior of a ring buffer / queue.
+
+![Ring buffer](../gfx/Linear_allocator_5_ring_buffer.png)
+
+Ring buffer is available only in pools with one memory block -
+VmaPoolCreateInfo::maxBlockCount must be 1. Otherwise, behavior is undefined.
+
+\note \ref defragmentation is not supported in custom pools created with #VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT.
+
+
+\page defragmentation Defragmentation
+
+Interleaved allocations and deallocations of many objects of varying size can
+cause fragmentation over time, which can lead to a situation where the library is unable
+to find a continuous range of free memory for a new allocation even though there is
+enough free space in total, just scattered across many small free ranges between existing
+allocations.
+
+To mitigate this problem, you can use the defragmentation feature.
+It doesn't happen automatically though and needs your cooperation,
+because VMA is a low-level library that only allocates memory.
+It cannot recreate buffers and images in a new place as it doesn't remember the contents of `VkBufferCreateInfo` / `VkImageCreateInfo` structures.
+It cannot copy their contents as it doesn't record any commands to a command buffer.
+
+Example:
+
+\code
+VmaDefragmentationInfo defragInfo = {};
+defragInfo.pool = myPool;
+defragInfo.flags = VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FAST_BIT;
+
+VmaDefragmentationContext defragCtx;
+VkResult res = vmaBeginDefragmentation(allocator, &defragInfo, &defragCtx);
+// Check res...
+
+for(;;)
+{
+    VmaDefragmentationPassMoveInfo pass;
+    res = vmaBeginDefragmentationPass(allocator, defragCtx, &pass);
+    if(res == VK_SUCCESS)
+        break;
+    else if(res != VK_INCOMPLETE)
+    {
+        // Handle error...
+    }
+
+    for(uint32_t i = 0; i < pass.moveCount; ++i)
+    {
+        // Inspect pass.pMoves[i].srcAllocation, identify what buffer/image it represents.
+        VmaAllocationInfo allocInfo;
+        vmaGetAllocationInfo(allocator, pass.pMoves[i].srcAllocation, &allocInfo);
+        MyEngineResourceData* resData = (MyEngineResourceData*)allocInfo.pUserData;
+
+        // Recreate and bind this buffer/image at: pass.pMoves[i].dstMemory, pass.pMoves[i].dstOffset.
+        VkImageCreateInfo imgCreateInfo = ...
+        VkImage newImg;
+        res = vkCreateImage(device, &imgCreateInfo, nullptr, &newImg);
+        // Check res...
+        res = vmaBindImageMemory(allocator, pass.pMoves[i].dstTmpAllocation, newImg);
+        // Check res...
+
+        // Issue a vkCmdCopyBuffer/vkCmdCopyImage to copy its content to the new place.
+        vkCmdCopyImage(cmdBuf, resData->img, ..., newImg, ...);
+    }
+
+    // Make sure the copy commands finished executing.
+    vkWaitForFences(...);
+
+    // Destroy old buffers/images bound with pass.pMoves[i].srcAllocation.
+    for(uint32_t i = 0; i < pass.moveCount; ++i)
+    {
+        // ...
+        vkDestroyImage(device, resData->img, nullptr);
+    }
+
+    // Update appropriate descriptors to point to the new places...
+
+    res = vmaEndDefragmentationPass(allocator, defragCtx, &pass);
+    if(res == VK_SUCCESS)
+        break;
+    else if(res != VK_INCOMPLETE)
+    {
+        // Handle error...
+    }
+}
+
+vmaEndDefragmentation(allocator, defragCtx, nullptr);
+\endcode
+
+Although functions like vmaCreateBuffer(), vmaCreateImage(), vmaDestroyBuffer(), vmaDestroyImage()
+create/destroy an allocation and a buffer/image at once, these are just a shortcut for
+creating the resource, allocating memory, and binding them together.
+Defragmentation works on memory allocations only. You must handle the rest manually.
+Defragmentation is an iterative process that should repeat "passes" as long as the related functions
+return `VK_INCOMPLETE`, not `VK_SUCCESS`.
+In each pass:
+
+1. vmaBeginDefragmentationPass() function call:
+   - Calculates and returns the list of allocations to be moved in this pass.
+     Note this can be a time-consuming process.
+   - Reserves destination memory for them by creating temporary destination allocations
+     that you can query for their `VkDeviceMemory` + offset using vmaGetAllocationInfo().
+2. Inside the pass, **you should**:
+   - Inspect the returned list of allocations to be moved.
+   - Create new buffers/images and bind them at the returned destination temporary allocations.
+   - Copy data from source to destination resources if necessary.
+   - Destroy the source buffers/images, but NOT their allocations.
+3. vmaEndDefragmentationPass() function call:
+   - Frees the source memory reserved for the allocations that are moved.
+   - Modifies source #VmaAllocation objects that are moved to point to the destination reserved memory.
+   - Frees `VkDeviceMemory` blocks that became empty.
+
+Unlike in previous iterations of the defragmentation API, there is no list of "movable" allocations passed as a parameter.
+The defragmentation algorithm tries to move all suitable allocations.
+You can, however, refuse to move some of them inside a defragmentation pass, by setting
+`pass.pMoves[i].operation` to #VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE.
+This is not recommended and may result in suboptimal packing of the allocations after defragmentation.
+If you cannot ensure any allocation can be moved, it is better to keep movable allocations separate in a custom pool.
+
+Inside a pass, for each allocation that should be moved:
+
+- You should copy its data from the source to the destination place by calling e.g. `vkCmdCopyBuffer()`, `vkCmdCopyImage()`.
+  - You need to make sure these commands finished executing before destroying the source buffers/images and before calling vmaEndDefragmentationPass().
+- If a resource doesn't contain any meaningful data, e.g. it is a transient color attachment image to be cleared,
+  filled, and used temporarily in each rendering frame, you can just recreate this image
+  without copying its data.
+- If the resource is in `HOST_VISIBLE` and `HOST_CACHED` memory, you can copy its data on the CPU
+  using `memcpy()`.
+- If you cannot move the allocation, you can set `pass.pMoves[i].operation` to #VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE.
+  This will cancel the move.
+  - vmaEndDefragmentationPass() will then free the destination memory,
+    not the source memory of the allocation, leaving it unchanged.
+- If you decide the allocation is unimportant and can be destroyed instead of moved (e.g. it wasn't used for a long time),
+  you can set `pass.pMoves[i].operation` to #VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY.
+  - vmaEndDefragmentationPass() will then free both source and destination memory, and will destroy the source #VmaAllocation object.
+
+A sketch of a pass loop that sets these operations is shown at the end of this chapter.
+
+You can defragment a specific custom pool by setting VmaDefragmentationInfo::pool
+(like in the example above) or all the default pools by setting this member to null.
+
+Defragmentation is always performed in each pool separately.
+Allocations are never moved between different Vulkan memory types.
+The size of the destination memory reserved for a moved allocation is the same as the original one.
+The alignment of an allocation, as determined using `vkGetBufferMemoryRequirements()` etc., is also respected after defragmentation.
+Buffers/images should be recreated with the same `VkBufferCreateInfo` / `VkImageCreateInfo` parameters as the original ones.
+
+You can perform the defragmentation incrementally to limit the number of allocations and bytes to be moved
+in each pass, e.g. to call it in sync with render frames and avoid big hitches.
+See members: VmaDefragmentationInfo::maxBytesPerPass, VmaDefragmentationInfo::maxAllocationsPerPass.
+
+It is also safe to perform the defragmentation asynchronously to render frames and other Vulkan and VMA
+usage, possibly from multiple threads, with the exception that allocations
+returned in VmaDefragmentationPassMoveInfo::pMoves shouldn't be destroyed until the defragmentation pass is ended.
+
+Mapping is preserved on allocations that are moved during defragmentation.
+Whether mapped through #VMA_ALLOCATION_CREATE_MAPPED_BIT or vmaMapMemory(), the allocations
+are mapped at their new place. Of course, the pointer to the mapped data changes, so it needs to be queried
+using VmaAllocationInfo::pMappedData.
+
+\note Defragmentation is not supported in custom pools created with #VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT.
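+The following is a minimal sketch of such per-move decisions inside a pass. The
+`MyEngineResourceData` fields `isPinned` and `isExpendable` are hypothetical engine-side
+metadata, not part of the VMA API:
+
+\code
+for(uint32_t i = 0; i < pass.moveCount; ++i)
+{
+    VmaAllocationInfo allocInfo;
+    vmaGetAllocationInfo(allocator, pass.pMoves[i].srcAllocation, &allocInfo);
+    MyEngineResourceData* resData = (MyEngineResourceData*)allocInfo.pUserData;
+
+    if(resData->isPinned)
+    {
+        // Must not move - cancel this move; the allocation stays where it is.
+        pass.pMoves[i].operation = VMA_DEFRAGMENTATION_MOVE_OPERATION_IGNORE;
+    }
+    else if(resData->isExpendable)
+    {
+        // Can be recreated from scratch later - destroy instead of moving.
+        pass.pMoves[i].operation = VMA_DEFRAGMENTATION_MOVE_OPERATION_DESTROY;
+    }
+    // Otherwise leave the default VMA_DEFRAGMENTATION_MOVE_OPERATION_COPY
+    // and recreate + copy the resource as shown in the example above.
+}
+\endcode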
+
+
+\page statistics Statistics
+
+This library contains several functions that return information about its internal state,
+especially the amount of memory allocated from Vulkan.
+
+\section statistics_numeric_statistics Numeric statistics
+
+If you need to obtain basic statistics about memory usage per heap, together with the current budget,
+you can call the function vmaGetHeapBudgets() and inspect the structure #VmaBudget.
+This is useful to keep track of memory usage and stay within budget
+(see also \ref staying_within_budget).
+Example:
+
+\code
+uint32_t heapIndex = ...
+
+VmaBudget budgets[VK_MAX_MEMORY_HEAPS];
+vmaGetHeapBudgets(allocator, budgets);
+
+printf("My heap currently has %u allocations taking %llu B,\n",
+    budgets[heapIndex].statistics.allocationCount,
+    budgets[heapIndex].statistics.allocationBytes);
+printf("allocated out of %u Vulkan device memory blocks taking %llu B,\n",
+    budgets[heapIndex].statistics.blockCount,
+    budgets[heapIndex].statistics.blockBytes);
+printf("Vulkan reports total usage %llu B with budget %llu B.\n",
+    budgets[heapIndex].usage,
+    budgets[heapIndex].budget);
+\endcode
+
+You can query for more detailed statistics per memory heap, type, and totals,
+including minimum and maximum allocation size and unused range size,
+by calling the function vmaCalculateStatistics() and inspecting the structure #VmaTotalStatistics.
+This function is slower though, as it has to traverse all the internal data structures,
+so it should be used only for debugging purposes.
+
+You can query for statistics of a custom pool using the functions vmaGetPoolStatistics()
+or vmaCalculatePoolStatistics().
+
+You can query for information about a specific allocation using the function vmaGetAllocationInfo().
+It fills the structure #VmaAllocationInfo.
+
+\section statistics_json_dump JSON dump
+
+You can dump the internal state of the allocator to a string in JSON format using the function vmaBuildStatsString().
+The result is guaranteed to be correct JSON.
+It uses ANSI encoding.
+Any strings provided by the user (see [Allocation names](@ref allocation_names))
+are copied as-is and properly escaped for JSON, so if they use UTF-8, ISO-8859-2 or any other encoding,
+this JSON string can be treated as using this encoding.
+It must be freed using the function vmaFreeStatsString().
+
+The format of this JSON string is not part of the official documentation of the library,
+but it will not change in a backward-incompatible way without increasing the library's major version number
+and an appropriate mention in the changelog.
+
+The JSON string contains all the data that can be obtained using vmaCalculateStatistics().
+It can also contain a detailed map of allocated memory blocks and their regions -
+free and occupied by allocations.
+This allows you e.g. to visualize the memory or assess fragmentation.
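+A minimal sketch of capturing and releasing such a dump (writing the string to a file
+or log is left out):
+
+\code
+char* statsString = nullptr;
+vmaBuildStatsString(allocator, &statsString, VK_TRUE); // VK_TRUE = include the detailed map of blocks.
+// Write statsString to a file or log for offline inspection...
+vmaFreeStatsString(allocator, statsString);
+\endcode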
+
+
+\page allocation_annotation Allocation names and user data
+
+\section allocation_user_data Allocation user data
+
+You can annotate allocations with your own information, e.g. for debugging purposes.
+To do that, fill the VmaAllocationCreateInfo::pUserData field when creating
+an allocation. It is an opaque `void*` pointer. You can use it e.g. as a pointer,
+some handle, index, key, ordinal number or any other value that would associate
+the allocation with your custom metadata.
+It is useful to identify appropriate data structures in your engine given a #VmaAllocation,
+e.g. when doing \ref defragmentation.
+
+\code
+VkBufferCreateInfo bufCreateInfo = ...
+
+MyBufferMetadata* pMetadata = CreateBufferMetadata();
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+allocCreateInfo.pUserData = pMetadata;
+
+VkBuffer buffer;
+VmaAllocation allocation;
+vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buffer, &allocation, nullptr);
+\endcode
+
+The pointer may be later retrieved as VmaAllocationInfo::pUserData:
+
+\code
+VmaAllocationInfo allocInfo;
+vmaGetAllocationInfo(allocator, allocation, &allocInfo);
+MyBufferMetadata* pMetadata = (MyBufferMetadata*)allocInfo.pUserData;
+\endcode
+
+It can also be changed using the function vmaSetAllocationUserData().
+
+Values of (non-zero) allocations' `pUserData` are printed in the JSON report created by
+vmaBuildStatsString() in hexadecimal form.
+
+\section allocation_names Allocation names
+
+An allocation can also carry a null-terminated string, giving a name to the allocation.
+To set it, call vmaSetAllocationName().
+The library creates an internal copy of the string, so the pointer you pass doesn't need
+to be valid for the whole lifetime of the allocation. You can free it after the call.
+
+\code
+std::string imageName = "Texture: ";
+imageName += fileName;
+vmaSetAllocationName(allocator, allocation, imageName.c_str());
+\endcode
+
+The string can be later retrieved by inspecting VmaAllocationInfo::pName.
+It is also printed in the JSON report created by vmaBuildStatsString().
+
+\note Setting a string name on a VMA allocation doesn't automatically set it on the Vulkan buffer or image created with it.
+You must do it manually using an extension like VK_EXT_debug_utils, which is independent of this library.
+
+
+\page virtual_allocator Virtual allocator
+
+As an extra feature, the core allocation algorithm of the library is exposed through a simple and convenient API of a "virtual allocator".
+It doesn't allocate any real GPU memory. It just keeps track of used and free regions of a "virtual block".
+You can use it to allocate your own memory or other objects, even completely unrelated to Vulkan.
+A common use case is sub-allocation of pieces of one large GPU buffer.
+
+\section virtual_allocator_creating_virtual_block Creating virtual block
+
+To use this functionality, there is no main "allocator" object.
+You don't need to have a #VmaAllocator object created.
+All you need to do is to create a separate #VmaVirtualBlock object for each block of memory you want to be managed by the allocator:
+
+-# Fill in #VmaVirtualBlockCreateInfo structure.
+-# Call vmaCreateVirtualBlock(). Get the new #VmaVirtualBlock object.
+
+Example:
+
+\code
+VmaVirtualBlockCreateInfo blockCreateInfo = {};
+blockCreateInfo.size = 1048576; // 1 MB
+
+VmaVirtualBlock block;
+VkResult res = vmaCreateVirtualBlock(&blockCreateInfo, &block);
+\endcode
+
+\section virtual_allocator_making_virtual_allocations Making virtual allocations
+
+A #VmaVirtualBlock object contains an internal data structure that keeps track of free and occupied regions
+using the same code as the main Vulkan memory allocator.
+Similarly to #VmaAllocation for standard GPU allocations, there is the #VmaVirtualAllocation type
+that represents an opaque handle to an allocation within the virtual block.
+
+In order to make such an allocation:
+
+-# Fill in #VmaVirtualAllocationCreateInfo structure.
+-# Call vmaVirtualAllocate(). Get the new #VmaVirtualAllocation object that represents the allocation.
+   You can also receive the `VkDeviceSize offset` that was assigned to the allocation.
+
+Example:
+
+\code
+VmaVirtualAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.size = 4096; // 4 KB
+
+VmaVirtualAllocation alloc;
+VkDeviceSize offset;
+res = vmaVirtualAllocate(block, &allocCreateInfo, &alloc, &offset);
+if(res == VK_SUCCESS)
+{
+    // Use the 4 KB of your memory starting at offset.
+}
+else
+{
+    // Allocation failed - no space for it could be found. Handle this error!
+}
+\endcode
+
+\section virtual_allocator_deallocation Deallocation
+
+When no longer needed, an allocation can be freed by calling vmaVirtualFree().
+You can only pass to this function an allocation that was previously returned by vmaVirtualAllocate()
+called for the same #VmaVirtualBlock.
+
+When the whole block is no longer needed, the block object can be released by calling vmaDestroyVirtualBlock().
+All allocations must be freed before the block is destroyed, which is checked internally by an assert.
+However, if you don't want to call vmaVirtualFree() for each allocation, you can use vmaClearVirtualBlock() to free them all at once -
+a feature not available in the normal Vulkan memory allocator. Example:
+
+\code
+vmaVirtualFree(block, alloc);
+vmaDestroyVirtualBlock(block);
+\endcode
+
+\section virtual_allocator_allocation_parameters Allocation parameters
+
+You can attach a custom pointer to each allocation by using vmaSetVirtualAllocationUserData().
+Its default value is null.
+It can be used to store any data that needs to be associated with that allocation - e.g. an index, a handle, or a pointer to some
+larger data structure containing more information. Example:
+
+\code
+struct CustomAllocData
+{
+    std::string m_AllocName;
+};
+CustomAllocData* allocData = new CustomAllocData();
+allocData->m_AllocName = "My allocation 1";
+vmaSetVirtualAllocationUserData(block, alloc, allocData);
+\endcode
+
+The pointer can later be fetched, along with allocation offset and size, by passing the allocation handle to the function
+vmaGetVirtualAllocationInfo() and inspecting the returned structure #VmaVirtualAllocationInfo.
+If you allocated a new object to be used as the custom pointer, don't forget to delete that object before freeing the allocation!
+Example:
+
+\code
+VmaVirtualAllocationInfo allocInfo;
+vmaGetVirtualAllocationInfo(block, alloc, &allocInfo);
+delete (CustomAllocData*)allocInfo.pUserData;
+
+vmaVirtualFree(block, alloc);
+\endcode
+
+\section virtual_allocator_alignment_and_units Alignment and units
+
+It feels natural to express sizes and offsets in bytes.
+If an offset of an allocation needs to be aligned to a multiple of some number (e.g. 4 bytes), you can fill the optional member
+VmaVirtualAllocationCreateInfo::alignment to request it. Example:
+
+\code
+VmaVirtualAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.size = 4096; // 4 KB
+allocCreateInfo.alignment = 4; // Returned offset must be a multiple of 4 B.
+
+VmaVirtualAllocation alloc;
+res = vmaVirtualAllocate(block, &allocCreateInfo, &alloc, nullptr);
+\endcode
+
+Alignments of different allocations made from one block may vary.
+However, if all alignments and sizes are always a multiple of some unit, e.g. 4 B or `sizeof(MyDataStruct)`,
+you can express all sizes, alignments, and offsets in multiples of that unit instead of individual bytes.
+It might be more convenient, but you then need to use this new unit consistently in all the places:
+
+- VmaVirtualBlockCreateInfo::size
+- VmaVirtualAllocationCreateInfo::size and VmaVirtualAllocationCreateInfo::alignment
+- The offset returned by vmaVirtualAllocate() or in VmaVirtualAllocationInfo::offset
+
+\section virtual_allocator_statistics Statistics
+
+You can obtain statistics of a virtual block using vmaGetVirtualBlockStatistics()
+(to get brief statistics that are fast to calculate)
+or vmaCalculateVirtualBlockStatistics() (to get more detailed statistics, slower to calculate).
+The functions fill the structures #VmaStatistics, #VmaDetailedStatistics respectively - the same as used by the normal Vulkan memory allocator.
+Example:
+
+\code
+VmaStatistics stats;
+vmaGetVirtualBlockStatistics(block, &stats);
+printf("My virtual block has %llu bytes used by %u virtual allocations\n",
+    stats.allocationBytes, stats.allocationCount);
+\endcode
+
+You can also request a full list of allocations and free regions as a string in JSON format by calling
+vmaBuildVirtualBlockStatsString().
+The returned string must be later freed using vmaFreeVirtualBlockStatsString().
+The format of this string differs from the one returned by the main Vulkan allocator, but it is similar.
+
+\section virtual_allocator_additional_considerations Additional considerations
+
+The "virtual allocator" functionality is implemented at the level of individual memory blocks.
+Keeping track of a whole collection of blocks, allocating new ones when out of free space,
+deleting empty ones, and deciding which one to try first for a new allocation must be implemented by the user.
+
+Alternative allocation algorithms are supported, just like in custom pools of the real GPU memory.
+See enum #VmaVirtualBlockCreateFlagBits to learn how to specify them (e.g. #VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT).
+You can find their description in chapter \ref custom_memory_pools.
+Allocation strategies are also supported.
+See enum #VmaVirtualAllocationCreateFlagBits to learn how to specify them (e.g. #VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT).
+
+The following features are supported only by the allocator of the real GPU memory and not by virtual allocations:
+buffer-image granularity, `VMA_DEBUG_MARGIN`, `VMA_MIN_ALIGNMENT`.
+
+
+\page debugging_memory_usage Debugging incorrect memory usage
+
+If you suspect a bug with memory usage, like usage of uninitialized memory or
+memory being overwritten out of bounds of an allocation,
+you can use debug features of this library to verify this.
+
+\section debugging_memory_usage_initialization Memory initialization
+
+If you experience a bug with incorrect and nondeterministic data in your program and you suspect uninitialized memory to be used,
+you can enable automatic memory initialization to verify this.
+To do it, define the macro `VMA_DEBUG_INITIALIZE_ALLOCATIONS` to 1.
+
+\code
+#define VMA_DEBUG_INITIALIZE_ALLOCATIONS 1
+#include "vk_mem_alloc.h"
+\endcode
+
+It makes the memory of new allocations initialized to the bit pattern `0xDCDCDCDC`.
+Before an allocation is destroyed, its memory is filled with the bit pattern `0xEFEFEFEF`.
+Memory is automatically mapped and unmapped if necessary.
+
+If you find these values while debugging your program, chances are good that you incorrectly
+read Vulkan memory that is allocated but not initialized, or already freed, respectively.
+
+Memory initialization works only with memory types that are `HOST_VISIBLE` and with allocations that can be mapped.
+It works also with dedicated allocations.
+
+\section debugging_memory_usage_margins Margins
+
+By default, allocations are laid out in memory blocks next to each other if possible
+(considering required alignment, `bufferImageGranularity`, and `nonCoherentAtomSize`).
+
+![Allocations without margin](../gfx/Margins_1.png)
+
+Define the macro `VMA_DEBUG_MARGIN` to some non-zero value (e.g. 16) to enforce the specified
+number of bytes as a margin after every allocation.
+
+\code
+#define VMA_DEBUG_MARGIN 16
+#include "vk_mem_alloc.h"
+\endcode
+
+![Allocations with margin](../gfx/Margins_2.png)
+
+If your bug goes away after enabling margins, it means it may be caused by memory
+being overwritten outside of allocation boundaries. It is not 100% certain though.
+A change in application behavior may also be caused by a different order and distribution
+of allocations across memory blocks after margins are applied.
+
+Margins work with all types of memory.
+
+The margin is applied only to allocations made out of memory blocks and not to dedicated
+allocations, which have their own memory block of a specific size.
+It is thus not applied to allocations made using the #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT flag
+or those automatically placed in dedicated allocations, e.g. due to their
+large size or as recommended by the VK_KHR_dedicated_allocation extension.
+
+Margins appear in the [JSON dump](@ref statistics_json_dump) as part of free space.
+
+Note that enabling margins increases memory usage and fragmentation.
+
+Margins do not apply to the \ref virtual_allocator.
+
+\section debugging_memory_usage_corruption_detection Corruption detection
+
+You can additionally define the macro `VMA_DEBUG_DETECT_CORRUPTION` to 1 to enable validation
+of the contents of the margins.
+
+\code
+#define VMA_DEBUG_MARGIN 16
+#define VMA_DEBUG_DETECT_CORRUPTION 1
+#include "vk_mem_alloc.h"
+\endcode
+
+When this feature is enabled, the number of bytes specified as `VMA_DEBUG_MARGIN`
+(it must be a multiple of 4) after every allocation is filled with a magic number.
+This idea is also known as a "canary".
+Memory is automatically mapped and unmapped if necessary.
+
+This number is validated automatically when the allocation is destroyed.
+If it is not equal to the expected value, `VMA_ASSERT()` is executed.
+This clearly means that either the CPU or the GPU overwrote the memory outside the boundaries of the allocation,
+which indicates a serious bug.
+
+You can also explicitly request checking margins of all allocations in all memory blocks
+that belong to specified memory types by using the function vmaCheckCorruption(),
+or in memory blocks that belong to a specified custom pool, by using the function
+vmaCheckPoolCorruption().
+
+Margin validation (corruption detection) works only for memory types that are
+`HOST_VISIBLE` and `HOST_COHERENT`.
+
+
+\section debugging_memory_usage_leak_detection Leak detection features
+
+At allocation and allocator destruction time VMA checks for unfreed and unmapped blocks using
+`VMA_ASSERT_LEAK()`. This macro defaults to an assertion, triggering a typically fatal error in Debug
+builds, and doing nothing in Release builds. You can provide your own definition of `VMA_ASSERT_LEAK()`
+to change this behavior.
+
+At memory block destruction time VMA lists out all unfreed allocations using the `VMA_LEAK_LOG_FORMAT()`
+macro, which defaults to `VMA_DEBUG_LOG_FORMAT`, which in turn defaults to a no-op.
+If you're having trouble with leaks - for example, the aforementioned assertion triggers, but you don't
+quite know \em why - overriding this macro to print out the leaking blocks, combined with assigning
+individual names to allocations using vmaSetAllocationName(), can greatly aid in fixing them.
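+A minimal sketch of a custom `VMA_ASSERT_LEAK()` definition that logs instead of asserting,
+defined before including the library (this assumes a library version that provides this macro):
+
+\code
+#include <cstdio>
+#define VMA_ASSERT_LEAK(expr) do { \
+        if(!(expr)) \
+            printf("VMA leak check failed: %s\n", #expr); \
+    } while(false)
+#include "vk_mem_alloc.h"
+\endcode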
+
+\page other_api_interop Interop with other graphics APIs
+
+VMA provides some features that help with interoperability with other graphics APIs, e.g. OpenGL.
+
+\section opengl_interop_exporting_memory Exporting memory
+
+If you want to attach `VkExportMemoryAllocateInfoKHR` or another structure to the `pNext` chain of memory allocations made by the library:
+
+You can create \ref custom_memory_pools for such allocations.
+Define and fill in your `VkExportMemoryAllocateInfoKHR` structure and attach it to VmaPoolCreateInfo::pMemoryAllocateNext
+while creating the custom pool.
+Please note that the structure must remain alive and unchanged for the whole lifetime of the #VmaPool,
+not only while creating it, as no copy of the structure is made,
+but its original pointer is used for each allocation instead.
+
+If you want to export all memory allocated by VMA from certain memory types,
+including dedicated allocations and other allocations made from default pools,
+an alternative solution is to fill in VmaAllocatorCreateInfo::pTypeExternalMemoryHandleTypes.
+It should point to an array of `VkExternalMemoryHandleTypeFlagsKHR` to be automatically passed by the library
+through `VkExportMemoryAllocateInfoKHR` on each allocation made from a specific memory type.
+Please note that new versions of the library also support dedicated allocations created in custom pools.
+
+You should not mix these two methods in a way that would apply both to the same memory type.
+Otherwise, the `VkExportMemoryAllocateInfoKHR` structure would be attached twice to the `pNext` chain of `VkMemoryAllocateInfo`.
+
+
+\section opengl_interop_custom_alignment Custom alignment
+
+Buffers or images exported to a different API like OpenGL may require a different alignment,
+higher than the one used by the library automatically, queried from functions like `vkGetBufferMemoryRequirements`.
+To impose such alignment:
+
+You can create \ref custom_memory_pools for such allocations.
+Set the VmaPoolCreateInfo::minAllocationAlignment member to the minimum alignment required for each allocation
+to be made out of this pool.
+The alignment actually used will be the maximum of this member and the alignment returned for the specific buffer or image
+from a function like `vkGetBufferMemoryRequirements`, which is called by VMA automatically.
+
+If you want to create a buffer with a specific minimum alignment out of default pools,
+use the dedicated function vmaCreateBufferWithAlignment(), which takes the additional parameter `minAlignment`,
+as shown in the sketch below.
+
+Note that the problem of alignment affects only resources placed inside bigger `VkDeviceMemory` blocks and not dedicated
+allocations, as these, by definition, always have offset = 0 because the resource is bound to the beginning of its dedicated block,
+so any alignment requirement is trivially satisfied.
+You can ensure that an allocation is created as dedicated by using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT.
+Contrary to Direct3D 12, Vulkan doesn't have a concept of alignment of the entire memory block passed on its allocation.
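+A minimal sketch of the dedicated function; the 4096 B alignment is an arbitrary example
+standing in for whatever the other API requires:
+
+\code
+VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufCreateInfo.size = 65536;
+bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+
+VkBuffer buf;
+VmaAllocation alloc;
+VkResult res = vmaCreateBufferWithAlignment(allocator, &bufCreateInfo, &allocCreateInfo,
+    4096, // minAlignment
+    &buf, &alloc, nullptr);
+// Check res...
+\endcode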
+
+\section opengl_interop_extended_allocation_information Extended allocation information
+
+If you want to rely on VMA to allocate your buffers and images inside larger memory blocks,
+but you need to know the size of the entire block and whether the allocation was made
+with its own dedicated memory, use the function vmaGetAllocationInfo2() to retrieve
+extended allocation information in the structure #VmaAllocationInfo2.
+
+
+
+\page usage_patterns Recommended usage patterns
+
+Vulkan gives great flexibility in memory allocation.
+This chapter shows the most common patterns.
+
+See also slides from the talk:
+[Sawicki, Adam. Advanced Graphics Techniques Tutorial: Memory management in Vulkan and DX12. Game Developers Conference, 2018](https://www.gdcvault.com/play/1025458/Advanced-Graphics-Techniques-Tutorial-New)
+
+
+\section usage_patterns_gpu_only GPU-only resource
+
+When:
+Any resources that you frequently write and read on the GPU,
+e.g. images used as color attachments (aka "render targets"), depth-stencil attachments,
+images/buffers used as storage images/buffers (aka "Unordered Access View (UAV)").
+
+What to do:
+Let the library select the optimal memory type, which will likely have `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`.
+
+\code
+VkImageCreateInfo imgCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
+imgCreateInfo.imageType = VK_IMAGE_TYPE_2D;
+imgCreateInfo.extent.width = 3840;
+imgCreateInfo.extent.height = 2160;
+imgCreateInfo.extent.depth = 1;
+imgCreateInfo.mipLevels = 1;
+imgCreateInfo.arrayLayers = 1;
+imgCreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
+imgCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
+imgCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+imgCreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+imgCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+allocCreateInfo.flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
+allocCreateInfo.priority = 1.0f;
+
+VkImage img;
+VmaAllocation alloc;
+vmaCreateImage(allocator, &imgCreateInfo, &allocCreateInfo, &img, &alloc, nullptr);
+\endcode
+
+Also consider:
+Creating them as dedicated allocations using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT,
+especially if they are large or if you plan to destroy and recreate them with different sizes,
+e.g. when the display resolution changes.
+Prefer to create such resources first and all other GPU resources (like textures and vertex buffers) later.
+When the VK_EXT_memory_priority extension is enabled, it is also worth setting a high priority on such allocations
+to decrease the chances of them being evicted to system memory by the operating system.
+
+\section usage_patterns_staging_copy_upload Staging copy for upload
+
+When:
+A "staging" buffer that you want to map and fill from CPU code, then use as a source of transfer
+to some GPU resource.
+
+What to do:
+Use flag #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT.
+Let the library select the optimal memory type, which will always have `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT`.
+
+\code
+VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufCreateInfo.size = 65536;
+bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
+    VMA_ALLOCATION_CREATE_MAPPED_BIT;
+
+VkBuffer buf;
+VmaAllocation alloc;
+VmaAllocationInfo allocInfo;
+vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
+
+...
+
+memcpy(allocInfo.pMappedData, myData, myDataSize);
+\endcode
+
+Also consider:
+You can map the allocation using vmaMapMemory() or you can create it as persistently mapped
+using #VMA_ALLOCATION_CREATE_MAPPED_BIT, as in the example above.
+
+
+\section usage_patterns_readback Readback
+
+When:
+Buffers for data written by or transferred from the GPU that you want to read back on the CPU,
+e.g. results of some computations.
+
+What to do:
+Use flag #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT.
+Let the library select the optimal memory type, which will always have `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT`
+and `VK_MEMORY_PROPERTY_HOST_CACHED_BIT`.
+
+\code
+VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufCreateInfo.size = 65536;
+bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT |
+    VMA_ALLOCATION_CREATE_MAPPED_BIT;
+
+VkBuffer buf;
+VmaAllocation alloc;
+VmaAllocationInfo allocInfo;
+vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
+
+...
+
+const float* downloadedData = (const float*)allocInfo.pMappedData;
+\endcode
+
+
+\section usage_patterns_advanced_data_uploading Advanced data uploading
+
+For resources that you frequently write on the CPU via a mapped pointer and
+frequently read on the GPU e.g. as a uniform buffer (also called "dynamic"), multiple options are possible:
+
+-# The easiest solution is to have one copy of the resource in `HOST_VISIBLE` memory,
+   even if it means system RAM (not `DEVICE_LOCAL`) on systems with a discrete graphics card,
+   and make the device reach out to that resource directly.
+   - Reads performed by the device will then go through the PCI Express bus.
+     The performance of this access may be limited, but it may be fine depending on the size
+     of this resource (whether it is small enough to quickly end up in the GPU cache) and the sparsity
+     of access.
+-# On systems with unified memory (e.g. AMD APU or Intel integrated graphics, mobile chips),
+   a memory type may be available that is both `HOST_VISIBLE` (available for mapping) and `DEVICE_LOCAL`
+   (fast to access from the GPU). Then, it is likely the best choice for such a type of resource.
+-# Systems with a discrete graphics card and separate video memory may or may not expose
+   a memory type that is both `HOST_VISIBLE` and `DEVICE_LOCAL`, also known as Base Address Register (BAR).
+   If they do, it represents a piece of VRAM (or the entire VRAM, if ReBAR is enabled in the motherboard BIOS)
+   that is available to the CPU for mapping.
+   - Writes performed by the host to that memory go through the PCI Express bus.
+     The performance of these writes may be limited, but it may be fine, especially on PCIe 4.0,
+     as long as the rules of using uncached and write-combined memory are followed - only sequential writes and no reads.
+-# Finally, you may need or prefer to create a separate copy of the resource in `DEVICE_LOCAL` memory,
+   a separate "staging" copy in `HOST_VISIBLE` memory, and perform an explicit transfer command between them.
+
+Thankfully, VMA offers an aid to create and use such resources in the way optimal
+for the current Vulkan device. To help the library make the best choice,
+use flag #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT together with
+#VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT.
+It will then prefer a memory type that is both `DEVICE_LOCAL` and `HOST_VISIBLE` (integrated memory or BAR),
+but if no such memory type is available or allocation from it fails
+(PC graphics cards have only 256 MB of BAR by default, unless ReBAR is supported and enabled in the BIOS),
+it will fall back to `DEVICE_LOCAL` memory for fast GPU access.
+It is then up to you to detect that the allocation ended up in a memory type that is not `HOST_VISIBLE`,
+in which case you need to create another "staging" allocation and perform explicit transfers.
+
+\code
+VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufCreateInfo.size = 65536;
+bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
+    VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT |
+    VMA_ALLOCATION_CREATE_MAPPED_BIT;
+
+VkBuffer buf;
+VmaAllocation alloc;
+VmaAllocationInfo allocInfo;
+VkResult result = vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
+// Check result...
+
+VkMemoryPropertyFlags memPropFlags;
+vmaGetAllocationMemoryProperties(allocator, alloc, &memPropFlags);
+
+if(memPropFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
+{
+    // Allocation ended up in a mappable memory and is already mapped - write to it directly.
+
+    // [Executed in runtime]:
+    memcpy(allocInfo.pMappedData, myData, myDataSize);
+    result = vmaFlushAllocation(allocator, alloc, 0, VK_WHOLE_SIZE);
+    // Check result...
+
+    VkBufferMemoryBarrier bufMemBarrier = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER };
+    bufMemBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
+    bufMemBarrier.dstAccessMask = VK_ACCESS_UNIFORM_READ_BIT;
+    bufMemBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+    bufMemBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+    bufMemBarrier.buffer = buf;
+    bufMemBarrier.offset = 0;
+    bufMemBarrier.size = VK_WHOLE_SIZE;
+
+    vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT,
+        0, 0, nullptr, 1, &bufMemBarrier, 0, nullptr);
+}
+else
+{
+    // Allocation ended up in a non-mappable memory - a transfer using a staging buffer is required.
+    VkBufferCreateInfo stagingBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+    stagingBufCreateInfo.size = 65536;
+    stagingBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
+
+    VmaAllocationCreateInfo stagingAllocCreateInfo = {};
+    stagingAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+    stagingAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
+        VMA_ALLOCATION_CREATE_MAPPED_BIT;
+
+    VkBuffer stagingBuf;
+    VmaAllocation stagingAlloc;
+    VmaAllocationInfo stagingAllocInfo;
+    result = vmaCreateBuffer(allocator, &stagingBufCreateInfo, &stagingAllocCreateInfo,
+        &stagingBuf, &stagingAlloc, &stagingAllocInfo);
+    // Check result...
+
+    // [Executed in runtime]:
+    memcpy(stagingAllocInfo.pMappedData, myData, myDataSize);
+    result = vmaFlushAllocation(allocator, stagingAlloc, 0, VK_WHOLE_SIZE);
+    // Check result...
+
+    VkBufferMemoryBarrier bufMemBarrier = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER };
+    bufMemBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
+    bufMemBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
+    bufMemBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+    bufMemBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+    bufMemBarrier.buffer = stagingBuf;
+    bufMemBarrier.offset = 0;
+    bufMemBarrier.size = VK_WHOLE_SIZE;
+
+    vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+        0, 0, nullptr, 1, &bufMemBarrier, 0, nullptr);
+
+    VkBufferCopy bufCopy = {
+        0, // srcOffset
+        0, // dstOffset
+        myDataSize, // size
+    };
+
+    vkCmdCopyBuffer(cmdBuf, stagingBuf, buf, 1, &bufCopy);
+
+    VkBufferMemoryBarrier bufMemBarrier2 = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER };
+    bufMemBarrier2.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+    bufMemBarrier2.dstAccessMask = VK_ACCESS_UNIFORM_READ_BIT; // We created a uniform buffer.
+    bufMemBarrier2.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+    bufMemBarrier2.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+    bufMemBarrier2.buffer = buf;
+    bufMemBarrier2.offset = 0;
+    bufMemBarrier2.size = VK_WHOLE_SIZE;
+
+    vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT,
+        0, 0, nullptr, 1, &bufMemBarrier2, 0, nullptr);
+}
+\endcode
+
+\section usage_patterns_other_use_cases Other use cases
+
+Here are some other, less obvious use cases and their recommended settings:
+
+- An image that is used only as a transfer source and destination, but it should stay on the device,
+  as it is used to temporarily store a copy of some texture, e.g. from the current to the next frame,
+  for temporal antialiasing or other temporal effects.
+  - Use `VkImageCreateInfo::usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT`
+  - Use VmaAllocationCreateInfo::usage = #VMA_MEMORY_USAGE_AUTO
+- An image that is used only as a transfer source and destination, but it should be placed
+  in the system RAM even though it doesn't need to be mapped, because it serves as a "swap" copy to evict
+  least recently used textures from VRAM.
+  - Use `VkImageCreateInfo::usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT`
+  - Use VmaAllocationCreateInfo::usage = #VMA_MEMORY_USAGE_AUTO_PREFER_HOST,
+    as VMA needs a hint here to differentiate from the previous case.
+- A buffer that you want to map and write from the CPU, directly read from the GPU
+  (e.g. as a uniform or vertex buffer), but you have a clear preference to place it in device or
+  host memory due to its large size.
+  - Use `VkBufferCreateInfo::usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT`
+  - Use VmaAllocationCreateInfo::usage = #VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE or #VMA_MEMORY_USAGE_AUTO_PREFER_HOST
+  - Use VmaAllocationCreateInfo::flags = #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT
+
+
+\page configuration Configuration
+
+Please check the "CONFIGURATION SECTION" in the code to find macros that you can define
+before each include of this file, or change directly in this file, to provide
+your own implementation of basic facilities like assert, `min()` and `max()` functions,
+mutex, atomic, etc.
+
+For example, define `VMA_ASSERT(expr)` before including the library to provide a
+custom implementation of the assertion, compatible with your project.
+By default, it is defined to the standard C `assert(expr)` in the `_DEBUG` configuration
+and is empty otherwise.
+
+Similarly, you can define the `VMA_LEAK_LOG_FORMAT` macro to enable printing of leaked (unfreed) allocations,
+including their names and other parameters. Example:
+
+\code
+#define VMA_LEAK_LOG_FORMAT(format, ...) do { \
+        printf((format), __VA_ARGS__); \
+        printf("\n"); \
+    } while(false)
+\endcode
+
+\section config_Vulkan_functions Pointers to Vulkan functions
+
+There are multiple ways to import pointers to Vulkan functions in the library.
+In the simplest case, you don't need to do anything.
+If the compilation or linking of your program or the initialization of the #VmaAllocator
+doesn't work for you, you can try to reconfigure it.
+
+First, the allocator tries to fetch pointers to Vulkan functions linked statically,
+like this:
+
+\code
+m_VulkanFunctions.vkAllocateMemory = (PFN_vkAllocateMemory)vkAllocateMemory;
+\endcode
+
+If you want to disable this feature, set the configuration macro: `#define VMA_STATIC_VULKAN_FUNCTIONS 0`.
+
+Second, you can provide the pointers yourself by setting the member VmaAllocatorCreateInfo::pVulkanFunctions.
+You can fetch them e.g. using the functions `vkGetInstanceProcAddr` and `vkGetDeviceProcAddr` or
+by using a helper library like [volk](https://github.com/zeux/volk).
+
+Third, VMA tries to fetch the remaining pointers that are still null by calling
+`vkGetInstanceProcAddr` and `vkGetDeviceProcAddr` on its own.
+You only need to fill in VmaVulkanFunctions::vkGetInstanceProcAddr and VmaVulkanFunctions::vkGetDeviceProcAddr.
+Other pointers will be fetched automatically.
+If you want to disable this feature, set the configuration macro: `#define VMA_DYNAMIC_VULKAN_FUNCTIONS 0`.
+
+Finally, all the function pointers required by the library (considering the selected
+Vulkan version and enabled extensions) are checked with `VMA_ASSERT` to ensure they are not null.
+
+
+\section custom_memory_allocator Custom host memory allocator
+
+If you use a custom allocator for CPU memory rather than the default C++ operator `new`
+and `delete`, you can make this library use your allocator as well
+by filling the optional member VmaAllocatorCreateInfo::pAllocationCallbacks. These
+functions will be passed to Vulkan, as well as used by the library itself to
+make any CPU-side allocations.
+
+\section allocation_callbacks Device memory allocation callbacks
+
+The library makes calls to `vkAllocateMemory()` and `vkFreeMemory()` internally.
+You can set up callbacks to be informed about these calls, e.g. for the purpose
+of gathering some statistics. To do it, fill the optional member
+VmaAllocatorCreateInfo::pDeviceMemoryCallbacks.
+
+\section heap_memory_limit Device heap memory limit
+
+When device memory of a certain heap runs out of free space, new allocations may
+fail (returning an error code) or they may succeed, silently pushing some existing
+memory blocks from GPU VRAM to system RAM (which degrades performance). This
+behavior is implementation-dependent - it depends on the GPU vendor and graphics
+driver.
+
+On AMD cards it can be controlled while creating the Vulkan device object by using the
+VK_AMD_memory_overallocation_behavior extension, if available.
+
+Alternatively, if you want to test how your program behaves with a limited amount of Vulkan device
+memory available without switching your graphics card to one that really has
+a smaller VRAM, you can use a feature of this library intended for this purpose.
+To do it, fill the optional member VmaAllocatorCreateInfo::pHeapSizeLimit.
+
+
+
+\page vk_khr_dedicated_allocation VK_KHR_dedicated_allocation
+
+VK_KHR_dedicated_allocation is a Vulkan extension which can be used to improve
+performance on some GPUs. It augments the Vulkan API with the possibility to query
+the driver whether it prefers a particular buffer or image to have its own, dedicated
+allocation (a separate `VkDeviceMemory` block) for better efficiency - to be able
+to do some internal optimizations. The extension is supported by this library.
+It will be used automatically when enabled.
+
+It has been promoted to core Vulkan 1.1, so if you use an eligible Vulkan version
+and inform VMA about it by setting VmaAllocatorCreateInfo::vulkanApiVersion,
+you are all set.
+
+Otherwise, if you want to use it as an extension:
+
+1 . When creating the Vulkan device, check if the following 2 device extensions are
+supported (call `vkEnumerateDeviceExtensionProperties()`).
+If yes, enable them (fill `VkDeviceCreateInfo::ppEnabledExtensionNames`).
+
+- VK_KHR_get_memory_requirements2
+- VK_KHR_dedicated_allocation
+
+If you enabled these extensions:
+
+2 . Use the #VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT flag when creating
+your #VmaAllocator to inform the library that you enabled the required extensions
+and you want the library to use them.
+
+\code
+allocatorInfo.flags |= VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT;
+
+vmaCreateAllocator(&allocatorInfo, &allocator);
+\endcode
+
+That is all. The extension will be automatically used whenever you create a
+buffer using vmaCreateBuffer() or an image using vmaCreateImage().
+
+When using the extension together with the Vulkan Validation Layer, you may receive
+warnings like this:
+
+_vkBindBufferMemory(): Binding memory to buffer 0x33 but vkGetBufferMemoryRequirements() has not been called on that buffer._
+
+This is OK - just ignore it. It happens because the library uses the function
+`vkGetBufferMemoryRequirements2KHR()` instead of the standard
+`vkGetBufferMemoryRequirements()`, which the validation layer seems to be
+unaware of.
+
+To learn more about this extension, see:
+
+- [VK_KHR_dedicated_allocation in Vulkan specification](https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap50.html#VK_KHR_dedicated_allocation)
+- [VK_KHR_dedicated_allocation unofficial manual](http://asawicki.info/articles/VK_KHR_dedicated_allocation.php5)
+
+
+
+\page vk_ext_memory_priority VK_EXT_memory_priority
+
+VK_EXT_memory_priority is a device extension that allows passing an additional "priority"
+value to Vulkan memory allocations. The implementation may use it to prefer certain
+buffers and images that are critical for performance to stay in device-local memory
+in cases when the memory is over-subscribed, while some others may be moved to system memory.
+
+VMA offers convenient usage of this extension.
+If you enable it, you can pass the "priority" parameter when creating allocations or custom pools,
+and the library automatically passes the value to Vulkan using this extension.
+
+If you want to use this extension in connection with VMA, follow these steps
+(a condensed sketch of steps 3-5 follows after them):
+
+\section vk_ext_memory_priority_initialization Initialization
+
+1) Call `vkEnumerateDeviceExtensionProperties` for the physical device.
+Check if the extension is supported - if the returned array of `VkExtensionProperties` contains "VK_EXT_memory_priority".
+
+2) Call `vkGetPhysicalDeviceFeatures2` for the physical device instead of the old `vkGetPhysicalDeviceFeatures`.
+Attach the additional structure `VkPhysicalDeviceMemoryPriorityFeaturesEXT` to `VkPhysicalDeviceFeatures2::pNext` to be returned.
+Check if the device feature is really supported - check if `VkPhysicalDeviceMemoryPriorityFeaturesEXT::memoryPriority` is true.
+
+3) While creating the device with `vkCreateDevice`, enable this extension - add "VK_EXT_memory_priority"
+to the list passed as `VkDeviceCreateInfo::ppEnabledExtensionNames`.
+
+4) While creating the device, also don't set `VkDeviceCreateInfo::pEnabledFeatures`.
+Fill in the `VkPhysicalDeviceFeatures2` structure instead and pass it as `VkDeviceCreateInfo::pNext`.
+Enable this device feature - attach the additional structure `VkPhysicalDeviceMemoryPriorityFeaturesEXT` to the
+`VkPhysicalDeviceFeatures2::pNext` chain and set its member `memoryPriority` to `VK_TRUE`.
+
+5) While creating the #VmaAllocator with vmaCreateAllocator(), inform VMA that you
+have enabled this extension and feature - add #VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT
+to VmaAllocatorCreateInfo::flags.
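+A condensed sketch of steps 3-5, assuming `deviceCreateInfo` is the `VkDeviceCreateInfo`
+being filled, with "VK_EXT_memory_priority" already added to its extension list:
+
+\code
+VkPhysicalDeviceMemoryPriorityFeaturesEXT priorityFeatures = {
+    VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT };
+priorityFeatures.memoryPriority = VK_TRUE;
+
+VkPhysicalDeviceFeatures2 features2 = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2 };
+features2.pNext = &priorityFeatures;
+
+deviceCreateInfo.pEnabledFeatures = nullptr;
+deviceCreateInfo.pNext = &features2;
+// vkCreateDevice(physicalDevice, &deviceCreateInfo, nullptr, &device)...
+
+VmaAllocatorCreateInfo allocatorInfo = {};
+// Fill other members...
+allocatorInfo.flags |= VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT;
+// vmaCreateAllocator(&allocatorInfo, &allocator)...
+\endcode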
+
+
+
+\page vk_ext_memory_priority VK_EXT_memory_priority
+
+VK_EXT_memory_priority is a device extension that allows passing an additional "priority"
+value to Vulkan memory allocations, which the implementation may use to prefer certain
+buffers and images that are critical for performance to stay in device-local memory
+in cases when the memory is over-subscribed, while some others may be moved to system memory.
+
+VMA offers convenient usage of this extension.
+If you enable it, you can pass a "priority" parameter when creating allocations or custom pools
+and the library automatically passes the value to Vulkan using this extension.
+
+If you want to use this extension in connection with VMA, follow these steps:
+
+\section vk_ext_memory_priority_initialization Initialization
+
+1) Call `vkEnumerateDeviceExtensionProperties` for the physical device.
+Check whether the extension is supported - whether the returned array of `VkExtensionProperties` contains "VK_EXT_memory_priority".
+
+2) Call `vkGetPhysicalDeviceFeatures2` for the physical device instead of the old `vkGetPhysicalDeviceFeatures`.
+Attach the additional structure `VkPhysicalDeviceMemoryPriorityFeaturesEXT` to `VkPhysicalDeviceFeatures2::pNext` to be returned.
+Check whether the device feature is really supported - whether `VkPhysicalDeviceMemoryPriorityFeaturesEXT::memoryPriority` is true.
+
+3) While creating the device with `vkCreateDevice`, enable this extension - add "VK_EXT_memory_priority"
+to the list passed as `VkDeviceCreateInfo::ppEnabledExtensionNames`.
+
+4) While creating the device, also don't set `VkDeviceCreateInfo::pEnabledFeatures`.
+Fill in the `VkPhysicalDeviceFeatures2` structure instead and pass it as `VkDeviceCreateInfo::pNext`.
+Enable this device feature - attach the additional structure `VkPhysicalDeviceMemoryPriorityFeaturesEXT` to
+the `VkPhysicalDeviceFeatures2::pNext` chain and set its member `memoryPriority` to `VK_TRUE`.
+
+5) While creating the #VmaAllocator with vmaCreateAllocator(), inform VMA that you
+have enabled this extension and feature - add #VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT
+to VmaAllocatorCreateInfo::flags. (A sketch combining these steps is shown at the end of this page.)
+
+\section vk_ext_memory_priority_usage Usage
+
+When using this extension, you should initialize the following members:
+
+- VmaAllocationCreateInfo::priority when creating a dedicated allocation with #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT.
+- VmaPoolCreateInfo::priority when creating a custom pool.
+
+It should be a floating-point value between `0.0f` and `1.0f`, where the recommended default is `0.5f`.
+Memory allocated with a higher value can be treated by the Vulkan implementation as higher priority,
+so it has a lower chance of being pushed out to system memory and experiencing degraded performance.
+
+It might be a good idea to create performance-critical resources like color-attachment or depth-stencil images
+as dedicated allocations and set a high priority on them. For example:
+
+\code
+VkImageCreateInfo imgCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
+imgCreateInfo.imageType = VK_IMAGE_TYPE_2D;
+imgCreateInfo.extent.width = 3840;
+imgCreateInfo.extent.height = 2160;
+imgCreateInfo.extent.depth = 1;
+imgCreateInfo.mipLevels = 1;
+imgCreateInfo.arrayLayers = 1;
+imgCreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
+imgCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
+imgCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+imgCreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+imgCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+allocCreateInfo.flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
+allocCreateInfo.priority = 1.0f;
+
+VkImage img;
+VmaAllocation alloc;
+vmaCreateImage(allocator, &imgCreateInfo, &allocCreateInfo, &img, &alloc, nullptr);
+\endcode
+
+The `priority` member is ignored in the following situations:
+
+- Allocations created in custom pools: they inherit the priority, along with all other allocation parameters,
+  from the parameters passed in #VmaPoolCreateInfo when the pool was created.
+- Allocations created in default pools: they inherit the priority from the parameters
+  VMA used when creating the default pools, which means `priority == 0.5f`.
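+
+Putting the initialization steps together, a minimal sketch (local variable names
+are illustrative and error handling is omitted):
+
+\code
+// Step 2: query the feature.
+VkPhysicalDeviceMemoryPriorityFeaturesEXT priorityFeatures = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT };
+VkPhysicalDeviceFeatures2 features2 = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2 };
+features2.pNext = &priorityFeatures;
+vkGetPhysicalDeviceFeatures2(physicalDevice, &features2);
+
+if (priorityFeatures.memoryPriority == VK_TRUE)
+{
+    // Steps 3-4: add "VK_EXT_memory_priority" to ppEnabledExtensionNames and
+    // chain "features2" into VkDeviceCreateInfo::pNext, leaving pEnabledFeatures null.
+
+    // Step 5:
+    allocatorInfo.flags |= VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT;
+}
+\endcode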
+
+
+\page vk_amd_device_coherent_memory VK_AMD_device_coherent_memory
+
+VK_AMD_device_coherent_memory is a device extension that enables access to
+additional memory types with the `VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD` and
+`VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD` flags. It is useful mostly for
+allocation of buffers intended for writing "breadcrumb markers" in between passes
+or draw calls, which in turn are useful for debugging GPU crash/hang/TDR cases.
+
+When the extension is available but has not been enabled, the Vulkan physical device
+still exposes those memory types, but their usage is forbidden. VMA automatically
+takes care of that - it returns `VK_ERROR_FEATURE_NOT_PRESENT` when an attempt
+to allocate memory of such a type is made.
+
+If you want to use this extension in connection with VMA, follow these steps:
+
+\section vk_amd_device_coherent_memory_initialization Initialization
+
+1) Call `vkEnumerateDeviceExtensionProperties` for the physical device.
+Check whether the extension is supported - whether the returned array of `VkExtensionProperties` contains "VK_AMD_device_coherent_memory".
+
+2) Call `vkGetPhysicalDeviceFeatures2` for the physical device instead of the old `vkGetPhysicalDeviceFeatures`.
+Attach the additional structure `VkPhysicalDeviceCoherentMemoryFeaturesAMD` to `VkPhysicalDeviceFeatures2::pNext` to be returned.
+Check whether the device feature is really supported - whether `VkPhysicalDeviceCoherentMemoryFeaturesAMD::deviceCoherentMemory` is true.
+
+3) While creating the device with `vkCreateDevice`, enable this extension - add "VK_AMD_device_coherent_memory"
+to the list passed as `VkDeviceCreateInfo::ppEnabledExtensionNames`.
+
+4) While creating the device, also don't set `VkDeviceCreateInfo::pEnabledFeatures`.
+Fill in the `VkPhysicalDeviceFeatures2` structure instead and pass it as `VkDeviceCreateInfo::pNext`.
+Enable this device feature - attach the additional structure `VkPhysicalDeviceCoherentMemoryFeaturesAMD` to
+`VkPhysicalDeviceFeatures2::pNext` and set its member `deviceCoherentMemory` to `VK_TRUE`.
+
+5) While creating the #VmaAllocator with vmaCreateAllocator(), inform VMA that you
+have enabled this extension and feature - add #VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT
+to VmaAllocatorCreateInfo::flags.
+
+\section vk_amd_device_coherent_memory_usage Usage
+
+After following the steps described above, you can create VMA allocations and custom pools
+out of the special `DEVICE_COHERENT` and `DEVICE_UNCACHED` memory types on eligible
+devices. There are multiple ways to do it, for example:
+
+- You can request or prefer to allocate out of such memory types by adding
+  `VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD` to VmaAllocationCreateInfo::requiredFlags
+  or VmaAllocationCreateInfo::preferredFlags (see the sketch at the end of this page).
+  Those flags can be freely mixed with other ways of \ref choosing_memory_type,
+  like setting VmaAllocationCreateInfo::usage.
+- If you manually found the memory type index to use for this purpose, force allocation
+  from this specific index by setting VmaAllocationCreateInfo::memoryTypeBits `= 1u << index`.
+
+\section vk_amd_device_coherent_memory_more_information More information
+
+To learn more about this extension, see [VK_AMD_device_coherent_memory in Vulkan specification](https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VK_AMD_device_coherent_memory.html)
+
+Example use of this extension can be found in the code of the sample and test suite
+accompanying this library.
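+
+To make the first option concrete, a minimal sketch of allocating a small "breadcrumb"
+buffer from an uncached memory type (the buffer size and usage flags are illustrative):
+
+\code
+VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufCreateInfo.size = 4096; // illustrative size for breadcrumb markers
+bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+allocCreateInfo.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD;
+
+VkBuffer buf;
+VmaAllocation alloc;
+vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, nullptr);
+\endcode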
+
+
+\page enabling_buffer_device_address Enabling buffer device address
+
+The device extension VK_KHR_buffer_device_address
+allows fetching a raw GPU pointer to a buffer and passing it for use in shader code.
+It has been promoted to core Vulkan 1.2.
+
+If you want to use this feature in connection with VMA, follow these steps:
+
+\section enabling_buffer_device_address_initialization Initialization
+
+1) (For Vulkan version < 1.2) Call `vkEnumerateDeviceExtensionProperties` for the physical device.
+Check whether the extension is supported - whether the returned array of `VkExtensionProperties` contains
+"VK_KHR_buffer_device_address".
+
+2) Call `vkGetPhysicalDeviceFeatures2` for the physical device instead of the old `vkGetPhysicalDeviceFeatures`.
+Attach the additional structure `VkPhysicalDeviceBufferDeviceAddressFeatures*` to `VkPhysicalDeviceFeatures2::pNext` to be returned.
+Check whether the device feature is really supported - whether `VkPhysicalDeviceBufferDeviceAddressFeatures::bufferDeviceAddress` is true.
+
+3) (For Vulkan version < 1.2) While creating the device with `vkCreateDevice`, enable this extension - add
+"VK_KHR_buffer_device_address" to the list passed as `VkDeviceCreateInfo::ppEnabledExtensionNames`.
+
+4) While creating the device, also don't set `VkDeviceCreateInfo::pEnabledFeatures`.
+Fill in the `VkPhysicalDeviceFeatures2` structure instead and pass it as `VkDeviceCreateInfo::pNext`.
+Enable this device feature - attach the additional structure `VkPhysicalDeviceBufferDeviceAddressFeatures*` to
+`VkPhysicalDeviceFeatures2::pNext` and set its member `bufferDeviceAddress` to `VK_TRUE`.
+
+5) While creating the #VmaAllocator with vmaCreateAllocator(), inform VMA that you
+have enabled this feature - add #VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT
+to VmaAllocatorCreateInfo::flags.
+
+\section enabling_buffer_device_address_usage Usage
+
+After following the steps described above, you can create buffers with `VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT*` using VMA
+(see the sketch at the end of this page).
+The library automatically adds `VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT*` to
+allocated memory blocks wherever it might be needed.
+
+Please note that the library supports only `VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT*`.
+The second part of this functionality, related to "capture and replay", is not supported,
+as it is intended for use in debugging tools like RenderDoc, not in everyday Vulkan usage.
+
+\section enabling_buffer_device_address_more_information More information
+
+To learn more about this extension, see [VK_KHR_buffer_device_address in Vulkan specification](https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap46.html#VK_KHR_buffer_device_address)
+
+Example use of this extension can be found in the code of the sample and test suite
+accompanying this library.
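+
+For example, a minimal sketch of creating such a buffer and querying its GPU address,
+assuming the allocator was created with the flag above (the buffer size and usage are illustrative):
+
+\code
+VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufCreateInfo.size = 65536;
+bufCreateInfo.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+
+VkBuffer buf;
+VmaAllocation alloc;
+vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, nullptr);
+
+// Query the raw GPU address (core in Vulkan 1.2).
+VkBufferDeviceAddressInfo addressInfo = { VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO };
+addressInfo.buffer = buf;
+VkDeviceAddress gpuAddress = vkGetBufferDeviceAddress(device, &addressInfo);
+\endcode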
+
+\page general_considerations General considerations
+
+\section general_considerations_thread_safety Thread safety
+
+- The library has no global state, so separate #VmaAllocator objects can be used
+  independently.
+  There should be no need to create multiple such objects though - one per `VkDevice` is enough.
+- By default, all calls to functions that take a #VmaAllocator as the first parameter
+  are safe to call from multiple threads simultaneously because they are
+  synchronized internally when needed.
+  This includes allocation and deallocation from the default memory pool, as well as from a custom #VmaPool.
+- When the allocator is created with the #VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT
+  flag, calls to functions that take such a #VmaAllocator object must be
+  synchronized externally.
+- Access to a #VmaAllocation object must be externally synchronized. For example,
+  you must not call vmaGetAllocationInfo() and vmaMapMemory() from different
+  threads at the same time if you pass the same #VmaAllocation object to these
+  functions.
+- A #VmaVirtualBlock is not safe to use from multiple threads simultaneously.
+
+\section general_considerations_versioning_and_compatibility Versioning and compatibility
+
+The library uses [**Semantic Versioning**](https://semver.org/),
+which means version numbers follow the convention: Major.Minor.Patch (e.g. 2.3.0), where:
+
+- An incremented Patch version means a release is backward- and forward-compatible,
+  introducing only some internal improvements, bug fixes, optimizations etc.
+  or changes that are out of scope of the official API described in this documentation.
+- An incremented Minor version means a release is backward-compatible,
+  so existing code that uses the library should continue to work, while some new
+  symbols could have been added: new structures, functions, new values in existing
+  enums and bit flags, new structure members, but not new function parameters.
+- An incremented Major version means a release could break some backward compatibility.
+
+All changes between official releases are documented in the file "CHANGELOG.md".
+
+\warning Backward compatibility is considered on the level of C++ source code, not binary linkage.
+Adding new members to existing structures is treated as backward compatible if initializing
+the new members to binary zero results in the old behavior.
+You should always fully initialize all library structures to zeros and not rely on their
+exact binary size.
+
+\section general_considerations_validation_layer_warnings Validation layer warnings
+
+When using this library, you may encounter the following types of warnings issued by
+the Vulkan validation layer. They don't necessarily indicate a bug, so you may need
+to just ignore them.
+
+- *vkBindBufferMemory(): Binding memory to buffer 0xeb8e4 but vkGetBufferMemoryRequirements() has not been called on that buffer.*
+  - It happens when the VK_KHR_dedicated_allocation extension is enabled.
+    The `vkGetBufferMemoryRequirements2KHR` function is used instead, while the validation layer seems to be unaware of it.
+- *Mapping an image with layout VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL can result in undefined behavior if this memory is used by the device. Only GENERAL or PREINITIALIZED should be used.*
+  - It happens when you map a buffer or image, because the library maps the entire
+    `VkDeviceMemory` block, where different types of images and buffers may end
+    up together, especially on GPUs with unified memory like Intel.
+- *Non-linear image 0xebc91 is aliased with linear buffer 0xeb8e4 which may indicate a bug.*
+  - It may happen when you use [defragmentation](@ref defragmentation).
+
+\section general_considerations_allocation_algorithm Allocation algorithm
+
+The library uses the following algorithm for allocation, in order:
+
+-# Try to find a free range of memory in existing blocks.
+-# If failed, try to create a new block of `VkDeviceMemory`, with the preferred block size.
+-# If failed, try to create such a block with size / 2, size / 4, size / 8.
+-# If failed, try to allocate a separate `VkDeviceMemory` for this allocation,
+   just like when you use #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT.
+-# If failed, choose another memory type that meets the requirements specified in
+   VmaAllocationCreateInfo and go to point 1.
+-# If failed, return `VK_ERROR_OUT_OF_DEVICE_MEMORY`.
+
+\section general_considerations_features_not_supported Features not supported
+
+Features deliberately excluded from the scope of this library:
+
+-# **Data transfer.** Uploading (streaming) and downloading data of buffers and images
+   between CPU and GPU memory and the related synchronization is the responsibility of the user.
+   Defining some "texture" object that would automatically stream its data from a
+   staging copy in CPU memory to GPU memory would rather be a feature of another,
+   higher-level library implemented on top of VMA.
+   VMA doesn't record any commands to a `VkCommandBuffer`. It just allocates memory.
+-# **Recreation of buffers and images.** Although the library has functions for
+   buffer and image creation, vmaCreateBuffer() and vmaCreateImage(), you need to
+   recreate these objects yourself after defragmentation. That is because the big
+   structures `VkBufferCreateInfo` and `VkImageCreateInfo` are not stored in
+   the #VmaAllocation object.
+-# **Handling CPU memory allocation failures.** When dynamically creating small C++
+   objects in CPU memory (not Vulkan memory), allocation failures are not checked
+   and handled gracefully, because that would complicate code significantly and
+   is usually not needed in desktop PC applications anyway.
+   Success of an allocation is just checked with an assert.
+-# **Code free of any compiler warnings.** Maintaining the library to compile and
+   work correctly on so many different platforms is hard enough. Being free of
+   any warnings, on any version of any compiler, is simply not feasible.
+   There are many preprocessor macros that make some variables unused, function parameters unreferenced,
+   or conditional expressions constant in some configurations.
+   The code of this library should not be bigger or more complicated just to silence these warnings.
+   It is recommended to disable such warnings instead.
+-# This is a C++ library with a C interface. **Bindings or ports to any other programming languages** are welcome as external projects but
+   are not going to be included in this repository.
+*/
+
+#if defined(__GNUC__)
+# pragma GCC diagnostic pop
+#elif defined(__clang__)
+# pragma clang diagnostic pop
+#else
+# pragma warning(pop)
+#endif
diff --git a/Include/Extensions/NRIDeviceCreation.h b/Include/Extensions/NRIDeviceCreation.h
index b98e9f17..eaa343ed 100644
--- a/Include/Extensions/NRIDeviceCreation.h
+++ b/Include/Extensions/NRIDeviceCreation.h
@@ -15,7 +15,7 @@ NRI_ENUM
     MAX_NUM
 );
 
-NRI_STRUCT(MemoryAllocatorInterface)
+NRI_STRUCT(AllocationCallbacks)
 {
     void* (*Allocate)(void* userArg, size_t size, size_t alignment);
     void* (*Reallocate)(void* userArg, void* memory, size_t size, size_t alignment);
@@ -49,9 +49,9 @@ NRI_STRUCT(VKExtensions)
 
 NRI_STRUCT(DeviceCreationDesc)
 {
-    const NRI_NAME(AdapterDesc)* adapterDesc; // optional
+    NRI_OPTIONAL const NRI_NAME(AdapterDesc)* adapterDesc;
     NRI_NAME(CallbackInterface) callbackInterface;
-    NRI_NAME(MemoryAllocatorInterface) memoryAllocatorInterface;
+    NRI_NAME(AllocationCallbacks) allocationCallbacks;
     NRI_NAME(SPIRVBindingOffsets) spirvBindingOffsets;
    NRI_NAME(VKExtensions) vkExtensions;
     NRI_NAME(GraphicsAPI) graphicsAPI;
@@ -66,6 +66,7 @@ NRI_STRUCT(DeviceCreationDesc)
 
     // Switches (enabled by default)
     bool disableVKRayTracing; // to save CPU memory in some implementations
+    bool disable3rdPartyAllocationCallbacks; // to use "allocationCallbacks" only for NRI needs
 };
 
 NRI_API NRI_NAME(Result) NRI_CALL nriEnumerateAdapters(NRI_NAME(AdapterDesc)* adapterDescs, uint32_t NRI_REF adapterDescNum);
diff --git a/Include/Extensions/NRIRayTracing.h b/Include/Extensions/NRIRayTracing.h
index 20e14568..7c5d68fd 100644
--- a/Include/Extensions/NRIRayTracing.h
+++ b/Include/Extensions/NRIRayTracing.h
@@ -203,7 +203,7 @@ NRI_STRUCT(DispatchRaysIndirectDesc)
 NRI_STRUCT(RayTracingInterface)
 {
     // Get
-    void (NRI_CALL *GetAccelerationStructureMemoryDesc)(const NRI_NAME_REF(AccelerationStructure) accelerationStructure, NRI_NAME_REF(MemoryDesc) memoryDesc);
+    void (NRI_CALL *GetAccelerationStructureMemoryDesc)(const NRI_NAME_REF(Device) device, const NRI_NAME_REF(AccelerationStructureDesc) accelerationStructureDesc, NRI_NAME(MemoryLocation) memoryLocation, NRI_NAME_REF(MemoryDesc) memoryDesc);
     uint64_t (NRI_CALL *GetAccelerationStructureUpdateScratchBufferSize)(const NRI_NAME_REF(AccelerationStructure) accelerationStructure);
     uint64_t (NRI_CALL *GetAccelerationStructureBuildScratchBufferSize)(const NRI_NAME_REF(AccelerationStructure) accelerationStructure);
     uint64_t (NRI_CALL *GetAccelerationStructureHandle)(const NRI_NAME_REF(AccelerationStructure) accelerationStructure);
diff --git a/Include/Extensions/NRIResourceAllocator.h b/Include/Extensions/NRIResourceAllocator.h
new file mode 100644
index 00000000..8c09b97a
--- /dev/null
+++ b/Include/Extensions/NRIResourceAllocator.h
@@ -0,0 +1,40 @@
+// © 2021 NVIDIA Corporation
+
+#pragma once
+
+// Convenient creation of resources, which get returned already bound to memory.
+// AMD Virtual Memory Allocator is used for memory allocations management: +// https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator +// https://github.com/GPUOpen-LibrariesAndSDKs/D3D12MemoryAllocator + +NRI_NAMESPACE_BEGIN + +NRI_STRUCT(AllocateBufferDesc) +{ + NRI_NAME(BufferDesc) desc; + NRI_NAME(MemoryLocation) memoryLocation; + float memoryPriority; // [-1; 1]: low < 0, normal = 0, high > 0 +}; + +NRI_STRUCT(AllocateTextureDesc) +{ + NRI_NAME(TextureDesc) desc; + NRI_NAME(MemoryLocation) memoryLocation; + float memoryPriority; +}; + +NRI_STRUCT(AllocateAccelerationStructureDesc) +{ + NRI_NAME(AccelerationStructureDesc) desc; + NRI_NAME(MemoryLocation) memoryLocation; + float memoryPriority; +}; + +NRI_STRUCT(ResourceAllocatorInterface) +{ + NRI_NAME(Result) (NRI_CALL *AllocateBuffer)(NRI_NAME_REF(Device) device, const NRI_NAME_REF(AllocateBufferDesc) bufferDesc, NRI_NAME_REF(Buffer*) buffer); + NRI_NAME(Result) (NRI_CALL *AllocateTexture)(NRI_NAME_REF(Device) device, const NRI_NAME_REF(AllocateTextureDesc) textureDesc, NRI_NAME_REF(Texture*) texture); + NRI_NAME(Result) (NRI_CALL *AllocateAccelerationStructure)(NRI_NAME_REF(Device) device, const NRI_NAME_REF(AllocateAccelerationStructureDesc) accelerationStructureDesc, NRI_NAME_REF(AccelerationStructure*) accelerationStructure); +}; + +NRI_NAMESPACE_END diff --git a/Include/Extensions/NRIStreamer.h b/Include/Extensions/NRIStreamer.h index b94d2bd6..b489cdcd 100644 --- a/Include/Extensions/NRIStreamer.h +++ b/Include/Extensions/NRIStreamer.h @@ -8,9 +8,9 @@ NRI_FORWARD_STRUCT(Streamer); NRI_STRUCT(StreamerDesc) { - // Statically allocated ring-buffer for dynamic constants (optional) - NRI_NAME(MemoryLocation) constantBufferMemoryLocation; // UPLOAD or DEVICE_UPLOAD - uint64_t constantBufferSize; + // Statically allocated ring-buffer for dynamic constants + NRI_OPTIONAL NRI_NAME(MemoryLocation) constantBufferMemoryLocation; // UPLOAD or DEVICE_UPLOAD + NRI_OPTIONAL uint64_t constantBufferSize; // Dynamically (re)allocated ring-buffer for copying and rendering (mandatory) NRI_NAME(MemoryLocation) dynamicBufferMemoryLocation; // UPLOAD or DEVICE_UPLOAD @@ -24,9 +24,9 @@ NRI_STRUCT(BufferUpdateRequestDesc) const void* data; // pointer must be valid until "CopyStreamerUpdateRequests" call uint64_t dataSize; - // Destination (optional, ignored for constants) - NRI_NAME(Buffer)* dstBuffer; - uint64_t dstBufferOffset; + // Destination (ignored for constants) + NRI_OPTIONAL NRI_NAME(Buffer)* dstBuffer; + NRI_OPTIONAL uint64_t dstBufferOffset; }; NRI_STRUCT(TextureUpdateRequestDesc) @@ -36,7 +36,7 @@ NRI_STRUCT(TextureUpdateRequestDesc) uint32_t dataRowPitch; uint32_t dataSlicePitch; - // Destination (mandatory) + // Destination NRI_NAME(Texture)* dstTexture; NRI_NAME(TextureRegionDesc) dstRegionDesc; }; diff --git a/Include/Extensions/NRIWrapperD3D11.h b/Include/Extensions/NRIWrapperD3D11.h index 7bd16ebd..dd455be8 100644 --- a/Include/Extensions/NRIWrapperD3D11.h +++ b/Include/Extensions/NRIWrapperD3D11.h @@ -14,12 +14,12 @@ NRI_NAMESPACE_BEGIN NRI_STRUCT(DeviceCreationD3D11Desc) { ID3D11Device* d3d11Device; - AGSContext* agsContext; // can be NULL + NRI_OPTIONAL AGSContext* agsContext; NRI_NAME(CallbackInterface) callbackInterface; - NRI_NAME(MemoryAllocatorInterface) memoryAllocatorInterface; + NRI_NAME(AllocationCallbacks) allocationCallbacks; bool enableD3D11CommandBufferEmulation; bool enableNRIValidation; - bool isNVAPILoaded; // At least NVAPI requires calling "NvAPI_Initialize" in DLL/EXE where the device is created in 
addition to NRI + bool isNVAPILoaded; // at least NVAPI requires calling "NvAPI_Initialize" in DLL/EXE where the device is created in addition to NRI }; NRI_STRUCT(CommandBufferD3D11Desc) @@ -30,13 +30,13 @@ NRI_STRUCT(CommandBufferD3D11Desc) NRI_STRUCT(BufferD3D11Desc) { ID3D11Resource* d3d11Resource; - const NRI_NAME(BufferDesc)* desc; // Can be NULL, but not all information can be retrieved from the resource + NRI_OPTIONAL const NRI_NAME(BufferDesc)* desc; // not all information can be retrieved from the resource if not provided }; NRI_STRUCT(TextureD3D11Desc) { ID3D11Resource* d3d11Resource; - const NRI_NAME(TextureDesc)* desc; // Can be NULL, but not all information can be retrieved from the resource + NRI_OPTIONAL const NRI_NAME(TextureDesc)* desc; // not all information can be retrieved from the resource if not provided }; NRI_STRUCT(WrapperD3D11Interface) diff --git a/Include/Extensions/NRIWrapperD3D12.h b/Include/Extensions/NRIWrapperD3D12.h index a40caa08..9cba8b5e 100644 --- a/Include/Extensions/NRIWrapperD3D12.h +++ b/Include/Extensions/NRIWrapperD3D12.h @@ -23,12 +23,12 @@ NRI_STRUCT(DeviceCreationD3D12Desc) ID3D12CommandQueue* d3d12GraphicsQueue; ID3D12CommandQueue* d3d12ComputeQueue; ID3D12CommandQueue* d3d12CopyQueue; - AGSContext* agsContext; // can be NULL + NRI_OPTIONAL AGSContext* agsContext; NRI_NAME(CallbackInterface) callbackInterface; - NRI_NAME(MemoryAllocatorInterface) memoryAllocatorInterface; + NRI_NAME(AllocationCallbacks) allocationCallbacks; bool enableD3D12DrawParametersEmulation; bool enableNRIValidation; - bool isNVAPILoaded; // At least NVAPI requires calling "NvAPI_Initialize" in DLL/EXE where the device is created in addition to NRI + bool isNVAPILoaded; // at least NVAPI requires calling "NvAPI_Initialize" in DLL/EXE where the device is created in addition to NRI }; NRI_STRUCT(CommandBufferD3D12Desc) @@ -47,20 +47,19 @@ NRI_STRUCT(DescriptorPoolD3D12Desc) NRI_STRUCT(BufferD3D12Desc) { ID3D12Resource* d3d12Resource; - const NRI_NAME(BufferDesc)* desc; // Can be NULL, but not all information can be retrieved from the resource - uint32_t structureStride; + NRI_OPTIONAL const NRI_NAME(BufferDesc)* desc; // not all information can be retrieved from the resource if not provided + NRI_OPTIONAL uint32_t structureStride; // must be provided if used as a structured or raw buffer }; NRI_STRUCT(TextureD3D12Desc) { ID3D12Resource* d3d12Resource; - const NRI_NAME(TextureDesc)* desc; // Can be NULL, but not all information can be retrieved from the resource + NRI_OPTIONAL const NRI_NAME(TextureDesc)* desc; // not all information can be retrieved from the resource if not provided }; NRI_STRUCT(MemoryD3D12Desc) { ID3D12Heap* d3d12Heap; - const D3D12_HEAP_DESC* d3d12HeapDesc; }; NRI_STRUCT(AccelerationStructureD3D12Desc) diff --git a/Include/Extensions/NRIWrapperVK.h b/Include/Extensions/NRIWrapperVK.h index 70090345..67441bf1 100644 --- a/Include/Extensions/NRIWrapperVK.h +++ b/Include/Extensions/NRIWrapperVK.h @@ -6,81 +6,68 @@ NRI_NAMESPACE_BEGIN +// Forward and "mimic" declarations NRI_FORWARD_STRUCT(AccelerationStructure); - -typedef uint64_t NRIVkCommandPool; -typedef uint64_t NRIVkImage; -typedef uint64_t NRIVkBuffer; -typedef uint64_t NRIVkDeviceMemory; -typedef uint64_t NRIVkQueryPool; -typedef uint64_t NRIVkPipeline; -typedef uint64_t NRIVkDescriptorPool; -typedef uint64_t NRIVkImageView; -typedef uint64_t NRIVkBufferView; -typedef uint64_t NRIVkAccelerationStructureKHR; - -typedef void* NRIVkInstance; -typedef void* NRIVkPhysicalDevice; -typedef void* 
NRIVkDevice; -typedef void* NRIVkQueue; -typedef void* NRIVkCommandBuffer; +typedef void* VKHandle; +typedef uint64_t VKNonDispatchableHandle; +typedef int32_t VKEnum; +typedef uint32_t VKFlags; NRI_STRUCT(DeviceCreationVKDesc) { NRI_NAME(CallbackInterface) callbackInterface; - NRI_NAME(MemoryAllocatorInterface) memoryAllocatorInterface; + NRI_NAME(AllocationCallbacks) allocationCallbacks; NRI_NAME(SPIRVBindingOffsets) spirvBindingOffsets; NRI_NAME(VKExtensions) enabledExtensions; - NRIVkInstance vkInstance; - NRIVkDevice vkDevice; - NRIVkPhysicalDevice vkPhysicalDevice; + VKHandle vkInstance; + VKHandle vkDevice; + VKHandle vkPhysicalDevice; const uint32_t* queueFamilyIndices; uint32_t queueFamilyIndexNum; - const char* vulkanLoaderPath; + const char* libraryPath; uint8_t minorVersion; // >= 2 }; NRI_STRUCT(CommandQueueVKDesc) { - NRIVkQueue vkQueue; + VKHandle vkQueue; uint32_t queueFamilyIndex; NRI_NAME(CommandQueueType) commandQueueType; }; NRI_STRUCT(CommandAllocatorVKDesc) { - NRIVkCommandPool vkCommandPool; + VKNonDispatchableHandle vkCommandPool; NRI_NAME(CommandQueueType) commandQueueType; }; NRI_STRUCT(CommandBufferVKDesc) { - NRIVkCommandBuffer vkCommandBuffer; + VKHandle vkCommandBuffer; NRI_NAME(CommandQueueType) commandQueueType; }; NRI_STRUCT(DescriptorPoolVKDesc) { - NRIVkDescriptorPool vkDescriptorPool; + VKNonDispatchableHandle vkDescriptorPool; uint32_t descriptorSetMaxNum; }; NRI_STRUCT(BufferVKDesc) { - NRIVkBuffer vkBuffer; - NRI_NAME(Memory)* memory; + VKNonDispatchableHandle vkBuffer; uint64_t size; - uint64_t memoryOffset; - uint64_t deviceAddress; - uint32_t structureStride; + NRI_OPTIONAL uint32_t structureStride; // must be provided if used as a structured or raw buffer + NRI_OPTIONAL uint8_t* mappedMemory; // must be provided if the underlying memory is mapped + NRI_OPTIONAL VKNonDispatchableHandle vkDeviceMemory; // must be provided *only* if the mapped memory exists and *not* HOST_COHERENT + NRI_OPTIONAL uint64_t deviceAddress; // must be provided for ray tracing }; NRI_STRUCT(TextureVKDesc) { - NRIVkImage vkImage; - uint32_t vkFormat; - uint32_t vkImageAspectFlags; - uint32_t vkImageType; + VKNonDispatchableHandle vkImage; + VKEnum vkFormat; + VKEnum vkImageType; NRI_NAME(Dim_t) width; NRI_NAME(Dim_t) height; NRI_NAME(Dim_t) depth; @@ -91,7 +78,7 @@ NRI_STRUCT(TextureVKDesc) NRI_STRUCT(MemoryVKDesc) { - NRIVkDeviceMemory vkDeviceMemory; + VKNonDispatchableHandle vkDeviceMemory; void* vkMappedMemory; uint64_t size; uint32_t memoryTypeIndex; @@ -99,13 +86,13 @@ NRI_STRUCT(MemoryVKDesc) NRI_STRUCT(QueryPoolVKDesc) { - NRIVkQueryPool vkQueryPool; - uint32_t vkQueryType; + VKNonDispatchableHandle vkQueryPool; + VKEnum vkQueryType; }; NRI_STRUCT(AccelerationStructureVKDesc) { - NRIVkAccelerationStructureKHR vkAccelerationStructure; + VKNonDispatchableHandle vkAccelerationStructure; uint64_t buildScratchSize; uint64_t updateScratchSize; }; @@ -119,15 +106,14 @@ NRI_STRUCT(WrapperVKInterface) NRI_NAME(Result) (NRI_CALL *CreateBufferVK)(NRI_NAME_REF(Device) device, const NRI_NAME_REF(BufferVKDesc) bufferVKDesc, NRI_NAME_REF(Buffer*) buffer); NRI_NAME(Result) (NRI_CALL *CreateTextureVK)(NRI_NAME_REF(Device) device, const NRI_NAME_REF(TextureVKDesc) textureVKDesc, NRI_NAME_REF(Texture*) texture); NRI_NAME(Result) (NRI_CALL *CreateMemoryVK)(NRI_NAME_REF(Device) device, const NRI_NAME_REF(MemoryVKDesc) memoryVKDesc, NRI_NAME_REF(Memory*) memory); - NRI_NAME(Result) (NRI_CALL *CreateGraphicsPipelineVK)(NRI_NAME_REF(Device) device, NRIVkPipeline vkPipeline, 
NRI_NAME_REF(Pipeline*) pipeline); - NRI_NAME(Result) (NRI_CALL *CreateComputePipelineVK)(NRI_NAME_REF(Device) device, NRIVkPipeline vkPipeline, NRI_NAME_REF(Pipeline*) pipeline); + NRI_NAME(Result) (NRI_CALL *CreateGraphicsPipelineVK)(NRI_NAME_REF(Device) device, VKNonDispatchableHandle vkPipeline, NRI_NAME_REF(Pipeline*) pipeline); + NRI_NAME(Result) (NRI_CALL *CreateComputePipelineVK)(NRI_NAME_REF(Device) device, VKNonDispatchableHandle vkPipeline, NRI_NAME_REF(Pipeline*) pipeline); NRI_NAME(Result) (NRI_CALL *CreateQueryPoolVK)(NRI_NAME_REF(Device) device, const NRI_NAME_REF(QueryPoolVKDesc) queryPoolVKDesc, NRI_NAME_REF(QueryPool*) queryPool); NRI_NAME(Result) (NRI_CALL *CreateAccelerationStructureVK)(NRI_NAME_REF(Device) device, const NRI_NAME_REF(AccelerationStructureVKDesc) accelerationStructureVKDesc, NRI_NAME_REF(AccelerationStructure*) accelerationStructure); - - NRIVkPhysicalDevice (NRI_CALL *GetVkPhysicalDevice)(const NRI_NAME_REF(Device) device); - NRIVkInstance (NRI_CALL *GetVkInstance)(const NRI_NAME_REF(Device) device); - void* (NRI_CALL *GetVkGetInstanceProcAddr)(const NRI_NAME_REF(Device) device); - void* (NRI_CALL *GetVkGetDeviceProcAddr)(const NRI_NAME_REF(Device) device); + VKHandle (NRI_CALL *GetPhysicalDeviceVK)(const NRI_NAME_REF(Device) device); + VKHandle (NRI_CALL *GetInstanceVK)(const NRI_NAME_REF(Device) device); + void* (NRI_CALL *GetInstanceProcAddrVK)(const NRI_NAME_REF(Device) device); + void* (NRI_CALL *GetDeviceProcAddrVK)(const NRI_NAME_REF(Device) device); }; NRI_API NRI_NAME(Result) NRI_CALL nriCreateDeviceFromVkDevice(const NRI_NAME_REF(DeviceCreationVKDesc) deviceDesc, NRI_NAME_REF(Device*) device); diff --git a/Include/NRI.h b/Include/NRI.h index 04d1827a..2bca7de3 100644 --- a/Include/NRI.h +++ b/Include/NRI.h @@ -25,8 +25,8 @@ Non-goals: #include #define NRI_VERSION_MAJOR 1 -#define NRI_VERSION_MINOR 140 -#define NRI_VERSION_DATE "6 August 2024" +#define NRI_VERSION_MINOR 141 +#define NRI_VERSION_DATE "23 August 2024" #ifdef _WIN32 #define NRI_CALL __fastcall @@ -35,7 +35,7 @@ Non-goals: #endif #ifndef NRI_API - #if NRI_STATIC_LIBRARY + #ifdef NRI_STATIC_LIBRARY #define NRI_API #elif defined(__cplusplus) #define NRI_API extern "C" @@ -64,6 +64,8 @@ NRI_STRUCT(CoreInterface) NRI_NAME(Result) (NRI_CALL *GetCommandQueue)(NRI_NAME_REF(Device) device, NRI_NAME(CommandQueueType) commandQueueType, NRI_NAME_REF(CommandQueue*) commandQueue); // Create + // "Creation" doesn't assume allocation of big chunks of memory on the device, but it happens for some entities implicitly + // "Allocation" emphasizes the fact that there is a chunk of memory allocated under the hood NRI_NAME(Result) (NRI_CALL *CreateCommandAllocator)(const NRI_NAME_REF(CommandQueue) commandQueue, NRI_NAME_REF(CommandAllocator*) commandAllocator); NRI_NAME(Result) (NRI_CALL *CreateCommandBuffer)(NRI_NAME_REF(CommandAllocator) commandAllocator, NRI_NAME_REF(CommandBuffer*) commandBuffer); NRI_NAME(Result) (NRI_CALL *CreateDescriptorPool)(NRI_NAME_REF(Device) device, const NRI_NAME_REF(DescriptorPoolDesc) descriptorPoolDesc, NRI_NAME_REF(DescriptorPool*) descriptorPool); @@ -93,10 +95,14 @@ NRI_STRUCT(CoreInterface) void (NRI_CALL *DestroyFence)(NRI_NAME_REF(Fence) fence); // Memory - // - use "GetBufferMemoryDesc" (or "GetTextureMemoryDesc") to get "MemoryDesc" ("usageBits" and "MemoryLocation" affect returned "MemoryType") - // - (optional) group returned "MemoryDesc"s by "MemoryType", but do not group if "mustBeDedicated = true" - // - call "BindBufferMemory" (or "BindTextureMemory") to 
bind resources to "Memory" objects - // => "CalculateAllocationNumber" and "AllocateAndBindMemory" simplify this process for static allocations + // Low level: + // - use "Get[Resource]MemoryDesc" to get "MemoryDesc" ("usageBits" and "MemoryLocation" affect returned "MemoryType") + // - (optional) group returned "MemoryDesc"s by "MemoryType", but don't group if "mustBeDedicated = true" + // - call "Bind[Resource]Memory" to bind resources to "Memory" objects + // Mid level: + // - "CalculateAllocationNumber" and "AllocateAndBindMemory" simplify this process for buffers and textures + // High level: + // - "ResourceAllocatorInterface" allows to create resources already bound to memory NRI_NAME(Result) (NRI_CALL *AllocateMemory)(NRI_NAME_REF(Device) device, const NRI_NAME_REF(AllocateMemoryDesc) allocateMemoryDesc, NRI_NAME_REF(Memory*) memory); NRI_NAME(Result) (NRI_CALL *BindBufferMemory)(NRI_NAME_REF(Device) device, const NRI_NAME(BufferMemoryBindingDesc)* memoryBindingDescs, uint32_t memoryBindingDescNum); NRI_NAME(Result) (NRI_CALL *BindTextureMemory)(NRI_NAME_REF(Device) device, const NRI_NAME(TextureMemoryBindingDesc)* memoryBindingDescs, uint32_t memoryBindingDescNum); diff --git a/Include/NRIDescs.h b/Include/NRIDescs.h index 822264e8..834ac85d 100644 --- a/Include/NRIDescs.h +++ b/Include/NRIDescs.h @@ -38,6 +38,9 @@ static const bool NRI_CONST_NAME(VARIABLE_DESCRIPTOR_NUM) = true; // helper for static const bool NRI_CONST_NAME(DESCRIPTOR_ARRAY) = true; // helper for "DescriptorRangeDesc::isArray" static const bool NRI_CONST_NAME(PARTIALLY_BOUND) = true; // helper for "DescriptorSetDesc::partiallyBound" +// Readability +#define NRI_OPTIONAL // i.e. can be 0 (keep an eye on comments) + //=============================================================================================================================== // Common //=============================================================================================================================== @@ -222,7 +225,7 @@ NRI_ENUM_BITS // Compute // Invoked by "CmdDispatch*" (not Rays) COMPUTE_SHADER = NRI_SET_BIT(10), // Compute shader - // Ray tracing // Invoked by "CmdDispatchRays" + // Ray tracing // Invoked by "CmdDispatchRays*" RAYGEN_SHADER = NRI_SET_BIT(11), // Ray generation shader MISS_SHADER = NRI_SET_BIT(12), // Miss shader INTERSECTION_SHADER = NRI_SET_BIT(13), // Intersection shader @@ -236,7 +239,7 @@ NRI_ENUM_BITS ACCELERATION_STRUCTURE = NRI_SET_BIT(19), // Invoked by "Cmd*AccelerationStructure*" // Modifiers - INDIRECT = NRI_SET_BIT(20), // Invoked by "Indirect" command (used as addition to other bits) + INDIRECT = NRI_SET_BIT(20), // Invoked by "Indirect" command (used in addition to other bits) // Umbrella stages TESSELLATION_SHADERS = NRI_ENUM_MEMBER_UNSCOPED(StageBits, TESS_CONTROL_SHADER) | @@ -1060,10 +1063,10 @@ NRI_STRUCT(ShaderDesc) NRI_STRUCT(GraphicsPipelineDesc) { const NRI_NAME(PipelineLayout)* pipelineLayout; - const NRI_NAME(VertexInputDesc)* vertexInput; // optional + NRI_OPTIONAL const NRI_NAME(VertexInputDesc)* vertexInput; NRI_NAME(InputAssemblyDesc) inputAssembly; NRI_NAME(RasterizationDesc) rasterization; - const NRI_NAME(MultisampleDesc)* multisample; // optional + NRI_OPTIONAL const NRI_NAME(MultisampleDesc)* multisample; NRI_NAME(OutputMergerDesc) outputMerger; const NRI_NAME(ShaderDesc)* shaders; uint32_t shaderNum; @@ -1218,7 +1221,7 @@ NRI_STRUCT(MemoryDesc) uint64_t size; uint32_t alignment; NRI_NAME(MemoryType) type; - bool mustBeDedicated; + bool mustBeDedicated; // must be put into a 
dedicated Memory, containing only 1 object with offset = 0 }; NRI_STRUCT(AllocateMemoryDesc) @@ -1412,20 +1415,9 @@ NRI_ENUM MAX_NUM ); -// Provided for convinience, when a memory chunk needs to be allocated in advance and it's unclear which resource categories can be placed together -NRI_ENUM -( - MemoryTier, uint8_t, - - ONE, // A memory can only support resources from a single resource category (buffers, color and depth-stencil attachments, all other textures) - TWO, // A memory can support resources from all 3 categories - - MAX_NUM -); - NRI_STRUCT(AdapterDesc) { - char description[128]; + char name[256]; uint64_t luid; uint64_t videoMemorySize; uint64_t systemMemorySize; @@ -1473,17 +1465,20 @@ NRI_STRUCT(DeviceDesc) uint64_t deviceUploadHeapSize; // ReBAR uint32_t memoryAllocationMaxNum; uint32_t samplerAllocationMaxNum; - uint32_t uploadBufferTextureRowAlignment; - uint32_t uploadBufferTextureSliceAlignment; - uint32_t typedBufferOffsetAlignment; - uint32_t constantBufferOffsetAlignment; uint32_t constantBufferMaxRange; - uint32_t storageBufferOffsetAlignment; uint32_t storageBufferMaxRange; uint32_t bufferTextureGranularity; uint64_t bufferMaxSize; uint32_t pushConstantsMaxSize; - NRI_NAME(MemoryTier) memoryTier; + + // Memory alignment + uint32_t uploadBufferTextureRowAlignment; + uint32_t uploadBufferTextureSliceAlignment; + uint32_t typedBufferOffsetAlignment; + uint32_t constantBufferOffsetAlignment; + uint32_t storageBufferOffsetAlignment; + uint32_t rayTracingShaderTableAlignment; + uint32_t rayTracingScratchAlignment; // Shader resources uint32_t boundDescriptorSetMaxNum; @@ -1537,11 +1532,9 @@ NRI_STRUCT(DeviceDesc) // Ray tracing uint32_t rayTracingShaderGroupIdentifierSize; - uint32_t rayTracingShaderTableAlignment; uint32_t rayTracingShaderTableMaxStride; uint32_t rayTracingShaderRecursionMaxDepth; uint32_t rayTracingGeometryObjectMaxNum; - uint32_t rayTracingScratchAlignment; // Mesh shaders uint32_t meshControlSharedMemoryMaxSize; @@ -1588,6 +1581,7 @@ NRI_STRUCT(DeviceDesc) uint32_t isDrawMeshTasksIndirectSupported : 1; uint32_t isMeshShaderPipelineStatsSupported : 1; uint32_t isEnchancedBarrierSupported : 1; // aka - can "Layout" be ignored? 
+ uint32_t isMemoryTier2Supported : 1; // a memory object can support resources from all 3 categories (buffers, attachments, all other textures) // Shader features uint32_t isShaderNativeI16Supported : 1; diff --git a/Resources/Version.h b/Resources/Version.h index 6b1db151..bdfe502f 100644 --- a/Resources/Version.h +++ b/Resources/Version.h @@ -4,7 +4,7 @@ #define STR(x) STR_HELPER(x) #define VERSION_MAJOR 1 -#define VERSION_MINOR 140 +#define VERSION_MINOR 141 #define VERSION_BUILD 0 #define VERSION_REVISION 0 diff --git a/Source/Creation/Creation.cpp b/Source/Creation/Creation.cpp index bc4af58d..680c88f6 100644 --- a/Source/Creation/Creation.cpp +++ b/Source/Creation/Creation.cpp @@ -17,7 +17,6 @@ Result CreateDeviceD3D12(const DeviceCreationD3D12Desc& deviceCreationDesc, Devi #if NRI_USE_VULKAN Result CreateDeviceVK(const DeviceCreationDesc& deviceCreationDesc, DeviceBase*& device); Result CreateDeviceVK(const DeviceCreationVKDesc& deviceDesc, DeviceBase*& device); -bool QueryVideoMemoryInfoVK(const Device& device, VideoMemoryInfo& videoMemoryInfo); #endif DeviceBase* CreateDeviceValidation(const DeviceCreationDesc& deviceCreationDesc, DeviceBase& device); @@ -40,6 +39,30 @@ NRI_API Result NRI_CALL nriGetInterface(const Device& device, const char* interf realInterfaceSize = sizeof(HelperInterface); if (realInterfaceSize == interfaceSize) result = deviceBase.FillFunctionTable(*(HelperInterface*)interfacePtr); + } else if (hash == Hash(NRI_STRINGIFY(nri::LowLatencyInterface)) || hash == Hash(NRI_STRINGIFY(NRI_NAME_C(LowLatencyInterface)))) { + realInterfaceSize = sizeof(LowLatencyInterface); + if (realInterfaceSize == interfaceSize) + result = deviceBase.FillFunctionTable(*(LowLatencyInterface*)interfacePtr); + } else if (hash == Hash(NRI_STRINGIFY(nri::MeshShaderInterface)) || hash == Hash(NRI_STRINGIFY(NRI_NAME_C(MeshShaderInterface)))) { + realInterfaceSize = sizeof(MeshShaderInterface); + if (realInterfaceSize == interfaceSize) + result = deviceBase.FillFunctionTable(*(MeshShaderInterface*)interfacePtr); + } else if (hash == Hash(NRI_STRINGIFY(nri::RayTracingInterface)) || hash == Hash(NRI_STRINGIFY(NRI_NAME_C(RayTracingInterface)))) { + realInterfaceSize = sizeof(RayTracingInterface); + if (realInterfaceSize == interfaceSize) + result = deviceBase.FillFunctionTable(*(RayTracingInterface*)interfacePtr); + } else if (hash == Hash(NRI_STRINGIFY(nri::StreamerInterface)) || hash == Hash(NRI_STRINGIFY(NRI_NAME_C(StreamerInterface)))) { + realInterfaceSize = sizeof(StreamerInterface); + if (realInterfaceSize == interfaceSize) + result = deviceBase.FillFunctionTable(*(StreamerInterface*)interfacePtr); + } else if (hash == Hash(NRI_STRINGIFY(nri::SwapChainInterface)) || hash == Hash(NRI_STRINGIFY(NRI_NAME_C(SwapChainInterface)))) { + realInterfaceSize = sizeof(SwapChainInterface); + if (realInterfaceSize == interfaceSize) + result = deviceBase.FillFunctionTable(*(SwapChainInterface*)interfacePtr); + } else if (hash == Hash(NRI_STRINGIFY(nri::ResourceAllocatorInterface)) || hash == Hash(NRI_STRINGIFY(NRI_NAME_C(ResourceAllocatorInterface)))) { + realInterfaceSize = sizeof(ResourceAllocatorInterface); + if (realInterfaceSize == interfaceSize) + result = deviceBase.FillFunctionTable(*(ResourceAllocatorInterface*)interfacePtr); } else if (hash == Hash(NRI_STRINGIFY(nri::WrapperD3D11Interface)) || hash == Hash(NRI_STRINGIFY(NRI_NAME_C(WrapperD3D11Interface)))) { realInterfaceSize = sizeof(WrapperD3D11Interface); if (realInterfaceSize == interfaceSize) @@ -52,26 +75,6 @@ NRI_API Result NRI_CALL 
nriGetInterface(const Device& device, const char* interf realInterfaceSize = sizeof(WrapperVKInterface); if (realInterfaceSize == interfaceSize) result = deviceBase.FillFunctionTable(*(WrapperVKInterface*)interfacePtr); - } else if (hash == Hash(NRI_STRINGIFY(nri::SwapChainInterface)) || hash == Hash(NRI_STRINGIFY(NRI_NAME_C(SwapChainInterface)))) { - realInterfaceSize = sizeof(SwapChainInterface); - if (realInterfaceSize == interfaceSize) - result = deviceBase.FillFunctionTable(*(SwapChainInterface*)interfacePtr); - } else if (hash == Hash(NRI_STRINGIFY(nri::RayTracingInterface)) || hash == Hash(NRI_STRINGIFY(NRI_NAME_C(RayTracingInterface)))) { - realInterfaceSize = sizeof(RayTracingInterface); - if (realInterfaceSize == interfaceSize) - result = deviceBase.FillFunctionTable(*(RayTracingInterface*)interfacePtr); - } else if (hash == Hash(NRI_STRINGIFY(nri::MeshShaderInterface)) || hash == Hash(NRI_STRINGIFY(NRI_NAME_C(MeshShaderInterface)))) { - realInterfaceSize = sizeof(MeshShaderInterface); - if (realInterfaceSize == interfaceSize) - result = deviceBase.FillFunctionTable(*(MeshShaderInterface*)interfacePtr); - } else if (hash == Hash(NRI_STRINGIFY(nri::LowLatencyInterface)) || hash == Hash(NRI_STRINGIFY(NRI_NAME_C(LowLatencyInterface)))) { - realInterfaceSize = sizeof(LowLatencyInterface); - if (realInterfaceSize == interfaceSize) - result = deviceBase.FillFunctionTable(*(LowLatencyInterface*)interfacePtr); - } else if (hash == Hash(NRI_STRINGIFY(nri::StreamerInterface)) || hash == Hash(NRI_STRINGIFY(NRI_NAME_C(StreamerInterface)))) { - realInterfaceSize = sizeof(StreamerInterface); - if (realInterfaceSize == interfaceSize) - result = deviceBase.FillFunctionTable(*(StreamerInterface*)interfacePtr); } if (result == Result::INVALID_ARGUMENT) @@ -106,7 +109,7 @@ NRI_API Result NRI_CALL nriCreateDevice(const DeviceCreationDesc& deviceCreation DeviceCreationDesc modifiedDeviceCreationDesc = deviceCreationDesc; CheckAndSetDefaultCallbacks(modifiedDeviceCreationDesc.callbackInterface); - CheckAndSetDefaultAllocator(modifiedDeviceCreationDesc.memoryAllocatorInterface); + CheckAndSetDefaultAllocator(modifiedDeviceCreationDesc.allocationCallbacks); #if NRI_USE_D3D11 if (modifiedDeviceCreationDesc.graphicsAPI == GraphicsAPI::D3D11) @@ -132,17 +135,17 @@ NRI_API Result NRI_CALL nriCreateDevice(const DeviceCreationDesc& deviceCreation NRI_API Result NRI_CALL nriCreateDeviceFromD3D11Device(const DeviceCreationD3D11Desc& deviceCreationD3D11Desc, Device*& device) { DeviceCreationDesc deviceCreationDesc = {}; deviceCreationDesc.callbackInterface = deviceCreationD3D11Desc.callbackInterface; - deviceCreationDesc.memoryAllocatorInterface = deviceCreationD3D11Desc.memoryAllocatorInterface; + deviceCreationDesc.allocationCallbacks = deviceCreationD3D11Desc.allocationCallbacks; deviceCreationDesc.graphicsAPI = GraphicsAPI::D3D11; deviceCreationDesc.enableNRIValidation = deviceCreationD3D11Desc.enableNRIValidation; CheckAndSetDefaultCallbacks(deviceCreationDesc.callbackInterface); - CheckAndSetDefaultAllocator(deviceCreationDesc.memoryAllocatorInterface); + CheckAndSetDefaultAllocator(deviceCreationDesc.allocationCallbacks); DeviceCreationD3D11Desc tempDeviceCreationD3D11Desc = deviceCreationD3D11Desc; CheckAndSetDefaultCallbacks(tempDeviceCreationD3D11Desc.callbackInterface); - CheckAndSetDefaultAllocator(tempDeviceCreationD3D11Desc.memoryAllocatorInterface); + CheckAndSetDefaultAllocator(tempDeviceCreationD3D11Desc.allocationCallbacks); Result result = Result::UNSUPPORTED; DeviceBase* deviceImpl = nullptr; 
@@ -160,17 +163,17 @@ NRI_API Result NRI_CALL nriCreateDeviceFromD3D11Device(const DeviceCreationD3D11 NRI_API Result NRI_CALL nriCreateDeviceFromD3D12Device(const DeviceCreationD3D12Desc& deviceCreationD3D12Desc, Device*& device) { DeviceCreationDesc deviceCreationDesc = {}; deviceCreationDesc.callbackInterface = deviceCreationD3D12Desc.callbackInterface; - deviceCreationDesc.memoryAllocatorInterface = deviceCreationD3D12Desc.memoryAllocatorInterface; + deviceCreationDesc.allocationCallbacks = deviceCreationD3D12Desc.allocationCallbacks; deviceCreationDesc.graphicsAPI = GraphicsAPI::D3D12; deviceCreationDesc.enableNRIValidation = deviceCreationD3D12Desc.enableNRIValidation; CheckAndSetDefaultCallbacks(deviceCreationDesc.callbackInterface); - CheckAndSetDefaultAllocator(deviceCreationDesc.memoryAllocatorInterface); + CheckAndSetDefaultAllocator(deviceCreationDesc.allocationCallbacks); DeviceCreationD3D12Desc tempDeviceCreationD3D12Desc = deviceCreationD3D12Desc; CheckAndSetDefaultCallbacks(tempDeviceCreationD3D12Desc.callbackInterface); - CheckAndSetDefaultAllocator(tempDeviceCreationD3D12Desc.memoryAllocatorInterface); + CheckAndSetDefaultAllocator(tempDeviceCreationD3D12Desc.allocationCallbacks); Result result = Result::UNSUPPORTED; DeviceBase* deviceImpl = nullptr; @@ -188,18 +191,18 @@ NRI_API Result NRI_CALL nriCreateDeviceFromD3D12Device(const DeviceCreationD3D12 NRI_API Result NRI_CALL nriCreateDeviceFromVkDevice(const DeviceCreationVKDesc& deviceCreationVKDesc, Device*& device) { DeviceCreationDesc deviceCreationDesc = {}; deviceCreationDesc.callbackInterface = deviceCreationVKDesc.callbackInterface; - deviceCreationDesc.memoryAllocatorInterface = deviceCreationVKDesc.memoryAllocatorInterface; + deviceCreationDesc.allocationCallbacks = deviceCreationVKDesc.allocationCallbacks; deviceCreationDesc.spirvBindingOffsets = deviceCreationVKDesc.spirvBindingOffsets; deviceCreationDesc.graphicsAPI = GraphicsAPI::VK; deviceCreationDesc.enableNRIValidation = false; CheckAndSetDefaultCallbacks(deviceCreationDesc.callbackInterface); - CheckAndSetDefaultAllocator(deviceCreationDesc.memoryAllocatorInterface); + CheckAndSetDefaultAllocator(deviceCreationDesc.allocationCallbacks); DeviceCreationVKDesc tempDeviceCreationVKDesc = deviceCreationVKDesc; CheckAndSetDefaultCallbacks(tempDeviceCreationVKDesc.callbackInterface); - CheckAndSetDefaultAllocator(tempDeviceCreationVKDesc.memoryAllocatorInterface); + CheckAndSetDefaultAllocator(tempDeviceCreationVKDesc.allocationCallbacks); Result result = Result::UNSUPPORTED; DeviceBase* deviceImpl = nullptr; @@ -215,7 +218,7 @@ NRI_API Result NRI_CALL nriCreateDeviceFromVkDevice(const DeviceCreationVKDesc& } NRI_API void NRI_CALL nriDestroyDevice(Device& device) { - ((DeviceBase&)device).Destroy(); + ((DeviceBase&)device).Destruct(); } NRI_API Format NRI_CALL nriConvertVKFormatToNRI(uint32_t vkFormat) { @@ -319,7 +322,7 @@ NRI_API Result NRI_CALL nriEnumerateAdapters(AdapterDesc* adapterDescs, uint32_t AdapterDesc& adapterDesc = adapterDescs[i]; memset(&adapterDesc, 0, sizeof(adapterDesc)); - wcstombs(adapterDesc.description, desc.Description, GetCountOf(adapterDesc.description) - 1); + wcstombs(adapterDesc.name, desc.Description, GetCountOf(adapterDesc.name) - 1); adapterDesc.luid = *(uint64_t*)&desc.AdapterLuid; adapterDesc.videoMemorySize = desc.DedicatedVideoMemory; adapterDesc.systemMemorySize = desc.DedicatedSystemMemory + desc.SharedSystemMemory; @@ -407,11 +410,11 @@ NRI_API Result NRI_CALL nriEnumerateAdapters(AdapterDesc* adapterDescs, uint32_t if 
(result == VK_SUCCESS && deviceGroupNum) { if (adapterDescs) { // Query device groups - VkPhysicalDeviceGroupProperties* deviceGroupProperties = STACK_ALLOC(VkPhysicalDeviceGroupProperties, deviceGroupNum); + VkPhysicalDeviceGroupProperties* deviceGroupProperties = StackAlloc(VkPhysicalDeviceGroupProperties, deviceGroupNum); vkEnumeratePhysicalDeviceGroups(instance, &deviceGroupNum, deviceGroupProperties); // Query device groups properties - AdapterDesc* adapterDescsSorted = STACK_ALLOC(AdapterDesc, deviceGroupNum); + AdapterDesc* adapterDescsSorted = StackAlloc(AdapterDesc, deviceGroupNum); for (uint32_t i = 0; i < deviceGroupNum; i++) { VkPhysicalDeviceIDProperties deviceIDProperties = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES}; VkPhysicalDeviceProperties2 properties2 = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2}; @@ -425,7 +428,7 @@ NRI_API Result NRI_CALL nriEnumerateAdapters(AdapterDesc* adapterDescs, uint32_t AdapterDesc& adapterDesc = adapterDescsSorted[i]; memset(&adapterDesc, 0, sizeof(adapterDesc)); - strncpy(adapterDesc.description, properties2.properties.deviceName, sizeof(adapterDesc.description)); + strncpy(adapterDesc.name, properties2.properties.deviceName, sizeof(adapterDesc.name)); adapterDesc.luid = *(uint64_t*)&deviceIDProperties.deviceLUID[0]; adapterDesc.deviceId = properties2.properties.deviceID; adapterDesc.vendor = GetVendorFromID(properties2.properties.vendorID); diff --git a/Source/D3D11/BufferD3D11.cpp b/Source/D3D11/BufferD3D11.cpp index d6c9c464..a394bb99 100644 --- a/Source/D3D11/BufferD3D11.cpp +++ b/Source/D3D11/BufferD3D11.cpp @@ -10,10 +10,10 @@ using namespace nri; BufferD3D11::~BufferD3D11() { - Deallocate(m_Device.GetStdAllocator(), m_ReadbackTexture); + Destroy(m_Device.GetStdAllocator(), m_ReadbackTexture); } -Result BufferD3D11::Create(const MemoryD3D11& memory) { +Result BufferD3D11::Create(MemoryLocation memoryLocation, float priority) { // Buffer was already created externally if (m_Buffer) return Result::SUCCESS; @@ -22,35 +22,30 @@ Result BufferD3D11::Create(const MemoryD3D11& memory) { desc.ByteWidth = (uint32_t)m_Desc.size; desc.StructureByteStride = m_Desc.structureStride; - if (m_Desc.structureStride) - desc.MiscFlags |= D3D11_RESOURCE_MISC_BUFFER_STRUCTURED; - // D3D11/D3D12 backends do not use FLAG_RAW for descriptors. It seems to be unnecessary because VK doesn't have // RAW functionality, but "byte adress buffers" in HLSL do work in VK if the underlying buffer is a structured // buffer with stride = 4. Since "ALLOW_RAW_VIEWS" is needed only for D3D11, add it here. 
if (m_Desc.structureStride == 4) desc.MiscFlags |= D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS; + if (m_Desc.structureStride) + desc.MiscFlags |= D3D11_RESOURCE_MISC_BUFFER_STRUCTURED; + if (m_Desc.usageMask & BufferUsageBits::ARGUMENT_BUFFER) desc.MiscFlags |= D3D11_RESOURCE_MISC_DRAWINDIRECT_ARGS; - MemoryLocation memoryLocation = memory.GetLocation(); if (memoryLocation == MemoryLocation::HOST_UPLOAD || memoryLocation == MemoryLocation::DEVICE_UPLOAD) { - if (m_Desc.usageMask == BufferUsageBits::NONE) { - m_Type = BufferType::UPLOAD; + if (m_Desc.usageMask == BufferUsageBits::NONE) { // needed for "UploadBufferToTexture" desc.Usage = D3D11_USAGE_STAGING; desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; } else { - m_Type = BufferType::DYNAMIC; desc.Usage = D3D11_USAGE_DYNAMIC; desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; } } else if (memoryLocation == MemoryLocation::HOST_READBACK) { - m_Type = BufferType::READBACK; desc.Usage = D3D11_USAGE_STAGING; - desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; } else { - m_Type = BufferType::DEVICE; desc.Usage = D3D11_USAGE_DEFAULT; desc.CPUAccessFlags = 0; } @@ -73,9 +68,16 @@ Result BufferD3D11::Create(const MemoryD3D11& memory) { HRESULT hr = m_Device->CreateBuffer(&desc, nullptr, &m_Buffer); RETURN_ON_BAD_HRESULT(&m_Device, hr, "ID3D11Device::CreateBuffer()"); - uint32_t priority = memory.GetPriority(); - if (priority != 0) - m_Buffer->SetEvictionPriority(priority); + // Priority + uint32_t evictionPriority = ConvertPriority(priority); + if (evictionPriority != 0) + m_Buffer->SetEvictionPriority(evictionPriority); + + return Result::SUCCESS; +} + +Result BufferD3D11::Create(const BufferDesc& bufferDesc) { + m_Desc = bufferDesc; return Result::SUCCESS; } @@ -86,34 +88,30 @@ Result BufferD3D11::Create(const BufferD3D11Desc& bufferDesc) { else if (!GetBufferDesc(bufferDesc, m_Desc)) return Result::INVALID_ARGUMENT; - ID3D11Buffer* buffer = (ID3D11Buffer*)bufferDesc.d3d11Resource; - m_Buffer = buffer; - - D3D11_BUFFER_DESC desc = {}; - buffer->GetDesc(&desc); - - if (desc.Usage == D3D11_USAGE_STAGING) - m_Type = desc.CPUAccessFlags == (D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE) ? BufferType::UPLOAD : BufferType::READBACK; - else if (desc.Usage == D3D11_USAGE_DYNAMIC) - m_Type = BufferType::DYNAMIC; - else - m_Type = BufferType::DEVICE; + m_Buffer = (ID3D11Buffer*)bufferDesc.d3d11Resource; return Result::SUCCESS; } -void* BufferD3D11::Map(MapType mapType, uint64_t offset) { +void* BufferD3D11::Map(uint64_t offset) { D3D11_MAPPED_SUBRESOURCE mappedData = {}; m_Device.EnterCriticalSection(); { FinalizeQueries(); FinalizeReadback(); - D3D11_MAP map = D3D11_MAP_READ; - if (m_Type == BufferType::DYNAMIC) - map = D3D11_MAP_WRITE_NO_OVERWRITE; - else if (m_Type == BufferType::UPLOAD && mapType == MapType::DEFAULT) - map = D3D11_MAP_WRITE; + D3D11_BUFFER_DESC desc = {}; + m_Buffer->GetDesc(&desc); + + D3D11_MAP map = D3D11_MAP_WRITE; + if (desc.CPUAccessFlags == D3D11_CPU_ACCESS_WRITE) + map = desc.Usage == D3D11_USAGE_DYNAMIC ? 
D3D11_MAP_WRITE_NO_OVERWRITE : D3D11_MAP_WRITE; + else if (desc.CPUAccessFlags == D3D11_CPU_ACCESS_READ) + map = D3D11_MAP_READ; + else if (desc.CPUAccessFlags == (D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE)) + map = D3D11_MAP_READ_WRITE; // needed for "UploadBufferToTexture" + else + CHECK(false, "Unmappable"); HRESULT hr = m_Device.GetImmediateContext()->Map(m_Buffer, 0, map, 0, &mappedData); if (FAILED(hr)) { @@ -217,10 +215,11 @@ TextureD3D11& BufferD3D11::RecreateReadbackTexture(const TextureD3D11& srcTextur else if (srcRegionDesc.height == 1) textureDesc.type = TextureType::TEXTURE_1D; - Deallocate(m_Device.GetStdAllocator(), m_ReadbackTexture); + Destroy(m_Device.GetStdAllocator(), m_ReadbackTexture); - m_ReadbackTexture = Allocate(m_Device.GetStdAllocator(), m_Device, textureDesc); - m_ReadbackTexture->Create(nullptr); + m_Device.CreateImplementation(m_ReadbackTexture, textureDesc); + if (m_ReadbackTexture) + m_ReadbackTexture->Create(MemoryLocation::HOST_READBACK, 0.0f); } m_IsReadbackDataChanged = true; @@ -233,15 +232,9 @@ TextureD3D11& BufferD3D11::RecreateReadbackTexture(const TextureD3D11& srcTextur // NRI //================================================================================================================ -inline void* BufferD3D11::Map(uint64_t offset, uint64_t size) { - MaybeUnused(size); - - return Map(MapType::DEFAULT, offset); -} - void BufferD3D11::Unmap() { m_Device.EnterCriticalSection(); - { m_Device.GetImmediateContext()->Unmap(m_Buffer, 0); } + m_Device.GetImmediateContext()->Unmap(m_Buffer, 0); m_Device.LeaveCriticalSection(); } diff --git a/Source/D3D11/BufferD3D11.h b/Source/D3D11/BufferD3D11.h index c700b404..b6174cdd 100644 --- a/Source/D3D11/BufferD3D11.h +++ b/Source/D3D11/BufferD3D11.h @@ -17,9 +17,6 @@ struct QueryRange { }; struct BufferD3D11 { - inline BufferD3D11(DeviceD3D11& device, const BufferDesc& bufferDesc) : m_Device(device), m_Desc(bufferDesc) { - } - inline BufferD3D11(DeviceD3D11& device) : m_Device(device) { } @@ -44,9 +41,10 @@ struct BufferD3D11 { ~BufferD3D11(); - Result Create(const MemoryD3D11& memory); + Result Create(const BufferDesc& bufferDesc); Result Create(const BufferD3D11Desc& bufferDesc); - void* Map(MapType mapType, uint64_t offset); + Result Create(MemoryLocation memoryLocation, float priority); + void* Map(uint64_t offset); void FinalizeQueries(); void FinalizeReadback(); TextureD3D11& RecreateReadbackTexture(const TextureD3D11& srcTexture, const TextureRegionDesc& srcRegionDesc, const TextureDataLayoutDesc& readbackDataLayoutDesc); @@ -59,7 +57,6 @@ struct BufferD3D11 { SET_D3D_DEBUG_OBJECT_NAME(m_Buffer, name); } - void* Map(uint64_t offset, uint64_t size); void Unmap(); private: @@ -67,7 +64,6 @@ struct BufferD3D11 { ComPtr m_Buffer; TextureD3D11* m_ReadbackTexture = nullptr; BufferDesc m_Desc = {}; - BufferType m_Type = BufferType::DEVICE; QueryRange m_QueryRange = {}; bool m_IsReadbackDataChanged = false; TextureDataLayoutDesc m_ReadbackDataLayoutDesc = {}; diff --git a/Source/D3D11/BufferD3D11.hpp b/Source/D3D11/BufferD3D11.hpp index ece90bec..05397d4b 100644 --- a/Source/D3D11/BufferD3D11.hpp +++ b/Source/D3D11/BufferD3D11.hpp @@ -14,7 +14,8 @@ static uint64_t NRI_CALL GetBufferNativeObject(const Buffer& buffer) { } static void* NRI_CALL MapBuffer(Buffer& buffer, uint64_t offset, uint64_t size) { - return ((BufferD3D11&)buffer).Map(offset, size); + MaybeUnused(size); + return ((BufferD3D11&)buffer).Map(offset); } static void NRI_CALL UnmapBuffer(Buffer& buffer) { diff --git 
diff --git a/Source/D3D11/CommandAllocatorD3D11.cpp b/Source/D3D11/CommandAllocatorD3D11.cpp
index 26428e54..21f5c8c1 100644
--- a/Source/D3D11/CommandAllocatorD3D11.cpp
+++ b/Source/D3D11/CommandAllocatorD3D11.cpp
@@ -29,9 +29,9 @@ Result CreateCommandBuffer(DeviceD3D11& device, ID3D11DeviceContext* precreatedC
     }
 
     if (isImmediate)
-        Deallocate(device.GetStdAllocator(), (CommandBufferEmuD3D11*)impl);
+        Destroy(device.GetStdAllocator(), (CommandBufferEmuD3D11*)impl);
     else
-        Deallocate(device.GetStdAllocator(), (CommandBufferD3D11*)impl);
+        Destroy(device.GetStdAllocator(), (CommandBufferD3D11*)impl);
 
     return result;
 }
diff --git a/Source/D3D11/CommandBufferD3D11.cpp b/Source/D3D11/CommandBufferD3D11.cpp
index 9c11ca6a..568c7389 100644
--- a/Source/D3D11/CommandBufferD3D11.cpp
+++ b/Source/D3D11/CommandBufferD3D11.cpp
@@ -116,7 +116,7 @@ void CommandBufferD3D11::SetViewports(const Viewport* viewports, uint32_t viewpo
 }
 
 void CommandBufferD3D11::SetScissors(const Rect* rects, uint32_t rectNum) {
-    D3D11_RECT* rectsD3D = STACK_ALLOC(D3D11_RECT, rectNum);
+    D3D11_RECT* rectsD3D = StackAlloc(D3D11_RECT, rectNum);
 
     for (uint32_t i = 0; i < rectNum; i++) {
         const Rect& rect = rects[i];
@@ -182,7 +182,7 @@ void CommandBufferD3D11::ClearAttachments(const ClearDesc* clearDescs, uint32_t
             }
         }
     } else {
-        D3D11_RECT* rectsD3D = STACK_ALLOC(D3D11_RECT, rectNum);
+        D3D11_RECT* rectsD3D = StackAlloc(D3D11_RECT, rectNum);
         for (uint32_t i = 0; i < rectNum; i++) {
             const Rect& rect = rects[i];
             rectsD3D[i] = {rect.x, rect.y, (LONG)(rect.x + rect.width), (LONG)(rect.y + rect.height)};
@@ -254,7 +254,7 @@ void CommandBufferD3D11::SetVertexBuffers(uint32_t baseSlot, uint32_t bufferNum,
         offsets = s_nullOffsets;
 
     if (m_VertexBuffer != buffers[0] || m_VertexBufferOffset != offsets[0] || m_VertexBufferBaseSlot != baseSlot || bufferNum > 1) {
-        uint8_t* mem = STACK_ALLOC(uint8_t, bufferNum * (sizeof(ID3D11Buffer*) + sizeof(uint32_t) * 2));
+        uint8_t* mem = StackAlloc(uint8_t, bufferNum * (sizeof(ID3D11Buffer*) + sizeof(uint32_t) * 2));
 
         ID3D11Buffer** buf = (ID3D11Buffer**)mem;
         mem += bufferNum * sizeof(ID3D11Buffer*);
@@ -414,7 +414,7 @@ void CommandBufferD3D11::UploadBufferToTexture(
 
     uint32_t dstSubresource = dst.GetSubresourceIndex(dstRegionDesc);
 
-    uint8_t* data = (uint8_t*)src.Map(MapType::READ, srcDataLayoutDesc.offset);
+    uint8_t* data = (uint8_t*)src.Map(srcDataLayoutDesc.offset);
     m_DeferredContext->UpdateSubresource(dst, dstSubresource, &dstBox, data, srcDataLayoutDesc.rowPitch, srcDataLayoutDesc.slicePitch);
     src.Unmap();
 }
@@ -513,7 +513,7 @@ void CommandBufferD3D11::CopyQueries(const QueryPool& queryPool, uint32_t offset
 
 void CommandBufferD3D11::BeginAnnotation(const char* name) {
     size_t len = strlen(name) + 1;
-    wchar_t* s = STACK_ALLOC(wchar_t, len);
+    wchar_t* s = StackAlloc(wchar_t, len);
     ConvertCharToWchar(name, s, len);
 
     m_Annotation->BeginEvent(s);
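STACK_ALLOC call sites are renamed to StackAlloc throughout; the helper's definition lives in shared headers outside this diff. Assuming it keeps the old macro's semantics, it is a typed alloca wrapper of roughly this shape (an assumption, not the verbatim NRI definition):

```cpp
#include <malloc.h> // _alloca on MSVC

// Hypothetical shape of the StackAlloc(T, num) helper: a typed stack
// allocation that is released automatically when the calling function
// returns, so it must never escape the caller.
#define StackAlloc(T, num) ((T*)_alloca(sizeof(T) * (num)))
```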
 static void* NRI_CALL GetCommandBufferNativeObject(const CommandBuffer& commandBuffer) {
     if (!(&commandBuffer))
         return nullptr;
diff --git a/Source/D3D11/CommandBufferEmuD3D11.hpp b/Source/D3D11/CommandBufferEmuD3D11.hpp
index ad78ebc1..acfe51e4 100644
--- a/Source/D3D11/CommandBufferEmuD3D11.hpp
+++ b/Source/D3D11/CommandBufferEmuD3D11.hpp
@@ -159,14 +159,6 @@ static void NRI_CALL CmdCopyQueries(CommandBuffer& commandBuffer, const QueryPoo
 static void NRI_CALL CmdResetQueries(CommandBuffer&, const QueryPool&, uint32_t, uint32_t) {
 }
 
-static void NRI_CALL DestroyCommandBuffer(CommandBuffer& commandBuffer) {
-    if (!(&commandBuffer))
-        return;
-
-    CommandBufferHelper& commandBufferHelper = (CommandBufferHelper&)commandBuffer;
-    Deallocate(commandBufferHelper.GetStdAllocator(), &commandBufferHelper);
-}
-
 static void* NRI_CALL GetCommandBufferNativeObject(const CommandBuffer& commandBuffer) {
     if (!(&commandBuffer))
         return nullptr;
@@ -177,45 +169,44 @@ static void* NRI_CALL GetCommandBufferNativeObject(const CommandBuffer& commandB
 
 #pragma endregion
 
-void Core_CommandBufferEmu_PartiallyFillFunctionTable(CoreInterface& coreInterface) {
-    coreInterface.DestroyCommandBuffer = ::DestroyCommandBuffer;
-    coreInterface.BeginCommandBuffer = ::BeginCommandBuffer;
-    coreInterface.CmdSetDescriptorPool = ::CmdSetDescriptorPool;
-    coreInterface.CmdSetDescriptorSet = ::CmdSetDescriptorSet;
-    coreInterface.CmdSetPipelineLayout = ::CmdSetPipelineLayout;
-    coreInterface.CmdSetPipeline = ::CmdSetPipeline;
-    coreInterface.CmdSetConstants = ::CmdSetConstants;
-    coreInterface.CmdBarrier = ::CmdBarrier;
-    coreInterface.CmdBeginRendering = ::CmdBeginRendering;
-    coreInterface.CmdClearAttachments = ::CmdClearAttachments;
-    coreInterface.CmdSetViewports = ::CmdSetViewports;
-    coreInterface.CmdSetScissors = ::CmdSetScissors;
-    coreInterface.CmdSetDepthBounds = ::CmdSetDepthBounds;
-    coreInterface.CmdSetStencilReference = ::CmdSetStencilReference;
-    coreInterface.CmdSetSamplePositions = ::CmdSetSamplePositions;
-    coreInterface.CmdSetBlendConstants = ::CmdSetBlendConstants;
-    coreInterface.CmdSetIndexBuffer = ::CmdSetIndexBuffer;
-    coreInterface.CmdSetVertexBuffers = ::CmdSetVertexBuffers;
-    coreInterface.CmdDraw = ::CmdDraw;
-    coreInterface.CmdDrawIndexed = ::CmdDrawIndexed;
-    coreInterface.CmdDrawIndirect = ::CmdDrawIndirect;
-    coreInterface.CmdDrawIndexedIndirect = ::CmdDrawIndexedIndirect;
-    coreInterface.CmdEndRendering = ::CmdEndRendering;
-    coreInterface.CmdDispatch = ::CmdDispatch;
-    coreInterface.CmdDispatchIndirect = ::CmdDispatchIndirect;
-    coreInterface.CmdBeginQuery = ::CmdBeginQuery;
-    coreInterface.CmdEndQuery = ::CmdEndQuery;
-    coreInterface.CmdCopyQueries = ::CmdCopyQueries;
-    coreInterface.CmdResetQueries = ::CmdResetQueries;
-    coreInterface.CmdBeginAnnotation = ::CmdBeginAnnotation;
-    coreInterface.CmdEndAnnotation = ::CmdEndAnnotation;
-    coreInterface.CmdClearStorageBuffer = ::CmdClearStorageBuffer;
-    coreInterface.CmdClearStorageTexture = ::CmdClearStorageTexture;
-    coreInterface.CmdCopyBuffer = ::CmdCopyBuffer;
-    coreInterface.CmdCopyTexture = ::CmdCopyTexture;
-    coreInterface.CmdUploadBufferToTexture = ::CmdUploadBufferToTexture;
-    coreInterface.CmdReadbackTextureToBuffer = ::CmdReadbackTextureToBuffer;
-    coreInterface.EndCommandBuffer = ::EndCommandBuffer;
-    coreInterface.SetCommandBufferDebugName = ::SetCommandBufferDebugName;
-    coreInterface.GetCommandBufferNativeObject = ::GetCommandBufferNativeObject;
+void Core_CommandBufferEmu_PartiallyFillFunctionTable(CoreInterface& table) {
+    table.BeginCommandBuffer = ::BeginCommandBuffer;
+    table.CmdSetDescriptorPool = ::CmdSetDescriptorPool;
+    table.CmdSetDescriptorSet = ::CmdSetDescriptorSet;
+    table.CmdSetPipelineLayout = ::CmdSetPipelineLayout;
+    table.CmdSetPipeline = ::CmdSetPipeline;
+    table.CmdSetConstants = ::CmdSetConstants;
+    table.CmdBarrier = ::CmdBarrier;
+    table.CmdBeginRendering = ::CmdBeginRendering;
+    table.CmdClearAttachments = ::CmdClearAttachments;
+    table.CmdSetViewports = ::CmdSetViewports;
+    table.CmdSetScissors = ::CmdSetScissors;
+    table.CmdSetDepthBounds = ::CmdSetDepthBounds;
+    table.CmdSetStencilReference = ::CmdSetStencilReference;
+    table.CmdSetSamplePositions = ::CmdSetSamplePositions;
+    table.CmdSetBlendConstants = ::CmdSetBlendConstants;
+    table.CmdSetIndexBuffer = ::CmdSetIndexBuffer;
+    table.CmdSetVertexBuffers = ::CmdSetVertexBuffers;
+    table.CmdDraw = ::CmdDraw;
+    table.CmdDrawIndexed = ::CmdDrawIndexed;
+    table.CmdDrawIndirect = ::CmdDrawIndirect;
+    table.CmdDrawIndexedIndirect = ::CmdDrawIndexedIndirect;
+    table.CmdEndRendering = ::CmdEndRendering;
+    table.CmdDispatch = ::CmdDispatch;
+    table.CmdDispatchIndirect = ::CmdDispatchIndirect;
+    table.CmdBeginQuery = ::CmdBeginQuery;
+    table.CmdEndQuery = ::CmdEndQuery;
+    table.CmdCopyQueries = ::CmdCopyQueries;
+    table.CmdResetQueries = ::CmdResetQueries;
+    table.CmdBeginAnnotation = ::CmdBeginAnnotation;
+    table.CmdEndAnnotation = ::CmdEndAnnotation;
+    table.CmdClearStorageBuffer = ::CmdClearStorageBuffer;
+    table.CmdClearStorageTexture = ::CmdClearStorageTexture;
+    table.CmdCopyBuffer = ::CmdCopyBuffer;
+    table.CmdCopyTexture = ::CmdCopyTexture;
+    table.CmdUploadBufferToTexture = ::CmdUploadBufferToTexture;
+    table.CmdReadbackTextureToBuffer = ::CmdReadbackTextureToBuffer;
+    table.EndCommandBuffer = ::EndCommandBuffer;
+    table.SetCommandBufferDebugName = ::SetCommandBufferDebugName;
+    table.GetCommandBufferNativeObject = ::GetCommandBufferNativeObject;
 }
diff --git a/Source/D3D11/DeviceD3D11.cpp b/Source/D3D11/DeviceD3D11.cpp
index 2054f598..4b4ba335 100644
--- a/Source/D3D11/DeviceD3D11.cpp
+++ b/Source/D3D11/DeviceD3D11.cpp
@@ -44,16 +44,15 @@ static uint8_t QueryLatestDevice(ComPtr& in, ComPtr
-    StdAllocator<uint8_t> allocator(deviceCreationDesc.memoryAllocatorInterface);
-    DeviceD3D11* implementation = Allocate<DeviceD3D11>(allocator, deviceCreationDesc.callbackInterface, allocator);
-    Result result = implementation->Create(deviceCreationDesc, nullptr, nullptr, false);
+    StdAllocator<uint8_t> allocator(deviceCreationDesc.allocationCallbacks);
+    DeviceD3D11* impl = Allocate<DeviceD3D11>(allocator, deviceCreationDesc.callbackInterface, allocator);
+    Result result = impl->Create(deviceCreationDesc, nullptr, nullptr, false);
 
-    if (result == Result::SUCCESS) {
-        device = (DeviceBase*)implementation;
-        return Result::SUCCESS;
-    }
-
-    Deallocate(allocator, implementation);
+    if (result != Result::SUCCESS) {
+        Destroy(allocator, impl);
+        device = nullptr;
+    } else
+        device = (DeviceBase*)impl;
 
     return result;
 }
 
@@ -64,20 +63,19 @@ Result CreateDeviceD3D11(const DeviceCreationD3D11Desc& deviceCreationD3D11Desc,
     DeviceCreationDesc deviceCreationDesc = {};
     deviceCreationDesc.callbackInterface = deviceCreationD3D11Desc.callbackInterface;
-    deviceCreationDesc.memoryAllocatorInterface = deviceCreationD3D11Desc.memoryAllocatorInterface;
+    deviceCreationDesc.allocationCallbacks = deviceCreationD3D11Desc.allocationCallbacks;
     deviceCreationDesc.enableD3D11CommandBufferEmulation = deviceCreationD3D11Desc.enableD3D11CommandBufferEmulation;
     deviceCreationDesc.graphicsAPI = GraphicsAPI::D3D11;
 
-    StdAllocator<uint8_t> allocator(deviceCreationDesc.memoryAllocatorInterface);
-    DeviceD3D11* implementation = Allocate<DeviceD3D11>(allocator, deviceCreationDesc.callbackInterface, allocator);
-    Result result = implementation->Create(deviceCreationDesc, deviceCreationD3D11Desc.d3d11Device, deviceCreationD3D11Desc.agsContext, deviceCreationD3D11Desc.isNVAPILoaded);
+    StdAllocator<uint8_t> allocator(deviceCreationDesc.allocationCallbacks);
+    DeviceD3D11* impl = Allocate<DeviceD3D11>(allocator, deviceCreationDesc.callbackInterface, allocator);
+    Result result = impl->Create(deviceCreationDesc, deviceCreationD3D11Desc.d3d11Device, deviceCreationD3D11Desc.agsContext, deviceCreationD3D11Desc.isNVAPILoaded);
 
-    if (result == Result::SUCCESS) {
-        device = (DeviceBase*)implementation;
-        return Result::SUCCESS;
-    }
-
-    Deallocate(allocator, implementation);
+    if (result != Result::SUCCESS) {
+        Destroy(allocator, impl);
+        device = nullptr;
+    } else
+        device = (DeviceBase*)impl;
 
     return result;
 }
 
@@ -99,9 +97,9 @@ DeviceD3D11::~DeviceD3D11() {
         m_Ext.m_AGS.DestroyDeviceD3D11(m_Ext.m_AGSContext, m_Device, nullptr, m_ImmediateContext, nullptr);
 #endif
 
-    for (CommandQueueD3D11* commandQueue: m_CommandQueues) {
+    for (CommandQueueD3D11* commandQueue : m_CommandQueues) {
         if (commandQueue)
-            Deallocate(GetStdAllocator(), commandQueue);
+            Destroy(GetStdAllocator(), commandQueue);
     }
 }
 
@@ -134,7 +132,7 @@ Result DeviceD3D11::Create(const DeviceCreationDesc& deviceCreationDesc, ID3D11D
     HRESULT hr = m_Adapter->GetDesc(&desc);
     RETURN_ON_BAD_HRESULT(this, hr, "IDXGIAdapter::GetDesc()");
 
-    wcstombs(m_Desc.adapterDesc.description, desc.Description, GetCountOf(m_Desc.adapterDesc.description) - 1);
+    wcstombs(m_Desc.adapterDesc.name, desc.Description, GetCountOf(m_Desc.adapterDesc.name) - 1);
     m_Desc.adapterDesc.luid = *(uint64_t*)&desc.AdapterLuid;
     m_Desc.adapterDesc.videoMemorySize = desc.DedicatedVideoMemory;
     m_Desc.adapterDesc.systemMemorySize = desc.DedicatedSystemMemory + desc.SharedSystemMemory;
@@ -213,14 +211,14 @@ Result DeviceD3D11::Create(const DeviceCreationDesc& deviceCreationDesc, ID3D11D
     m_IsWrapped = true;
 
     m_Version = QueryLatestDevice(deviceTemp, m_Device);
-    REPORT_INFO(this, "Using ID3D11Device%u...", m_Version);
+    REPORT_INFO(this, "Using ID3D11Device%u", m_Version);
 
     // Immediate context
     ComPtr immediateContext;
     m_Device->GetImmediateContext((ID3D11DeviceContext**)&immediateContext);
 
     m_ImmediateContextVersion = QueryLatestDeviceContext(immediateContext, m_ImmediateContext);
-    REPORT_INFO(this, "Using ID3D11DeviceContext%u...", m_ImmediateContextVersion);
+    REPORT_INFO(this, "Using ID3D11DeviceContext%u", m_ImmediateContextVersion);
 
     // Skip UAV barriers by default on the immediate context
     GetExt()->BeginUAVOverlap(m_ImmediateContext);
@@ -341,7 +339,6 @@ void DeviceD3D11::FillDesc() {
     m_Desc.bufferTextureGranularity = 1;
     m_Desc.bufferMaxSize = D3D11_REQ_RESOURCE_SIZE_IN_MEGABYTES_EXPRESSION_C_TERM * 1024ull * 1024ull;
     m_Desc.pushConstantsMaxSize = D3D11_REQ_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT * 16;
-    m_Desc.memoryTier = MemoryTier::ONE;
 
     m_Desc.boundDescriptorSetMaxNum = D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT;
     m_Desc.perStageDescriptorSamplerMaxNum = D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT;
@@ -442,22 +439,6 @@ void DeviceD3D11::FillDesc() {
     m_Desc.isLowLatencySupported = m_Ext.HasNVAPI();
 }
 
-template <typename Implementation, typename Interface, typename... Args>
-Result DeviceD3D11::CreateImplementation(Interface*& entity, const Args&... args) {
-    Implementation* implementation = Allocate<Implementation>(GetStdAllocator(), *this);
-    Result result = implementation->Create(args...);
-
-    if (result == Result::SUCCESS) {
-        entity = (Interface*)implementation;
-        return Result::SUCCESS;
-    }
-
-    Deallocate(GetStdAllocator(), implementation);
-    entity = nullptr;
-
-    return result;
-}
-
 void DeviceD3D11::GetMemoryDesc(const BufferDesc& bufferDesc, MemoryLocation memoryLocation, MemoryDesc& memoryDesc) const {
     const bool isConstantBuffer = (bufferDesc.usageMask & BufferUsageBits::CONSTANT_BUFFER) == (uint32_t)BufferUsageBits::CONSTANT_BUFFER;
 
@@ -467,10 +448,10 @@ void DeviceD3D11::GetMemoryDesc(const BufferDesc& bufferDesc, MemoryLocation mem
     else if (bufferDesc.size <= 4096)
         alignment = 4096;
 
+    memoryDesc = {};
     memoryDesc.type = (MemoryType)memoryLocation;
     memoryDesc.size = Align(bufferDesc.size, alignment);
     memoryDesc.alignment = alignment;
-    memoryDesc.mustBeDedicated = false;
 }
 
 void DeviceD3D11::GetMemoryDesc(const TextureDesc& textureDesc, MemoryLocation memoryLocation, MemoryDesc& memoryDesc) const {
@@ -485,32 +466,24 @@ void DeviceD3D11::GetMemoryDesc(const TextureDesc& textureDesc, MemoryLocation m
 
     size = Align(size, alignment);
 
+    memoryDesc = {};
     memoryDesc.type = (MemoryType)memoryLocation;
     memoryDesc.size = size;
     memoryDesc.alignment = alignment;
-    memoryDesc.mustBeDedicated = false;
 }
 
 //================================================================================================================
 // DeviceBase
 //================================================================================================================
 
-void DeviceD3D11::Destroy() {
-    Deallocate(GetStdAllocator(), this);
+void DeviceD3D11::Destruct() {
+    Destroy(GetStdAllocator(), this);
 }
 
 //================================================================================================================
 // NRI
 //================================================================================================================
 
-inline Result DeviceD3D11::CreateSwapChain(const SwapChainDesc& swapChainDesc, SwapChain*& swapChain) {
-    return CreateImplementation<SwapChainD3D11>(swapChain, swapChainDesc);
-}
-
-inline void DeviceD3D11::DestroySwapChain(SwapChain& swapChain) {
-    Deallocate(GetStdAllocator(), (SwapChainD3D11*)&swapChain);
-}
-
 inline Result DeviceD3D11::GetCommandQueue(CommandQueueType commandQueueType, CommandQueue*& commandQueue) {
     commandQueue = (CommandQueue*)m_CommandQueues[(uint32_t)commandQueueType];
 
@@ -519,154 +492,16 @@ inline Result DeviceD3D11::GetCommandQueue(CommandQueueType commandQueueType, Co
 inline Result DeviceD3D11::CreateCommandAllocator(const CommandQueue& commandQueue, CommandAllocator*& commandAllocator) {
     MaybeUnused(commandQueue);
-
     commandAllocator = (CommandAllocator*)Allocate<CommandAllocatorD3D11>(GetStdAllocator(), *this);
 
     return Result::SUCCESS;
 }
 
-inline Result DeviceD3D11::CreateDescriptorPool(const DescriptorPoolDesc& descriptorPoolDesc, DescriptorPool*& descriptorPool) {
-    return CreateImplementation<DescriptorPoolD3D11>(descriptorPool, descriptorPoolDesc);
-}
-
-inline Result DeviceD3D11::CreateBuffer(const BufferDesc& bufferDesc, Buffer*& buffer) {
-    buffer = (Buffer*)Allocate<BufferD3D11>(GetStdAllocator(), *this, bufferDesc);
-
-    return Result::SUCCESS;
-}
-
-inline Result DeviceD3D11::CreateTexture(const TextureDesc& textureDesc, Texture*& texture) {
-    texture = (Texture*)Allocate<TextureD3D11>(GetStdAllocator(), *this, textureDesc);
-
-    return Result::SUCCESS;
-}
-
-inline Result DeviceD3D11::CreateDescriptor(const BufferViewDesc& bufferViewDesc, Descriptor*& bufferView) {
-    return CreateImplementation<DescriptorD3D11>(bufferView, bufferViewDesc);
-}
 
-inline Result DeviceD3D11::CreateDescriptor(const Texture1DViewDesc& textureViewDesc, Descriptor*& textureView) {
-    return CreateImplementation<DescriptorD3D11>(textureView, textureViewDesc);
-}
-
-inline Result DeviceD3D11::CreateDescriptor(const Texture2DViewDesc& textureViewDesc, Descriptor*& textureView) {
-    return CreateImplementation<DescriptorD3D11>(textureView, textureViewDesc);
-}
-
-inline Result DeviceD3D11::CreateDescriptor(const Texture3DViewDesc& textureViewDesc, Descriptor*& textureView) {
-    return CreateImplementation<DescriptorD3D11>(textureView, textureViewDesc);
-}
-
-inline Result DeviceD3D11::CreateDescriptor(const SamplerDesc& samplerDesc, Descriptor*& sampler) {
-    return CreateImplementation<DescriptorD3D11>(sampler, samplerDesc);
-}
-
-inline Result DeviceD3D11::CreatePipelineLayout(const PipelineLayoutDesc& pipelineLayoutDesc, PipelineLayout*& pipelineLayout) {
-    PipelineLayoutD3D11* implementation = Allocate<PipelineLayoutD3D11>(GetStdAllocator(), *this);
-    Result res = implementation->Create(pipelineLayoutDesc);
-
-    if (res == Result::SUCCESS) {
-        pipelineLayout = (PipelineLayout*)implementation;
-        return Result::SUCCESS;
-    }
-
-    Deallocate(GetStdAllocator(), implementation);
-
-    return res;
-}
-
-inline Result DeviceD3D11::CreatePipeline(const GraphicsPipelineDesc& graphicsPipelineDesc, Pipeline*& pipeline) {
-    PipelineD3D11* implementation = Allocate<PipelineD3D11>(GetStdAllocator(), *this);
-    Result res = implementation->Create(graphicsPipelineDesc);
-
-    if (res == Result::SUCCESS) {
-        pipeline = (Pipeline*)implementation;
-        return Result::SUCCESS;
-    }
-
-    Deallocate(GetStdAllocator(), implementation);
-
-    return res;
-}
-
-inline Result DeviceD3D11::CreatePipeline(const ComputePipelineDesc& computePipelineDesc, Pipeline*& pipeline) {
-    PipelineD3D11* implementation = Allocate<PipelineD3D11>(GetStdAllocator(), *this);
-    Result res = implementation->Create(computePipelineDesc);
-
-    if (res == Result::SUCCESS) {
-        pipeline = (Pipeline*)implementation;
-        return Result::SUCCESS;
-    }
-
-    Deallocate(GetStdAllocator(), implementation);
-
-    return res;
-}
-
-inline Result DeviceD3D11::CreateQueryPool(const QueryPoolDesc& queryPoolDesc, QueryPool*& queryPool) {
-    return CreateImplementation<QueryPoolD3D11>(queryPool, queryPoolDesc);
-}
-
-inline Result DeviceD3D11::CreateFence(uint64_t initialValue, Fence*& fence) {
-    FenceD3D11* implementation = Allocate<FenceD3D11>(GetStdAllocator(), *this);
-    Result res = implementation->Create(initialValue);
-
-    if (res == Result::SUCCESS) {
-        fence = (Fence*)implementation;
-        return Result::SUCCESS;
-    }
-
-    Deallocate(GetStdAllocator(), implementation);
-
-    return res;
-}
-
-inline void DeviceD3D11::DestroyCommandAllocator(CommandAllocator& commandAllocator) {
-    Deallocate(GetStdAllocator(), (CommandAllocatorD3D11*)&commandAllocator);
-}
-
-inline void DeviceD3D11::DestroyDescriptorPool(DescriptorPool& descriptorPool) {
-    Deallocate(GetStdAllocator(), (DescriptorPoolD3D11*)&descriptorPool);
-}
-
-inline void DeviceD3D11::DestroyBuffer(Buffer& buffer) {
-    Deallocate(GetStdAllocator(), (BufferD3D11*)&buffer);
-}
-
-inline void DeviceD3D11::DestroyTexture(Texture& texture) {
-    Deallocate(GetStdAllocator(), (TextureD3D11*)&texture);
-}
-
-inline void DeviceD3D11::DestroyDescriptor(Descriptor& descriptor) {
-    Deallocate(GetStdAllocator(), (DescriptorD3D11*)&descriptor);
-}
-
-inline void DeviceD3D11::DestroyPipelineLayout(PipelineLayout& pipelineLayout) {
-    Deallocate(GetStdAllocator(), (PipelineLayoutD3D11*)&pipelineLayout);
-}
-
-inline void DeviceD3D11::DestroyPipeline(Pipeline& pipeline) {
-    Deallocate(GetStdAllocator(), (PipelineD3D11*)&pipeline);
-}
 
-inline void DeviceD3D11::DestroyQueryPool(QueryPool& queryPool) {
-    Deallocate(GetStdAllocator(), (QueryPoolD3D11*)&queryPool);
-}
-
-inline void DeviceD3D11::DestroyFence(Fence& fence) {
-    Deallocate(GetStdAllocator(), (FenceD3D11*)&fence);
-}
-
-inline Result DeviceD3D11::AllocateMemory(const AllocateMemoryDesc& allocateMemoryDesc, Memory*& memory) {
-    memory = (Memory*)Allocate<MemoryD3D11>(GetStdAllocator(), *this, allocateMemoryDesc);
-
-    return Result::SUCCESS;
-}
-
 inline Result DeviceD3D11::BindBufferMemory(const BufferMemoryBindingDesc* memoryBindingDescs, uint32_t memoryBindingDescNum) {
     for (uint32_t i = 0; i < memoryBindingDescNum; i++) {
         const BufferMemoryBindingDesc& desc = memoryBindingDescs[i];
-        Result res = ((BufferD3D11*)desc.buffer)->Create(*(MemoryD3D11*)desc.memory);
+        const MemoryD3D11& memory = *(MemoryD3D11*)desc.memory;
+        Result res = ((BufferD3D11*)desc.buffer)->Create(memory.GetLocation(), memory.GetPriority());
 
         if (res != Result::SUCCESS)
             return res;
     }
 
@@ -677,7 +512,8 @@ inline Result DeviceD3D11::BindBufferMemory(const BufferMemoryBindingDesc* memor
 inline Result DeviceD3D11::BindTextureMemory(const TextureMemoryBindingDesc* memoryBindingDescs, uint32_t memoryBindingDescNum) {
     for (uint32_t i = 0; i < memoryBindingDescNum; i++) {
         const TextureMemoryBindingDesc& desc = memoryBindingDescs[i];
-        Result res = ((TextureD3D11*)desc.texture)->Create((MemoryD3D11*)desc.memory);
+        const MemoryD3D11& memory = *(MemoryD3D11*)desc.memory;
+        Result res = ((TextureD3D11*)desc.texture)->Create(memory.GetLocation(), memory.GetPriority());
 
         if (res != Result::SUCCESS)
             return res;
     }
 
@@ -685,10 +521,6 @@ inline Result DeviceD3D11::BindTextureMemory(const TextureMemoryBindingDesc* mem
     return Result::SUCCESS;
 }
 
-inline void DeviceD3D11::FreeMemory(Memory& memory) {
-    Deallocate(GetStdAllocator(), (MemoryD3D11*)&memory);
-}
-
 inline FormatSupportBits DeviceD3D11::GetFormatSupport(Format format) const {
     FormatSupportBits mask = FormatSupportBits::UNSUPPORTED;
 
diff --git a/Source/D3D11/DeviceD3D11.h b/Source/D3D11/DeviceD3D11.h
index 370b64ec..dc1e6ff4 100644
--- a/Source/D3D11/DeviceD3D11.h
+++ b/Source/D3D11/DeviceD3D11.h
@@ -63,6 +63,20 @@ struct DeviceD3D11 final : public DeviceBase {
         ::LeaveCriticalSection(&m_CriticalSection);
     }
 
+    template <typename Implementation, typename Interface, typename... Args>
+    inline Result CreateImplementation(Interface*& entity, const Args&... args) {
+        Implementation* impl = Allocate<Implementation>(GetStdAllocator(), *this);
+        Result result = impl->Create(args...);
+
+        if (result != Result::SUCCESS) {
+            Destroy(GetStdAllocator(), impl);
+            entity = nullptr;
+        } else
+            entity = (Interface*)impl;
+
+        return result;
+    }
+
     Result Create(const DeviceCreationDesc& deviceCreationDesc, ID3D11Device* precreatedDevice, AGSContext* agsContext, bool isNVAPILoadedInApp);
     void GetMemoryDesc(const BufferDesc& bufferDesc, MemoryLocation memoryLocation, MemoryDesc& memoryDesc) const;
     void GetMemoryDesc(const TextureDesc& textureDesc, MemoryLocation memoryLocation, MemoryDesc& memoryDesc) const;
@@ -71,44 +85,17 @@ struct DeviceD3D11 final : public DeviceBase {
     // NRI
     //================================================================================================================
 
-    inline void SetDebugName(const char* name) {
-        SET_D3D_DEBUG_OBJECT_NAME(m_Device, name);
-        SET_D3D_DEBUG_OBJECT_NAME(m_ImmediateContext, name);
-    }
-
-    Result CreateSwapChain(const SwapChainDesc& swapChainDesc, SwapChain*& swapChain);
-    void DestroySwapChain(SwapChain& swapChain);
-
-    Result GetCommandQueue(CommandQueueType commandQueueType, CommandQueue*& commandQueue);
     Result CreateCommandAllocator(const CommandQueue& commandQueue, CommandAllocator*& commandAllocator);
-    Result CreateDescriptorPool(const DescriptorPoolDesc& descriptorPoolDesc, DescriptorPool*& descriptorPool);
-    Result CreateBuffer(const BufferDesc& bufferDesc, Buffer*& buffer);
-    Result CreateTexture(const TextureDesc& textureDesc, Texture*& texture);
-    Result CreateDescriptor(const BufferViewDesc& bufferViewDesc, Descriptor*& bufferView);
-    Result CreateDescriptor(const Texture1DViewDesc& textureViewDesc, Descriptor*& textureView);
-    Result CreateDescriptor(const Texture2DViewDesc& textureViewDesc, Descriptor*& textureView);
-    Result CreateDescriptor(const Texture3DViewDesc& textureViewDesc, Descriptor*& textureView);
-    Result CreateDescriptor(const SamplerDesc& samplerDesc, Descriptor*& sampler);
-    Result CreatePipelineLayout(const PipelineLayoutDesc& pipelineLayoutDesc, PipelineLayout*& pipelineLayout);
-    Result CreatePipeline(const GraphicsPipelineDesc& graphicsPipelineDesc, Pipeline*& pipeline);
-    Result CreatePipeline(const ComputePipelineDesc& computePipelineDesc, Pipeline*& pipeline);
-    Result CreateQueryPool(const QueryPoolDesc& queryPoolDesc, QueryPool*& queryPool);
-    Result CreateFence(uint64_t initialValue, Fence*& queueSemaphore);
-    void DestroyCommandAllocator(CommandAllocator& commandAllocator);
-    void DestroyDescriptorPool(DescriptorPool& descriptorPool);
-    void DestroyBuffer(Buffer& buffer);
-    void DestroyTexture(Texture& texture);
-    void DestroyDescriptor(Descriptor& descriptor);
-    void DestroyPipelineLayout(PipelineLayout& pipelineLayout);
-    void DestroyPipeline(Pipeline& pipeline);
-    void DestroyQueryPool(QueryPool& queryPool);
-    void DestroyFence(Fence& fence);
-    Result AllocateMemory(const AllocateMemoryDesc& allocateMemoryDesc, Memory*& memory);
+    Result GetCommandQueue(CommandQueueType commandQueueType, CommandQueue*& commandQueue);
     Result BindBufferMemory(const BufferMemoryBindingDesc* memoryBindingDescs, uint32_t memoryBindingDescNum);
     Result BindTextureMemory(const TextureMemoryBindingDesc* memoryBindingDescs, uint32_t memoryBindingDescNum);
-    void FreeMemory(Memory& memory);
     FormatSupportBits GetFormatSupport(Format format) const;
 
+    inline void SetDebugName(const char* name) {
+        SET_D3D_DEBUG_OBJECT_NAME(m_Device, name);
+        SET_D3D_DEBUG_OBJECT_NAME(m_ImmediateContext, name);
+    }
+
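The CreateImplementation template added above replaces the per-type Create*/Destroy* member boilerplate: it allocates the backend object, forwards the argument pack to a matching Create overload, and guarantees a null interface pointer on failure. Call sites then collapse to one-liners, e.g. (mirroring the .hpp rewiring later in this diff):

```cpp
// Allocates a BufferD3D11, calls BufferD3D11::Create(bufferDesc), and
// nulls "buffer" if creation fails.
static Result NRI_CALL CreateBuffer(Device& device, const BufferDesc& bufferDesc, Buffer*& buffer) {
    return ((DeviceD3D11&)device).CreateImplementation<BufferD3D11>(buffer, bufferDesc);
}
```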
     //================================================================================================================
     // DeviceBase
     //================================================================================================================
 
@@ -117,20 +104,18 @@ struct DeviceD3D11 final : public DeviceBase {
         return m_Desc;
     }
 
-    void Destroy();
+    void Destruct();
     Result FillFunctionTable(CoreInterface& table) const;
+    Result FillFunctionTable(HelperInterface& table) const;
+    Result FillFunctionTable(LowLatencyInterface& table) const;
+    Result FillFunctionTable(StreamerInterface& table) const;
     Result FillFunctionTable(SwapChainInterface& table) const;
+    Result FillFunctionTable(ResourceAllocatorInterface& table) const;
     Result FillFunctionTable(WrapperD3D11Interface& table) const;
-    Result FillFunctionTable(HelperInterface& helperInterface) const;
-    Result FillFunctionTable(LowLatencyInterface& lowLatencyInterface) const;
-    Result FillFunctionTable(StreamerInterface& streamerInterface) const;
 
 private:
     void FillDesc();
 
-    template <typename Implementation, typename Interface, typename... Args>
-    Result CreateImplementation(Interface*& entity, const Args&... args);
-
 private:
     d3d11::Ext m_Ext = {}; // don't sort: destructor must be called last!
     ComPtr m_Device;
diff --git a/Source/D3D11/DeviceD3D11.hpp b/Source/D3D11/DeviceD3D11.hpp
index 8a69fa0c..0c0e8597 100644
--- a/Source/D3D11/DeviceD3D11.hpp
+++ b/Source/D3D11/DeviceD3D11.hpp
@@ -38,135 +38,107 @@ static Result NRI_CALL CreateCommandAllocator(const CommandQueue& commandQueue,
 }
 
 static Result NRI_CALL CreateDescriptorPool(Device& device, const DescriptorPoolDesc& descriptorPoolDesc, DescriptorPool*& descriptorPool) {
-    return ((DeviceD3D11&)device).CreateDescriptorPool(descriptorPoolDesc, descriptorPool);
+    return ((DeviceD3D11&)device).CreateImplementation<DescriptorPoolD3D11>(descriptorPool, descriptorPoolDesc);
 }
 
 static Result NRI_CALL CreateBuffer(Device& device, const BufferDesc& bufferDesc, Buffer*& buffer) {
-    return ((DeviceD3D11&)device).CreateBuffer(bufferDesc, buffer);
+    return ((DeviceD3D11&)device).CreateImplementation<BufferD3D11>(buffer, bufferDesc);
 }
 
 static Result NRI_CALL CreateTexture(Device& device, const TextureDesc& textureDesc, Texture*& texture) {
-    return ((DeviceD3D11&)device).CreateTexture(textureDesc, texture);
+    return ((DeviceD3D11&)device).CreateImplementation<TextureD3D11>(texture, textureDesc);
 }
 
 static Result NRI_CALL CreateBufferView(const BufferViewDesc& bufferViewDesc, Descriptor*& bufferView) {
     DeviceD3D11& device = ((const BufferD3D11*)bufferViewDesc.buffer)->GetDevice();
 
-    return device.CreateDescriptor(bufferViewDesc, bufferView);
+    return device.CreateImplementation<DescriptorD3D11>(bufferView, bufferViewDesc);
 }
 
 static Result NRI_CALL CreateTexture1DView(const Texture1DViewDesc& textureViewDesc, Descriptor*& textureView) {
     DeviceD3D11& device = ((const TextureD3D11*)textureViewDesc.texture)->GetDevice();
 
-    return device.CreateDescriptor(textureViewDesc, textureView);
+    return device.CreateImplementation<DescriptorD3D11>(textureView, textureViewDesc);
 }
 
 static Result NRI_CALL CreateTexture2DView(const Texture2DViewDesc& textureViewDesc, Descriptor*& textureView) {
     DeviceD3D11& device = ((const TextureD3D11*)textureViewDesc.texture)->GetDevice();
 
-    return device.CreateDescriptor(textureViewDesc, textureView);
+    return device.CreateImplementation<DescriptorD3D11>(textureView, textureViewDesc);
 }
 
 static Result NRI_CALL CreateTexture3DView(const Texture3DViewDesc& textureViewDesc, Descriptor*& textureView) {
     DeviceD3D11& device = ((const TextureD3D11*)textureViewDesc.texture)->GetDevice();
 
-    return device.CreateDescriptor(textureViewDesc, textureView);
+    return device.CreateImplementation<DescriptorD3D11>(textureView, textureViewDesc);
 }
 
 static Result NRI_CALL CreateSampler(Device& device, const SamplerDesc& samplerDesc, Descriptor*& sampler) {
-    return ((DeviceD3D11&)device).CreateDescriptor(samplerDesc, sampler);
+    return ((DeviceD3D11&)device).CreateImplementation<DescriptorD3D11>(sampler, samplerDesc);
 }
 
 static Result NRI_CALL CreatePipelineLayout(Device& device, const PipelineLayoutDesc& pipelineLayoutDesc, PipelineLayout*& pipelineLayout) {
-    return ((DeviceD3D11&)device).CreatePipelineLayout(pipelineLayoutDesc, pipelineLayout);
+    return ((DeviceD3D11&)device).CreateImplementation<PipelineLayoutD3D11>(pipelineLayout, pipelineLayoutDesc);
 }
 
 static Result NRI_CALL CreateGraphicsPipeline(Device& device, const GraphicsPipelineDesc& graphicsPipelineDesc, Pipeline*& pipeline) {
-    return ((DeviceD3D11&)device).CreatePipeline(graphicsPipelineDesc, pipeline);
+    return ((DeviceD3D11&)device).CreateImplementation<PipelineD3D11>(pipeline, graphicsPipelineDesc);
 }
 
 static Result NRI_CALL CreateComputePipeline(Device& device, const ComputePipelineDesc& computePipelineDesc, Pipeline*& pipeline) {
-    return ((DeviceD3D11&)device).CreatePipeline(computePipelineDesc, pipeline);
+    return ((DeviceD3D11&)device).CreateImplementation<PipelineD3D11>(pipeline, computePipelineDesc);
 }
 
 static Result NRI_CALL CreateQueryPool(Device& device, const QueryPoolDesc& queryPoolDesc, QueryPool*& queryPool) {
-    return ((DeviceD3D11&)device).CreateQueryPool(queryPoolDesc, queryPool);
+    return ((DeviceD3D11&)device).CreateImplementation<QueryPoolD3D11>(queryPool, queryPoolDesc);
 }
 
-static Result NRI_CALL CreateFence(Device& device, uint64_t initialValue, Fence*& queueSemaphore) {
-    return ((DeviceD3D11&)device).CreateFence(initialValue, queueSemaphore);
+static Result NRI_CALL CreateFence(Device& device, uint64_t initialValue, Fence*& fence) {
+    return ((DeviceD3D11&)device).CreateImplementation<FenceD3D11>(fence, initialValue);
 }
 
 static void NRI_CALL DestroyCommandAllocator(CommandAllocator& commandAllocator) {
-    if (!(&commandAllocator))
+    Destroy((CommandAllocatorD3D11*)&commandAllocator);
+}
+
+static void NRI_CALL DestroyCommandBuffer(CommandBuffer& commandBuffer) {
+    if (!(&commandBuffer))
         return;
 
-    DeviceD3D11& device = ((CommandAllocatorD3D11&)commandAllocator).GetDevice();
-    device.DestroyCommandAllocator(commandAllocator);
+    CommandBufferHelper& commandBufferHelper = (CommandBufferHelper&)commandBuffer;
+    Destroy(commandBufferHelper.GetStdAllocator(), &commandBufferHelper);
 }
 
 static void NRI_CALL DestroyDescriptorPool(DescriptorPool& descriptorPool) {
-    if (!(&descriptorPool))
-        return;
-
-    DeviceD3D11& device = ((DescriptorPoolD3D11&)descriptorPool).GetDevice();
-    device.DestroyDescriptorPool(descriptorPool);
+    Destroy((DescriptorPoolD3D11*)&descriptorPool);
 }
 
 static void NRI_CALL DestroyBuffer(Buffer& buffer) {
-    if (!(&buffer))
-        return;
-
-    DeviceD3D11& device = ((BufferD3D11&)buffer).GetDevice();
-    device.DestroyBuffer(buffer);
+    Destroy((BufferD3D11*)&buffer);
 }
 
 static void NRI_CALL DestroyTexture(Texture& texture) {
-    if (!(&texture))
-        return;
-
-    DeviceD3D11& device = ((TextureD3D11&)texture).GetDevice();
-    device.DestroyTexture(texture);
+    Destroy((TextureD3D11*)&texture);
 }
 
 static void NRI_CALL DestroyDescriptor(Descriptor& descriptor) {
-    if (!(&descriptor))
-        return;
-
-    DeviceD3D11& device = ((DescriptorD3D11&)descriptor).GetDevice();
-    device.DestroyDescriptor(descriptor);
+    Destroy((DescriptorD3D11*)&descriptor);
 }
 
 static void NRI_CALL DestroyPipelineLayout(PipelineLayout& pipelineLayout) {
-    if (!(&pipelineLayout))
-        return;
-
-    DeviceD3D11& device = ((PipelineLayoutD3D11&)pipelineLayout).GetDevice();
-    device.DestroyPipelineLayout(pipelineLayout);
+    Destroy((PipelineLayoutD3D11*)&pipelineLayout);
 }
 
 static void NRI_CALL DestroyPipeline(Pipeline& pipeline) {
-    if (!(&pipeline))
-        return;
-
-    DeviceD3D11& device = ((PipelineD3D11&)pipeline).GetDevice();
-    device.DestroyPipeline(pipeline);
+    Destroy((PipelineD3D11*)&pipeline);
 }
 
 static void NRI_CALL DestroyQueryPool(QueryPool& queryPool) {
-    if (!(&queryPool))
-        return;
-
-    DeviceD3D11& device = ((QueryPoolD3D11&)queryPool).GetDevice();
-    device.DestroyQueryPool(queryPool);
+    Destroy((QueryPoolD3D11*)&queryPool);
 }
 
 static void NRI_CALL DestroyFence(Fence& fence) {
-    if (!(&fence))
-        return;
-
-    DeviceD3D11& device = ((FenceD3D11&)fence).GetDevice();
-    device.DestroyFence(fence);
+    Destroy((FenceD3D11*)&fence);
 }
 
 static Result NRI_CALL AllocateMemory(Device& device, const AllocateMemoryDesc& allocateMemoryDesc, Memory*& memory) {
-    return ((DeviceD3D11&)device).AllocateMemory(allocateMemoryDesc, memory);
+    return ((DeviceD3D11&)device).CreateImplementation<MemoryD3D11>(memory, allocateMemoryDesc);
 }
 
 static Result NRI_CALL BindBufferMemory(Device& device, const BufferMemoryBindingDesc* memoryBindingDescs, uint32_t memoryBindingDescNum) {
@@ -178,11 +150,7 @@ static Result NRI_CALL BindTextureMemory(Device& device, const TextureMemoryBind
 }
 
 static void NRI_CALL FreeMemory(Memory& memory) {
-    if (!(&memory))
-        return;
-
-    DeviceD3D11& device = ((MemoryD3D11&)memory).GetDevice();
-    device.FreeMemory(memory);
+    Destroy((MemoryD3D11*)&memory);
 }
 
 static void NRI_CALL SetDeviceDebugName(Device& device, const char* name) {
@@ -208,92 +176,27 @@ static void* NRI_CALL GetDeviceNativeObject(const Device& device) {
     return ((DeviceD3D11&)device).GetNativeObject();
 }
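The Destroy* entry points above no longer fetch the owning device; they call a single-argument Destroy overload whose definition lives outside this diff. Assuming it mirrors the two-argument form used elsewhere in the patch, it is roughly:

```cpp
// Assumed shape of the one-argument Destroy helper: null-safe, and it
// recovers the allocator from the object's device so the wrappers need no
// explicit Device reference or per-type null checks.
template <typename T>
inline void Destroy(T* object) {
    if (object)
        Destroy(object->GetDevice().GetStdAllocator(), object);
}
```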
-void Core_CommandBufferEmu_PartiallyFillFunctionTable(CoreInterface& coreInterface);
-
-Result DeviceD3D11::FillFunctionTable(CoreInterface& coreInterface) const {
-    coreInterface = {};
-    coreInterface.GetDeviceDesc = ::GetDeviceDesc;
-    coreInterface.GetBufferDesc = ::GetBufferDesc;
-    coreInterface.GetTextureDesc = ::GetTextureDesc;
-    coreInterface.GetFormatSupport = ::GetFormatSupport;
-    coreInterface.GetBufferMemoryDesc = ::GetBufferMemoryDesc;
-    coreInterface.GetTextureMemoryDesc = ::GetTextureMemoryDesc;
-    coreInterface.GetCommandQueue = ::GetCommandQueue;
-    coreInterface.CreateCommandAllocator = ::CreateCommandAllocator;
-    coreInterface.CreateDescriptorPool = ::CreateDescriptorPool;
-    coreInterface.CreateBuffer = ::CreateBuffer;
-    coreInterface.CreateTexture = ::CreateTexture;
-    coreInterface.CreateBufferView = ::CreateBufferView;
-    coreInterface.CreateTexture1DView = ::CreateTexture1DView;
-    coreInterface.CreateTexture2DView = ::CreateTexture2DView;
-    coreInterface.CreateTexture3DView = ::CreateTexture3DView;
-    coreInterface.CreateSampler = ::CreateSampler;
-    coreInterface.CreatePipelineLayout = ::CreatePipelineLayout;
-    coreInterface.CreateGraphicsPipeline = ::CreateGraphicsPipeline;
-    coreInterface.CreateComputePipeline = ::CreateComputePipeline;
-    coreInterface.CreateQueryPool = ::CreateQueryPool;
-    coreInterface.CreateFence = ::CreateFence;
-    coreInterface.DestroyCommandAllocator = ::DestroyCommandAllocator;
-    coreInterface.DestroyDescriptorPool = ::DestroyDescriptorPool;
-    coreInterface.DestroyBuffer = ::DestroyBuffer;
-    coreInterface.DestroyTexture = ::DestroyTexture;
-    coreInterface.DestroyDescriptor = ::DestroyDescriptor;
-    coreInterface.DestroyPipelineLayout = ::DestroyPipelineLayout;
-    coreInterface.DestroyPipeline = ::DestroyPipeline;
-    coreInterface.DestroyQueryPool = ::DestroyQueryPool;
-    coreInterface.DestroyFence = ::DestroyFence;
-    coreInterface.AllocateMemory = ::AllocateMemory;
-    coreInterface.BindBufferMemory = ::BindBufferMemory;
-    coreInterface.BindTextureMemory = ::BindTextureMemory;
-    coreInterface.FreeMemory = ::FreeMemory;
-    coreInterface.SetDeviceDebugName = ::SetDeviceDebugName;
-    coreInterface.SetPipelineDebugName = ::SetPipelineDebugName;
-    coreInterface.SetPipelineLayoutDebugName = ::SetPipelineLayoutDebugName;
-    coreInterface.SetMemoryDebugName = ::SetMemoryDebugName;
-    coreInterface.GetDeviceNativeObject = ::GetDeviceNativeObject;
-
-    Core_Buffer_PartiallyFillFunctionTableD3D11(coreInterface);
-    Core_CommandAllocator_PartiallyFillFunctionTableD3D11(coreInterface);
-    Core_CommandQueue_PartiallyFillFunctionTableD3D11(coreInterface);
-    Core_Descriptor_PartiallyFillFunctionTableD3D11(coreInterface);
-    Core_DescriptorPool_PartiallyFillFunctionTableD3D11(coreInterface);
-    Core_DescriptorSet_PartiallyFillFunctionTableD3D11(coreInterface);
-    Core_Fence_PartiallyFillFunctionTableD3D11(coreInterface);
-    Core_QueryPool_PartiallyFillFunctionTableD3D11(coreInterface);
-    Core_Texture_PartiallyFillFunctionTableD3D11(coreInterface);
+void Core_CommandBufferEmu_PartiallyFillFunctionTable(CoreInterface& table);
+
+Result DeviceD3D11::FillFunctionTable(CoreInterface& table) const {
+    table = {};
+    Core_Device_PartiallyFillFunctionTableD3D11(table);
+    Core_Buffer_PartiallyFillFunctionTableD3D11(table);
+    Core_CommandAllocator_PartiallyFillFunctionTableD3D11(table);
+    Core_CommandQueue_PartiallyFillFunctionTableD3D11(table);
+    Core_Descriptor_PartiallyFillFunctionTableD3D11(table);
+    Core_DescriptorPool_PartiallyFillFunctionTableD3D11(table);
+    Core_DescriptorSet_PartiallyFillFunctionTableD3D11(table);
+    Core_Fence_PartiallyFillFunctionTableD3D11(table);
+    Core_QueryPool_PartiallyFillFunctionTableD3D11(table);
+    Core_Texture_PartiallyFillFunctionTableD3D11(table);
 
     if (m_IsDeferredContextEmulated)
-        Core_CommandBufferEmu_PartiallyFillFunctionTable(coreInterface);
+        Core_CommandBufferEmu_PartiallyFillFunctionTable(table);
     else
-        Core_CommandBuffer_PartiallyFillFunctionTableD3D11(coreInterface);
+        Core_CommandBuffer_PartiallyFillFunctionTableD3D11(table);
 
-    return ValidateFunctionTable(coreInterface);
-}
-
-#pragma endregion
-
-#pragma region[ SwapChain ]
-
-static Result NRI_CALL CreateSwapChain(Device& device, const SwapChainDesc& swapChainDesc, SwapChain*& swapChain) {
-    return ((DeviceD3D11&)device).CreateSwapChain(swapChainDesc, swapChain);
-}
-
-static void NRI_CALL DestroySwapChain(SwapChain& swapChain) {
-    if (!(&swapChain))
-        return;
-
-    DeviceD3D11& device = ((SwapChainD3D11&)swapChain).GetDevice();
-    return device.DestroySwapChain(swapChain);
-}
-
-Result DeviceD3D11::FillFunctionTable(SwapChainInterface& swapChainInterface) const {
-    swapChainInterface = {};
-    swapChainInterface.CreateSwapChain = ::CreateSwapChain;
-    swapChainInterface.DestroySwapChain = ::DestroySwapChain;
-
-    SwapChain_PartiallyFillFunctionTableD3D11(swapChainInterface);
-
-    return ValidateFunctionTable(swapChainInterface);
+    return ValidateFunctionTable(table);
 }
 
 #pragma endregion
 
@@ -320,81 +223,70 @@ static Result NRI_CALL QueryVideoMemoryInfo(const Device& device, MemoryLocation
     return QueryVideoMemoryInfoDXGI(luid, memoryLocation, videoMemoryInfo);
 }
 
-Result DeviceD3D11::FillFunctionTable(HelperInterface& helperInterface) const {
-    helperInterface = {};
-    helperInterface.CalculateAllocationNumber = ::CalculateAllocationNumber;
-    helperInterface.AllocateAndBindMemory = ::AllocateAndBindMemory;
-    helperInterface.QueryVideoMemoryInfo = ::QueryVideoMemoryInfo;
+Result DeviceD3D11::FillFunctionTable(HelperInterface& table) const {
+    table = {};
+    Helper_CommandQueue_PartiallyFillFunctionTableD3D11(table);
+    Helper_Device_PartiallyFillFunctionTableD3D11(table);
 
-    Helper_CommandQueue_PartiallyFillFunctionTableD3D11(helperInterface);
-
-    return ValidateFunctionTable(helperInterface);
+    return ValidateFunctionTable(table);
 }
 
 #pragma endregion
 
-#pragma region[ WrapperD3D11 ]
+#pragma region[ LowLatency ]
 
-static Result NRI_CALL CreateCommandBuffer(Device& device, const CommandBufferD3D11Desc& commandBufferD3D11Desc, CommandBuffer*& commandBuffer) {
-    DeviceD3D11& deviceD3D11 = (DeviceD3D11&)device;
+Result DeviceD3D11::FillFunctionTable(LowLatencyInterface& table) const {
+    table = {};
+    if (!m_Desc.isLowLatencySupported)
+        return Result::UNSUPPORTED;
 
-    return ::CreateCommandBuffer(deviceD3D11, commandBufferD3D11Desc.d3d11DeviceContext, commandBuffer);
+    LowLatency_CommandQueue_PartiallyFillFunctionTableD3D11(table);
+    LowLatency_SwapChain_SwapChain_PartiallyFillFunctionTableD3D11(table);
+
+    return ValidateFunctionTable(table);
 }
 
-static Result NRI_CALL CreateBuffer(Device& device, const BufferD3D11Desc& bufferD3D11Desc, Buffer*& buffer) {
-    DeviceD3D11& deviceD3D11 = (DeviceD3D11&)device;
+#pragma endregion
 
-    BufferD3D11* implementation = Allocate<BufferD3D11>(deviceD3D11.GetStdAllocator(), deviceD3D11);
-    Result res = implementation->Create(bufferD3D11Desc);
+#pragma region[ ResourceAllocator ]
 
-    if (res == Result::SUCCESS) {
-        buffer = (Buffer*)implementation;
-        return Result::SUCCESS;
+static Result AllocateBuffer(Device& device, const AllocateBufferDesc& bufferDesc, Buffer*& buffer) {
+    Result result = ((DeviceD3D11&)device).CreateImplementation<BufferD3D11>(buffer, bufferDesc.desc);
+    if (result == Result::SUCCESS) {
+        result = ((BufferD3D11*)buffer)->Create(bufferDesc.memoryLocation, bufferDesc.memoryPriority);
+        if (result != Result::SUCCESS) {
+            Destroy(((DeviceD3D11&)device).GetStdAllocator(), (BufferD3D11*)buffer);
+            buffer = nullptr;
+        }
     }
 
-    Deallocate(deviceD3D11.GetStdAllocator(), implementation);
-
-    return res;
+    return result;
 }
 
-static Result NRI_CALL CreateTexture(Device& device, const TextureD3D11Desc& textureD3D11Desc, Texture*& texture) {
-    DeviceD3D11& deviceD3D11 = (DeviceD3D11&)device;
-
-    TextureD3D11* implementation = Allocate<TextureD3D11>(deviceD3D11.GetStdAllocator(), deviceD3D11);
-    Result res = implementation->Create(textureD3D11Desc);
-
-    if (res == Result::SUCCESS) {
-        texture = (Texture*)implementation;
-        return Result::SUCCESS;
+static Result AllocateTexture(Device& device, const AllocateTextureDesc& textureDesc, Texture*& texture) {
+    Result result = ((DeviceD3D11&)device).CreateImplementation<TextureD3D11>(texture, textureDesc.desc);
+    if (result == Result::SUCCESS) {
+        result = ((TextureD3D11*)texture)->Create(textureDesc.memoryLocation, textureDesc.memoryPriority);
+        if (result != Result::SUCCESS) {
+            Destroy(((DeviceD3D11&)device).GetStdAllocator(), (TextureD3D11*)texture);
+            texture = nullptr;
+        }
     }
 
-    Deallocate(deviceD3D11.GetStdAllocator(), implementation);
-
-    return res;
+    return result;
 }
 
-Result DeviceD3D11::FillFunctionTable(WrapperD3D11Interface& wrapperD3D11Interface) const {
-    wrapperD3D11Interface = {};
-    wrapperD3D11Interface.CreateCommandBufferD3D11 = ::CreateCommandBuffer;
-    wrapperD3D11Interface.CreateTextureD3D11 = ::CreateTexture;
-    wrapperD3D11Interface.CreateBufferD3D11 = ::CreateBuffer;
+static Result AllocateAccelerationStructure(Device& device, const AllocateAccelerationStructureDesc& accelerationStructureDesc, AccelerationStructure*& accelerationStructure) {
+    MaybeUnused(device, accelerationStructureDesc, accelerationStructure);
 
-    return ValidateFunctionTable(wrapperD3D11Interface);
+    return Result::UNSUPPORTED;
 }
 
-#pragma endregion
-
-#pragma region[ LowLatency ]
-
-Result DeviceD3D11::FillFunctionTable(LowLatencyInterface& lowLatencyInterface) const {
-    lowLatencyInterface = {};
-    if (!m_Desc.isLowLatencySupported)
-        return Result::UNSUPPORTED;
+Result DeviceD3D11::FillFunctionTable(ResourceAllocatorInterface& table) const {
+    table = {};
+    ResourceAllocator_Device_PartiallyFillFunctionTableD3D11(table);
 
-    LowLatency_CommandQueue_PartiallyFillFunctionTableD3D11(lowLatencyInterface);
-    LowLatency_SwapChain_PartiallyFillFunctionTableD3D11(lowLatencyInterface);
-
-    return ValidateFunctionTable(lowLatencyInterface);
+    return ValidateFunctionTable(table);
 }
 
 #pragma endregion
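AllocateBuffer/AllocateTexture above give D3D11 its ResourceAllocatorInterface support: each creates the resource and immediately commits it to a memory location with an eviction priority, destroying the half-built object and nulling the out pointer on any failure (acceleration structures stay unsupported on D3D11). Client-side usage looks roughly like this (a sketch; the device and resourceAllocator variables are assumed to exist, field names are as they appear in the hunk):

```cpp
// Create a device-local constant buffer in one call via ResourceAllocatorInterface.
nri::AllocateBufferDesc allocateBufferDesc = {};
allocateBufferDesc.desc.size = 65536;
allocateBufferDesc.desc.usageMask = nri::BufferUsageBits::CONSTANT_BUFFER;
allocateBufferDesc.memoryLocation = nri::MemoryLocation::DEVICE;
allocateBufferDesc.memoryPriority = 0.0f; // neutral, maps to eviction priority 0

nri::Buffer* buffer = nullptr;
nri::Result result = resourceAllocator.AllocateBuffer(device, allocateBufferDesc, buffer);
```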
 
@@ -403,22 +295,20 @@ Result DeviceD3D11::FillFunctionTable(LowLatencyInterface& lowLatencyInterface)
 static Result CreateStreamer(Device& device, const StreamerDesc& streamerDesc, Streamer*& streamer) {
     DeviceD3D11& deviceD3D11 = (DeviceD3D11&)device;
+    StreamerImpl* impl = Allocate<StreamerImpl>(deviceD3D11.GetStdAllocator(), device, deviceD3D11.GetCoreInterface());
+    Result result = impl->Create(streamerDesc);
 
-    StreamerImpl* implementation = Allocate<StreamerImpl>(deviceD3D11.GetStdAllocator(), device, deviceD3D11.GetCoreInterface());
-    Result res = implementation->Create(streamerDesc);
-
-    if (res == Result::SUCCESS) {
-        streamer = (Streamer*)implementation;
-        return Result::SUCCESS;
-    }
-
-    Deallocate(deviceD3D11.GetStdAllocator(), implementation);
+    if (result != Result::SUCCESS) {
+        Destroy(deviceD3D11.GetStdAllocator(), impl);
+        streamer = nullptr;
+    } else
+        streamer = (Streamer*)impl;
 
-    return res;
+    return result;
 }
 
 static void DestroyStreamer(Streamer& streamer) {
-    Deallocate(((DeviceBase&)((StreamerImpl&)streamer).GetDevice()).GetStdAllocator(), (StreamerImpl*)&streamer);
+    Destroy((StreamerImpl*)&streamer);
 }
 
 static Buffer* GetStreamerConstantBuffer(Streamer& streamer) {
@@ -449,19 +339,63 @@ static void CmdUploadStreamerUpdateRequests(CommandBuffer& commandBuffer, Stream
     ((StreamerImpl&)streamer).CmdUploadStreamerUpdateRequests(commandBuffer);
 }
 
-Result DeviceD3D11::FillFunctionTable(StreamerInterface& streamerInterface) const {
-    streamerInterface = {};
-    streamerInterface.CreateStreamer = ::CreateStreamer;
-    streamerInterface.DestroyStreamer = ::DestroyStreamer;
-    streamerInterface.GetStreamerConstantBuffer = ::GetStreamerConstantBuffer;
-    streamerInterface.UpdateStreamerConstantBuffer = ::UpdateStreamerConstantBuffer;
-    streamerInterface.AddStreamerBufferUpdateRequest = ::AddStreamerBufferUpdateRequest;
-    streamerInterface.AddStreamerTextureUpdateRequest = ::AddStreamerTextureUpdateRequest;
-    streamerInterface.CopyStreamerUpdateRequests = ::CopyStreamerUpdateRequests;
-    streamerInterface.GetStreamerDynamicBuffer = ::GetStreamerDynamicBuffer;
-    streamerInterface.CmdUploadStreamerUpdateRequests = ::CmdUploadStreamerUpdateRequests;
+Result DeviceD3D11::FillFunctionTable(StreamerInterface& table) const {
+    table = {};
+    Streamer_Device_PartiallyFillFunctionTableD3D11(table);
 
-    return ValidateFunctionTable(streamerInterface);
+    return ValidateFunctionTable(table);
 }
 
 #pragma endregion
+
+#pragma region[ SwapChain ]
+
+static Result NRI_CALL CreateSwapChain(Device& device, const SwapChainDesc& swapChainDesc, SwapChain*& swapChain) {
+    return ((DeviceD3D11&)device).CreateImplementation<SwapChainD3D11>(swapChain, swapChainDesc);
+}
+
+static void NRI_CALL DestroySwapChain(SwapChain& swapChain) {
+    Destroy((SwapChainD3D11*)&swapChain);
+}
+
+Result DeviceD3D11::FillFunctionTable(SwapChainInterface& table) const {
+    table = {};
+    SwapChain_Device_PartiallyFillFunctionTableD3D11(table);
+    SwapChain_SwapChain_PartiallyFillFunctionTableD3D11(table);
+
+    return ValidateFunctionTable(table);
+}
+
+#pragma endregion
+
+#pragma region[ WrapperD3D11 ]
+
+static Result NRI_CALL CreateCommandBufferD3D11(Device& device, const CommandBufferD3D11Desc& commandBufferDesc, CommandBuffer*& commandBuffer) {
+    DeviceD3D11& deviceD3D11 = (DeviceD3D11&)device;
+
+    return ::CreateCommandBuffer(deviceD3D11, commandBufferDesc.d3d11DeviceContext, commandBuffer);
+}
+
+static Result NRI_CALL CreateBufferD3D11(Device& device, const BufferD3D11Desc& bufferDesc, Buffer*& buffer) {
+    return ((DeviceD3D11&)device).CreateImplementation<BufferD3D11>(buffer, bufferDesc);
+}
+
+static Result NRI_CALL CreateTextureD3D11(Device& device, const TextureD3D11Desc& textureDesc, Texture*& texture) {
+    return ((DeviceD3D11&)device).CreateImplementation<TextureD3D11>(texture, textureDesc);
+}
+
+Result DeviceD3D11::FillFunctionTable(WrapperD3D11Interface& table) const {
+    table = {};
+    WrapperD3D11_Device_PartiallyFillFunctionTableD3D11(table);
+
+    return ValidateFunctionTable(table);
+}
+
+#pragma endregion
+
+Define_Core_Device_PartiallyFillFunctionTable(D3D11);
+Define_Helper_Device_PartiallyFillFunctionTable(D3D11);
+Define_Streamer_Device_PartiallyFillFunctionTable(D3D11);
+Define_SwapChain_Device_PartiallyFillFunctionTable(D3D11);
+Define_ResourceAllocator_Device_PartiallyFillFunctionTable(D3D11);
+Define_WrapperD3D11_Device_PartiallyFillFunctionTable(D3D11);
diff --git a/Source/D3D11/MemoryD3D11.cpp b/Source/D3D11/MemoryD3D11.cpp
index 4a5f6528..7fb4846b 100644
--- a/Source/D3D11/MemoryD3D11.cpp
+++ b/Source/D3D11/MemoryD3D11.cpp
@@ -6,34 +6,9 @@
 
 using namespace nri;
 
-MemoryD3D11::MemoryD3D11(DeviceD3D11& device, const AllocateMemoryDesc& allocateMemoryDesc) : m_Device(device) {
+Result MemoryD3D11::Create(const AllocateMemoryDesc& allocateMemoryDesc) {
     m_Location = (MemoryLocation)allocateMemoryDesc.type;
+    m_Priority = allocateMemoryDesc.priority;
 
-    if (allocateMemoryDesc.priority != 0.0f) {
-        float p = allocateMemoryDesc.priority * 0.5f + 0.5f;
-        float level = 0.0f;
-
-        if (p < 0.2f) {
-            m_Priority = DXGI_RESOURCE_PRIORITY_MINIMUM;
-            level = 0.0f;
-        } else if (p < 0.4f) {
-            m_Priority = DXGI_RESOURCE_PRIORITY_LOW;
-            level = 0.2f;
-        } else if (p < 0.6f) {
-            m_Priority = DXGI_RESOURCE_PRIORITY_NORMAL;
-            level = 0.4f;
-        } else if (p < 0.8f) {
-            m_Priority = DXGI_RESOURCE_PRIORITY_HIGH;
-            level = 0.6f;
-        } else {
-            m_Priority = DXGI_RESOURCE_PRIORITY_MAXIMUM;
-            level = 0.8f;
-        }
-
-        uint32_t bonus = uint32_t(((p - level) / 0.2f) * 65535.0f);
-        if (bonus > 0xFFFF)
-            bonus = 0xFFFF;
-
-        m_Priority |= bonus;
-    }
+    return Result::SUCCESS;
 }
diff --git a/Source/D3D11/MemoryD3D11.h b/Source/D3D11/MemoryD3D11.h
index e5632668..1dc7d3c8 100644
--- a/Source/D3D11/MemoryD3D11.h
+++ b/Source/D3D11/MemoryD3D11.h
@@ -16,7 +16,8 @@ enum class MemoryResidencyPriority {
 };
 
 struct MemoryD3D11 {
-    MemoryD3D11(DeviceD3D11& device, const AllocateMemoryDesc& allocateMemoryDesc);
+    inline MemoryD3D11(DeviceD3D11& device) : m_Device(device) {
+    }
 
     inline ~MemoryD3D11() {
     }
 
@@ -29,10 +30,12 @@ struct MemoryD3D11 {
         return m_Location;
     }
 
-    inline uint32_t GetPriority() const {
+    inline float GetPriority() const {
         return m_Priority;
     }
 
+    Result Create(const AllocateMemoryDesc& allocateMemoryDesc);
+
     //================================================================================================================
     // NRI
     //================================================================================================================
@@ -44,7 +47,7 @@ struct MemoryD3D11 {
 private:
     DeviceD3D11& m_Device;
     MemoryLocation m_Location = MemoryLocation::DEVICE;
-    uint32_t m_Priority = 0;
+    float m_Priority = 0.0f;
 };
 
 } // namespace nri
diff --git a/Source/D3D11/PipelineD3D11.cpp b/Source/D3D11/PipelineD3D11.cpp
index 67640f36..a64ead48 100644
--- a/Source/D3D11/PipelineD3D11.cpp
+++ b/Source/D3D11/PipelineD3D11.cpp
@@ -55,7 +55,7 @@ Result PipelineD3D11::Create(const GraphicsPipelineDesc& pipelineDesc) {
     }
 
     m_InputAssemplyStrides.resize(maxBindingSlot + 1);
-    D3D11_INPUT_ELEMENT_DESC* inputElements = STACK_ALLOC(D3D11_INPUT_ELEMENT_DESC, vi.attributeNum);
+    D3D11_INPUT_ELEMENT_DESC* inputElements = StackAlloc(D3D11_INPUT_ELEMENT_DESC, vi.attributeNum);
 
     for (uint32_t i = 0; i < vi.attributeNum; i++) {
         const VertexAttributeDesc& attrIn = vi.attributes[i];
         const VertexStreamDesc& stream = vi.streams[attrIn.streamIndex];
diff --git a/Source/D3D11/PipelineLayoutD3D11.cpp b/Source/D3D11/PipelineLayoutD3D11.cpp
index d13e5edc..e7086a2b 100644
--- a/Source/D3D11/PipelineLayoutD3D11.cpp
+++ b/Source/D3D11/PipelineLayoutD3D11.cpp
@@ -164,7 +164,7 @@ void PipelineLayoutD3D11::BindDescriptorSetImpl(BindingState& currentBindingStat
     const BindingSet& bindingSet = m_BindingSets[setIndexInPipelineLayout];
 
     bool isStorageRebindNeededInGraphics = false;
-    uint8_t* memory = STACK_ALLOC(uint8_t, bindingSet.descriptorNum * (sizeof(void*) + sizeof(uint32_t) * 2));
+    uint8_t* memory = StackAlloc(uint8_t, bindingSet.descriptorNum * (sizeof(void*) + sizeof(uint32_t) * 2));
 
     void** descriptors = (void**)memory;
     memory += bindingSet.descriptorNum * sizeof(void*);
diff --git a/Source/D3D11/SharedD3D11.cpp b/Source/D3D11/SharedD3D11.cpp
index e30ae615..dff95a91 100644
--- a/Source/D3D11/SharedD3D11.cpp
+++ b/Source/D3D11/SharedD3D11.cpp
@@ -236,3 +236,37 @@ bool nri::GetBufferDesc(const BufferD3D11Desc& bufferD3D11Desc, BufferDesc& buff
 
     return true;
 }
+
+uint32_t nri::ConvertPriority(float priority) {
+    if (priority == 0.0f)
+        return 0;
+
+    float p = priority * 0.5f + 0.5f;
+    float level = 0.0f;
+
+    uint32_t result = 0;
+    if (p < 0.2f) {
+        result = DXGI_RESOURCE_PRIORITY_MINIMUM;
+        level = 0.0f;
+    } else if (p < 0.4f) {
+        result = DXGI_RESOURCE_PRIORITY_LOW;
+        level = 0.2f;
+    } else if (p < 0.6f) {
+        result = DXGI_RESOURCE_PRIORITY_NORMAL;
+        level = 0.4f;
+    } else if (p < 0.8f) {
+        result = DXGI_RESOURCE_PRIORITY_HIGH;
+        level = 0.6f;
+    } else {
+        result = DXGI_RESOURCE_PRIORITY_MAXIMUM;
+        level = 0.8f;
+    }
+
+    uint32_t bonus = uint32_t(((p - level) / 0.2f) * 65535.0f);
+    if (bonus > 0xFFFF)
+        bonus = 0xFFFF;
+
+    result |= bonus;
+
+    return result;
+}
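ConvertPriority, hoisted into SharedD3D11 so both buffers and textures can reuse it, folds a float priority in [-1, 1] into DXGI's eviction scheme: one of five DXGI_RESOURCE_PRIORITY_* bands OR'ed with a 16-bit intra-band bonus. A worked example (the values follow directly from the code above):

```cpp
// priority = 0.3f
// p     = 0.3f * 0.5f + 0.5f = 0.65f            -> falls in [0.6, 0.8)
// band  = DXGI_RESOURCE_PRIORITY_HIGH, level = 0.6f
// bonus = uint32_t(((0.65f - 0.6f) / 0.2f) * 65535.0f) = 16383
uint32_t evictionPriority = nri::ConvertPriority(0.3f); // DXGI_RESOURCE_PRIORITY_HIGH | 16383
```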
diff --git a/Source/D3D11/SharedD3D11.h b/Source/D3D11/SharedD3D11.h
index 7f2184b6..69a58c5b 100644
--- a/Source/D3D11/SharedD3D11.h
+++ b/Source/D3D11/SharedD3D11.h
@@ -15,18 +15,6 @@ namespace nri {
 
 constexpr Mip_t NULL_TEXTURE_REGION_DESC = Mip_t(-1);
 
-enum class BufferType {
-    DEVICE,
-    DYNAMIC,
-    READBACK,
-    UPLOAD
-};
-
-enum class MapType {
-    DEFAULT,
-    READ
-};
-
 enum class DescriptorTypeDX11 : uint8_t {
     // don't change order
     NO_SHADER_VISIBLE,
@@ -47,6 +35,7 @@ D3D11_BLEND GetD3D11BlendFromBlendFactor(BlendFactor blendFactor);
 D3D11_LOGIC_OP GetD3D11LogicOpFromLogicFunc(LogicFunc logicalFunc);
 bool GetTextureDesc(const TextureD3D11Desc& textureD3D11Desc, TextureDesc& textureDesc);
 bool GetBufferDesc(const BufferD3D11Desc& bufferD3D11Desc, BufferDesc& bufferDesc);
+uint32_t ConvertPriority(float priority);
 
 struct SubresourceInfo {
     const void* resource = nullptr;
diff --git a/Source/D3D11/SwapChainD3D11.cpp b/Source/D3D11/SwapChainD3D11.cpp
index 4a715d55..4f55fe05 100644
--- a/Source/D3D11/SwapChainD3D11.cpp
+++ b/Source/D3D11/SwapChainD3D11.cpp
@@ -46,7 +46,7 @@ SwapChainD3D11::~SwapChainD3D11() {
         CloseHandle(m_FrameLatencyWaitableObject);
 
     for (TextureD3D11* texture : m_Textures)
-        Deallocate(m_Device.GetStdAllocator(), texture);
+        Destroy(m_Device.GetStdAllocator(), texture);
 }
 
 Result SwapChainD3D11::Create(const SwapChainDesc& swapChainDesc) {
diff --git a/Source/D3D11/SwapChainD3D11.hpp b/Source/D3D11/SwapChainD3D11.hpp
index 816ff571..8d00688c 100644
--- a/Source/D3D11/SwapChainD3D11.hpp
+++ b/Source/D3D11/SwapChainD3D11.hpp
@@ -48,5 +48,5 @@ static Result GetLatencyReport(const SwapChain& swapChain, LatencyReport& latenc
 
 #pragma endregion
 
-Define_SwapChain_PartiallyFillFunctionTable(D3D11);
-Define_LowLatency_SwapChain_PartiallyFillFunctionTable(D3D11);
+Define_SwapChain_SwapChain_PartiallyFillFunctionTable(D3D11);
+Define_LowLatency_SwapChain_SwapChain_PartiallyFillFunctionTable(D3D11);
diff --git a/Source/D3D11/TextureD3D11.cpp b/Source/D3D11/TextureD3D11.cpp
index c4a15ebf..a0b2062c 100644
--- a/Source/D3D11/TextureD3D11.cpp
+++ b/Source/D3D11/TextureD3D11.cpp
@@ -7,7 +7,7 @@
 
 using namespace nri;
 
-Result TextureD3D11::Create(const MemoryD3D11* memory) {
+Result TextureD3D11::Create(MemoryLocation memoryLocation, float priority) {
     // Texture was already created externally
     if (m_Texture)
         return Result::SUCCESS;
@@ -24,12 +24,20 @@ Result TextureD3D11::Create(const MemoryD3D11* memory) {
     if (m_Desc.usageMask & TextureUsageBits::DEPTH_STENCIL_ATTACHMENT)
         bindFlags |= D3D11_BIND_DEPTH_STENCIL;
 
-    uint32_t cpuAccessFlags = D3D11_CPU_ACCESS_READ;
-    D3D11_USAGE usage = D3D11_USAGE_STAGING;
-    if (memory) {
-        MemoryLocation memoryLocation = memory->GetLocation();
-        usage = (memoryLocation == MemoryLocation::HOST_UPLOAD || memoryLocation == MemoryLocation::DEVICE_UPLOAD) ? D3D11_USAGE_DYNAMIC : D3D11_USAGE_DEFAULT;
-        cpuAccessFlags = 0;
+    D3D11_USAGE usage = D3D11_USAGE_DEFAULT;
+    uint32_t cpuAccessFlags = 0;
+    switch (memoryLocation) {
+        case MemoryLocation::DEVICE:
+            break;
+        case MemoryLocation::DEVICE_UPLOAD:
+        case MemoryLocation::HOST_UPLOAD:
+            usage = D3D11_USAGE_DYNAMIC;
+            cpuAccessFlags = D3D11_CPU_ACCESS_WRITE;
+            break;
+        case MemoryLocation::HOST_READBACK:
+            usage = D3D11_USAGE_STAGING;
+            cpuAccessFlags = D3D11_CPU_ACCESS_READ;
+            break;
     }
 
     HRESULT hr = E_INVALIDARG;
@@ -76,9 +84,16 @@ Result TextureD3D11::Create(const MemoryD3D11* memory) {
 
     RETURN_ON_BAD_HRESULT(&m_Device, hr, "ID3D11Device::CreateTextureXx()");
 
-    uint32_t priority = memory ? memory->GetPriority() : 0;
memory->GetPriority() : 0; - if (priority != 0) - m_Texture->SetEvictionPriority(priority); + // Priority + uint32_t evictionPriority = ConvertPriority(priority); + if (evictionPriority != 0) + m_Texture->SetEvictionPriority(evictionPriority); + + return Result::SUCCESS; +} + +Result TextureD3D11::Create(const TextureDesc& textureDesc) { + m_Desc = textureDesc; return Result::SUCCESS; } diff --git a/Source/D3D11/TextureD3D11.h b/Source/D3D11/TextureD3D11.h index 05ed8821..741cd2ae 100644 --- a/Source/D3D11/TextureD3D11.h +++ b/Source/D3D11/TextureD3D11.h @@ -11,9 +11,6 @@ struct TextureD3D11 { inline TextureD3D11(DeviceD3D11& device) : m_Device(device) { } - TextureD3D11(DeviceD3D11& device, const TextureDesc& textureDesc) : m_Device(device), m_Desc(textureDesc) { - } - inline ~TextureD3D11() { } @@ -49,8 +46,9 @@ struct TextureD3D11 { return GetDimension(GraphicsAPI::D3D11, m_Desc, dimensionIndex, mip); } - Result Create(const MemoryD3D11* memory); + Result Create(const TextureDesc& textureDesc); Result Create(const TextureD3D11Desc& textureDesc); + Result Create(MemoryLocation memoryLocation, float priority); static uint32_t GetMipmappedSize(const TextureDesc& textureDesc); diff --git a/Source/D3D12/AccelerationStructureD3D12.cpp b/Source/D3D12/AccelerationStructureD3D12.cpp index 90b40986..6cf991ae 100644 --- a/Source/D3D12/AccelerationStructureD3D12.cpp +++ b/Source/D3D12/AccelerationStructureD3D12.cpp @@ -4,11 +4,12 @@ #include "AccelerationStructureD3D12.h" #include "BufferD3D12.h" +#include "DescriptorD3D12.h" using namespace nri; AccelerationStructureD3D12::~AccelerationStructureD3D12() { - Deallocate(m_Device.GetStdAllocator(), m_Buffer); + Destroy(m_Device.GetStdAllocator(), m_Buffer); } Result AccelerationStructureD3D12::Create(const AccelerationStructureD3D12Desc& accelerationStructureDesc) { @@ -18,60 +19,32 @@ Result AccelerationStructureD3D12::Create(const AccelerationStructureD3D12Desc& BufferD3D12Desc bufferDesc = {}; bufferDesc.d3d12Resource = accelerationStructureDesc.d3d12Resource; - return m_Device.CreateBuffer(bufferDesc, (Buffer*&)m_Buffer); + return m_Device.CreateImplementation((Buffer*&)m_Buffer, bufferDesc); } Result AccelerationStructureD3D12::Create(const AccelerationStructureDesc& accelerationStructureDesc) { if (m_Device.GetVersion() < 5) return Result::UNSUPPORTED; - D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS accelerationStructureInputs = {}; - accelerationStructureInputs.Type = GetAccelerationStructureType(accelerationStructureDesc.type); - accelerationStructureInputs.Flags = GetAccelerationStructureBuildFlags(accelerationStructureDesc.flags); - accelerationStructureInputs.NumDescs = accelerationStructureDesc.instanceOrGeometryObjectNum; - accelerationStructureInputs.DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY; // TODO: D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS support? 
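Note: texture creation on D3D11 is now two-phase: the first Create() only records the TextureDesc, and the second one derives D3D11_USAGE and the CPU access flags from the MemoryLocation, then applies the eviction priority. A hypothetical call sequence (the locals are illustrative, not taken from the patch):

    TextureD3D11 texture(device);

    Result result = texture.Create(textureDesc); // phase 1: remember the desc, no D3D11 object yet
    if (result == Result::SUCCESS)
        result = texture.Create(MemoryLocation::DEVICE, 0.0f); // phase 2: create; 0.0f = default priority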
diff --git a/Source/D3D12/AccelerationStructureD3D12.cpp b/Source/D3D12/AccelerationStructureD3D12.cpp
index 90b40986..6cf991ae 100644
--- a/Source/D3D12/AccelerationStructureD3D12.cpp
+++ b/Source/D3D12/AccelerationStructureD3D12.cpp
@@ -4,11 +4,12 @@
 
 #include "AccelerationStructureD3D12.h"
 #include "BufferD3D12.h"
+#include "DescriptorD3D12.h"
 
 using namespace nri;
 
 AccelerationStructureD3D12::~AccelerationStructureD3D12() {
-    Deallocate(m_Device.GetStdAllocator(), m_Buffer);
+    Destroy(m_Device.GetStdAllocator(), m_Buffer);
 }
 
 Result AccelerationStructureD3D12::Create(const AccelerationStructureD3D12Desc& accelerationStructureDesc) {
@@ -18,60 +19,32 @@ Result AccelerationStructureD3D12::Create(const AccelerationStructureD3D12Desc&
 
     BufferD3D12Desc bufferDesc = {};
     bufferDesc.d3d12Resource = accelerationStructureDesc.d3d12Resource;
 
-    return m_Device.CreateBuffer(bufferDesc, (Buffer*&)m_Buffer);
+    return m_Device.CreateImplementation<BufferD3D12>((Buffer*&)m_Buffer, bufferDesc);
 }
 
 Result AccelerationStructureD3D12::Create(const AccelerationStructureDesc& accelerationStructureDesc) {
     if (m_Device.GetVersion() < 5)
         return Result::UNSUPPORTED;
 
-    D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS accelerationStructureInputs = {};
-    accelerationStructureInputs.Type = GetAccelerationStructureType(accelerationStructureDesc.type);
-    accelerationStructureInputs.Flags = GetAccelerationStructureBuildFlags(accelerationStructureDesc.flags);
-    accelerationStructureInputs.NumDescs = accelerationStructureDesc.instanceOrGeometryObjectNum;
-    accelerationStructureInputs.DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY; // TODO: D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS support?
-
-    Vector<D3D12_RAYTRACING_GEOMETRY_DESC> geometryDescs(accelerationStructureDesc.instanceOrGeometryObjectNum, m_Device.GetStdAllocator());
-    if (accelerationStructureInputs.Type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL && accelerationStructureDesc.instanceOrGeometryObjectNum) {
-        ConvertGeometryDescs(&geometryDescs[0], accelerationStructureDesc.geometryObjects, accelerationStructureDesc.instanceOrGeometryObjectNum);
-        accelerationStructureInputs.pGeometryDescs = &geometryDescs[0];
-    }
-
-    m_Device->GetRaytracingAccelerationStructurePrebuildInfo(&accelerationStructureInputs, &m_PrebuildInfo);
+    m_Device.GetAccelerationStructurePrebuildInfo(accelerationStructureDesc, m_PrebuildInfo);
 
     BufferDesc bufferDesc = {};
     bufferDesc.size = m_PrebuildInfo.ResultDataMaxSizeInBytes;
-    bufferDesc.usageMask = BufferUsageBits::SHADER_RESOURCE_STORAGE;
-
-    m_Buffer = Allocate<BufferD3D12>(m_Device.GetStdAllocator(), m_Device);
-    Result result = m_Buffer->Create(bufferDesc);
-
-    return result;
-}
+    bufferDesc.usageMask = BufferUsageBits::RAY_TRACING_BUFFER;
 
-void AccelerationStructureD3D12::GetMemoryDesc(MemoryDesc& memoryDesc) const {
-    m_Device.GetAccelerationStructureMemoryDesc(m_PrebuildInfo.ResultDataMaxSizeInBytes, memoryDesc);
-}
-
-uint64_t AccelerationStructureD3D12::GetUpdateScratchBufferSize() const {
-    return Align(m_PrebuildInfo.UpdateScratchDataSizeInBytes, D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BYTE_ALIGNMENT);
-}
-
-uint64_t AccelerationStructureD3D12::GetBuildScratchBufferSize() const {
-    return Align(m_PrebuildInfo.ScratchDataSizeInBytes, D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BYTE_ALIGNMENT);
+    return m_Device.CreateImplementation<BufferD3D12>((Buffer*&)m_Buffer, bufferDesc);
 }
 
 Result AccelerationStructureD3D12::BindMemory(Memory* memory, uint64_t offset) {
-    Result result = m_Buffer->BindMemory((MemoryD3D12*)memory, offset, true);
+    Result result = m_Buffer->BindMemory((MemoryD3D12*)memory, offset, ACCELERATION_STRUCTURE_BUFFER);
 
     return result;
 }
 
 Result AccelerationStructureD3D12::CreateDescriptor(Descriptor*& descriptor) const {
     const AccelerationStructure& accelerationStructure = (const AccelerationStructure&)*this;
-    Result result = m_Device.CreateDescriptor(accelerationStructure, descriptor);
-
-    return result;
+    return m_Device.CreateImplementation<DescriptorD3D12>(descriptor, accelerationStructure);
 }
 
 uint64_t AccelerationStructureD3D12::GetHandle() const {
diff --git a/Source/D3D12/AccelerationStructureD3D12.h b/Source/D3D12/AccelerationStructureD3D12.h
index 18e8f26e..bb139868 100644
--- a/Source/D3D12/AccelerationStructureD3D12.h
+++ b/Source/D3D12/AccelerationStructureD3D12.h
@@ -19,13 +19,19 @@ struct AccelerationStructureD3D12 {
         return m_Buffer;
     }
 
+    inline uint64_t GetUpdateScratchBufferSize() const {
+        return m_PrebuildInfo.UpdateScratchDataSizeInBytes;
+    }
+
+    inline uint64_t GetBuildScratchBufferSize() const {
+        return m_PrebuildInfo.ScratchDataSizeInBytes;
+    }
+
     ~AccelerationStructureD3D12();
 
     Result Create(const AccelerationStructureDesc& accelerationStructureDesc);
     Result Create(const AccelerationStructureD3D12Desc& accelerationStructureDesc);
-    void GetMemoryDesc(MemoryDesc& memoryDesc) const;
-    uint64_t GetUpdateScratchBufferSize() const;
-    uint64_t GetBuildScratchBufferSize() const;
+    Result Create(const AllocateAccelerationStructureDesc& accelerationStructureDesc);
     Result BindMemory(Memory* memory, uint64_t offset);
     Result CreateDescriptor(Descriptor*& descriptor) const;
diff --git a/Source/D3D12/AccelerationStructureD3D12.hpp b/Source/D3D12/AccelerationStructureD3D12.hpp
index b550336e..9d07256c 100644
--- a/Source/D3D12/AccelerationStructureD3D12.hpp
+++ b/Source/D3D12/AccelerationStructureD3D12.hpp
@@ -6,10 +6,6 @@ static Result NRI_CALL CreateAccelerationStructureDescriptor(const AccelerationS
     return ((AccelerationStructureD3D12&)accelerationStructure).CreateDescriptor(descriptor);
 }
 
-static void NRI_CALL GetAccelerationStructureMemoryDesc(const AccelerationStructure& accelerationStructure, MemoryDesc& memoryDesc) {
-    ((AccelerationStructureD3D12&)accelerationStructure).GetMemoryDesc(memoryDesc);
-}
-
 static uint64_t NRI_CALL GetAccelerationStructureUpdateScratchBufferSize(const AccelerationStructure& accelerationStructure) {
     return ((AccelerationStructureD3D12&)accelerationStructure).GetUpdateScratchBufferSize();
 }
diff --git a/Source/D3D12/BufferD3D12.cpp b/Source/D3D12/BufferD3D12.cpp
index 844fe713..15c8f8c9 100644
--- a/Source/D3D12/BufferD3D12.cpp
+++ b/Source/D3D12/BufferD3D12.cpp
@@ -53,16 +53,16 @@ Result BufferD3D12::BindMemory(const MemoryD3D12* memory, uint64_t offset, bool
         D3D12_RESOURCE_DESC1 desc1 = {};
         GetResourceDesc((D3D12_RESOURCE_DESC*)&desc1, m_Desc);
 
+        const D3D12_BARRIER_LAYOUT initialLayout = D3D12_BARRIER_LAYOUT_UNDEFINED;
         uint32_t castableFormatNum = 0;
         DXGI_FORMAT* castableFormats = nullptr; // TODO: add castable formats, see options12.RelaxedFormatCastingSupported
 
-        if (memory->RequiresDedicatedAllocation()) {
+        if (memory->IsDummy()) {
             HRESULT hr = m_Device->CreateCommittedResource3(
-                &heapDesc.Properties, heapFlagsFixed, &desc1, D3D12_BARRIER_LAYOUT_UNDEFINED, nullptr, nullptr, castableFormatNum, castableFormats, IID_PPV_ARGS(&m_Buffer));
+                &heapDesc.Properties, heapFlagsFixed, &desc1, initialLayout, nullptr, nullptr, castableFormatNum, castableFormats, IID_PPV_ARGS(&m_Buffer));
 
             RETURN_ON_BAD_HRESULT(&m_Device, hr, "ID3D12Device10::CreateCommittedResource3()");
         } else {
-            HRESULT hr =
-                m_Device->CreatePlacedResource2(*memory, offset, &desc1, D3D12_BARRIER_LAYOUT_UNDEFINED, nullptr, castableFormatNum, castableFormats, IID_PPV_ARGS(&m_Buffer));
+            HRESULT hr = m_Device->CreatePlacedResource2(*memory, offset, &desc1, initialLayout, nullptr, castableFormatNum, castableFormats, IID_PPV_ARGS(&m_Buffer));
 
             RETURN_ON_BAD_HRESULT(&m_Device, hr, "ID3D12Device10::CreatePlacedResource2()");
         }
     } else
@@ -80,7 +80,7 @@ Result BufferD3D12::BindMemory(const MemoryD3D12* memory, uint64_t offset, bool
         if (isAccelerationStructureBuffer)
             initialState |= D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE;
 
-        if (memory->RequiresDedicatedAllocation()) {
+        if (memory->IsDummy()) {
             HRESULT hr = m_Device->CreateCommittedResource(&heapDesc.Properties, heapFlagsFixed, &desc, initialState, nullptr, IID_PPV_ARGS(&m_Buffer));
 
             RETURN_ON_BAD_HRESULT(&m_Device, hr, "ID3D12Device::CreateCommittedResource()");
         } else {
@@ -89,8 +89,13 @@ Result BufferD3D12::BindMemory(const MemoryD3D12* memory, uint64_t offset, bool
         }
     }
 
-    if (memory->RequiresDedicatedAllocation())
-        memory->SetPriority(m_Buffer.GetInterface());
+    // Priority
+    D3D12_RESIDENCY_PRIORITY residencyPriority = (D3D12_RESIDENCY_PRIORITY)ConvertPriority(memory->GetPriority());
+    if (m_Device.GetVersion() >= 1 && residencyPriority != 0) {
+        ID3D12Pageable* obj = m_Buffer.GetInterface();
+        HRESULT hr = m_Device->SetResidencyPriority(1, &obj, &residencyPriority);
+        RETURN_ON_BAD_HRESULT(&m_Device, hr, "ID3D12Device1::SetResidencyPriority()");
+    }
 
     return Result::SUCCESS;
 }
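Note: on the D3D12 side the same ConvertPriority value is reinterpreted as a D3D12_RESIDENCY_PRIORITY and applied through ID3D12Device1, which is the standard API for this. Outside of NRI the equivalent call looks like the following (assumes device1 is an ID3D12Device1* and resource a valid ID3D12Resource*; this mirrors the hunk above rather than adding anything new):

    // ID3D12Resource derives from ID3D12Pageable, so a resource can be passed directly
    D3D12_RESIDENCY_PRIORITY residencyPriority = (D3D12_RESIDENCY_PRIORITY)ConvertPriority(0.5f);
    ID3D12Pageable* pageable = resource;
    HRESULT hr = device1->SetResidencyPriority(1, &pageable, &residencyPriority);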
diff --git a/Source/D3D12/BufferD3D12.h b/Source/D3D12/BufferD3D12.h
index 0a4ebefd..725ded91 100644
--- a/Source/D3D12/BufferD3D12.h
+++ b/Source/D3D12/BufferD3D12.h
@@ -15,6 +15,8 @@ namespace nri {
 struct DeviceD3D12;
 struct MemoryD3D12;
 
+constexpr bool ACCELERATION_STRUCTURE_BUFFER = true;
+
 struct BufferD3D12 {
     inline BufferD3D12(DeviceD3D12& device) : m_Device(device) {
     }
@@ -40,6 +42,7 @@ struct BufferD3D12 {
 
     Result Create(const BufferDesc& bufferDesc);
     Result Create(const BufferD3D12Desc& bufferDesc);
+    Result Create(const AllocateBufferDesc& bufferDesc, bool isAccelerationStructureBuffer = false);
     Result BindMemory(const MemoryD3D12* memory, uint64_t offset, bool isAccelerationStructureBuffer = false);
 
     //================================================================================================================
@@ -56,6 +59,7 @@ struct BufferD3D12 {
 private:
     DeviceD3D12& m_Device;
     ComPtr<ID3D12Resource> m_Buffer;
+    ComPtr<D3D12MA::Allocation> m_VmaAllocation = nullptr;
     BufferDesc m_Desc = {};
 };
diff --git a/Source/D3D12/CommandAllocatorD3D12.cpp b/Source/D3D12/CommandAllocatorD3D12.cpp
index 83ba1cf5..30826658 100644
--- a/Source/D3D12/CommandAllocatorD3D12.cpp
+++ b/Source/D3D12/CommandAllocatorD3D12.cpp
@@ -30,7 +30,7 @@ inline Result CommandAllocatorD3D12::CreateCommandBuffer(CommandBuffer*& command
         return Result::SUCCESS;
     }
 
-    Deallocate(m_Device.GetStdAllocator(), commandBufferD3D12);
+    Destroy(m_Device.GetStdAllocator(), commandBufferD3D12);
 
     return result;
 }
diff --git a/Source/D3D12/CommandBufferD3D12.cpp b/Source/D3D12/CommandBufferD3D12.cpp
index 7d50b636..821436d7 100644
--- a/Source/D3D12/CommandBufferD3D12.cpp
+++ b/Source/D3D12/CommandBufferD3D12.cpp
@@ -273,7 +273,7 @@ inline void CommandBufferD3D12::SetViewports(const Viewport* viewports, uint32_t
 }
 
 inline void CommandBufferD3D12::SetScissors(const Rect* rects, uint32_t rectNum) {
-    D3D12_RECT* rectsD3D12 = STACK_ALLOC(D3D12_RECT, rectNum);
+    D3D12_RECT* rectsD3D12 = StackAlloc(D3D12_RECT, rectNum);
     ConvertRects(rectsD3D12, rects, rectNum);
 
     m_GraphicsCommandList->RSSetScissorRects(rectNum, rectsD3D12);
@@ -307,7 +307,7 @@ inline void CommandBufferD3D12::SetBlendConstants(const Color32f& color) {
 }
 
 inline void CommandBufferD3D12::ClearAttachments(const ClearDesc* clearDescs, uint32_t clearDescNum, const Rect* rects, uint32_t rectNum) {
-    D3D12_RECT* rectsD3D12 = STACK_ALLOC(D3D12_RECT, rectNum);
+    D3D12_RECT* rectsD3D12 = StackAlloc(D3D12_RECT, rectNum);
     ConvertRects(rectsD3D12, rects, rectNum);
 
     for (uint32_t i = 0; i < clearDescNum; i++) {
@@ -376,7 +376,7 @@ inline void CommandBufferD3D12::BeginRendering(const AttachmentsDesc& attachment
 }
 
 inline void CommandBufferD3D12::SetVertexBuffers(uint32_t baseSlot, uint32_t bufferNum, const Buffer* const* buffers, const uint64_t* offsets) {
-    D3D12_VERTEX_BUFFER_VIEW* vertexBufferViews = STACK_ALLOC(D3D12_VERTEX_BUFFER_VIEW, bufferNum);
+    D3D12_VERTEX_BUFFER_VIEW* vertexBufferViews = StackAlloc(D3D12_VERTEX_BUFFER_VIEW, bufferNum);
 
     for (uint32_t i = 0; i < bufferNum; i++) {
         if (buffers[i] != nullptr) {
@@ -626,7 +626,7 @@ inline void CommandBufferD3D12::Barrier(const BarrierGroupDesc& barrierGroupDesc
 
     // Global
     uint16_t num = barrierGroupDesc.globalNum;
-    D3D12_GLOBAL_BARRIER* globalBarriers = STACK_ALLOC(D3D12_GLOBAL_BARRIER, num);
+    D3D12_GLOBAL_BARRIER* globalBarriers = StackAlloc(D3D12_GLOBAL_BARRIER, num);
 
     if (num) {
         D3D12_BARRIER_GROUP* barrierGroup = &barrierGroups[barriersGroupsNum++];
         barrierGroup->Type = D3D12_BARRIER_TYPE_GLOBAL;
@@ -646,7 +646,7 @@ inline void CommandBufferD3D12::Barrier(const BarrierGroupDesc& barrierGroupDesc
 
     // Buffer
     num = barrierGroupDesc.bufferNum;
-    D3D12_BUFFER_BARRIER* bufferBarriers = STACK_ALLOC(D3D12_BUFFER_BARRIER, num);
+    D3D12_BUFFER_BARRIER* bufferBarriers = StackAlloc(D3D12_BUFFER_BARRIER, num);
 
     if (barrierGroupDesc.bufferNum) {
         D3D12_BARRIER_GROUP* barrierGroup = &barrierGroups[barriersGroupsNum++];
         barrierGroup->Type = D3D12_BARRIER_TYPE_BUFFER;
@@ -670,7 +670,7 @@ inline void CommandBufferD3D12::Barrier(const BarrierGroupDesc& barrierGroupDesc
 
     // Texture
     num = barrierGroupDesc.textureNum;
-    D3D12_TEXTURE_BARRIER* textureBarriers = STACK_ALLOC(D3D12_TEXTURE_BARRIER, num);
+    D3D12_TEXTURE_BARRIER* textureBarriers = StackAlloc(D3D12_TEXTURE_BARRIER, num);
 
     if (barrierGroupDesc.textureNum) {
         D3D12_BARRIER_GROUP* barrierGroup = &barrierGroups[barriersGroupsNum++];
         barrierGroup->Type = D3D12_BARRIER_TYPE_TEXTURE;
@@ -738,7 +738,7 @@ inline void CommandBufferD3D12::Barrier(const BarrierGroupDesc& barrierGroupDesc
         return;
 
     // Gather
-    D3D12_RESOURCE_BARRIER* barriers = STACK_ALLOC(D3D12_RESOURCE_BARRIER, barrierNum);
+    D3D12_RESOURCE_BARRIER* barriers = StackAlloc(D3D12_RESOURCE_BARRIER, barrierNum);
     memset(barriers, 0, sizeof(D3D12_RESOURCE_BARRIER) * barrierNum);
 
     D3D12_RESOURCE_BARRIER* ptr = barriers;
@@ -804,7 +804,7 @@ inline void CommandBufferD3D12::CopyQueries(const QueryPool& queryPool, uint32_t
 
 inline void CommandBufferD3D12::BeginAnnotation(const char* name) {
     size_t len = strlen(name) + 1;
-    wchar_t* s = STACK_ALLOC(wchar_t, len);
+    wchar_t* s = StackAlloc(wchar_t, len);
     ConvertCharToWchar(name, s, len);
 
     PIXBeginEvent(m_GraphicsCommandList, PIX_COLOR_DEFAULT, s);
@@ -840,9 +840,9 @@ inline void CommandBufferD3D12::BuildBottomLevelAccelerationStructure(
     desc.Inputs.NumDescs = geometryObjectNum;
     desc.Inputs.DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY;
 
-    Vector<D3D12_RAYTRACING_GEOMETRY_DESC> geometryDescs(geometryObjectNum, m_Device.GetStdAllocator());
-    ConvertGeometryDescs(&geometryDescs[0], geometryObjects, geometryObjectNum);
-    desc.Inputs.pGeometryDescs = &geometryDescs[0];
+    Scratch<D3D12_RAYTRACING_GEOMETRY_DESC> geometryDescs = AllocateScratch(m_Device, D3D12_RAYTRACING_GEOMETRY_DESC, geometryObjectNum);
+    ConvertGeometryDescs(geometryDescs, geometryObjects, geometryObjectNum);
+    desc.Inputs.pGeometryDescs = geometryDescs;
 
     if (m_Version >= 4)
         m_GraphicsCommandList->BuildRaytracingAccelerationStructure(&desc, 0, nullptr);
@@ -876,9 +876,9 @@ inline void CommandBufferD3D12::UpdateBottomLevelAccelerationStructure(uint32_t
     desc.Inputs.NumDescs = geometryObjectNum;
     desc.Inputs.DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY;
 
-    Vector<D3D12_RAYTRACING_GEOMETRY_DESC> geometryDescs(geometryObjectNum, m_Device.GetStdAllocator());
-    ConvertGeometryDescs(&geometryDescs[0], geometryObjects, geometryObjectNum);
-    desc.Inputs.pGeometryDescs = &geometryDescs[0];
+    Scratch<D3D12_RAYTRACING_GEOMETRY_DESC> geometryDescs = AllocateScratch(m_Device, D3D12_RAYTRACING_GEOMETRY_DESC, geometryObjectNum);
+    ConvertGeometryDescs(geometryDescs, geometryObjects, geometryObjectNum);
+    desc.Inputs.pGeometryDescs = geometryDescs;
 
     if (m_Version >= 4)
         m_GraphicsCommandList->BuildRaytracingAccelerationStructure(&desc, 0, nullptr);
@@ -891,7 +891,7 @@ inline void CommandBufferD3D12::CopyAccelerationStructure(AccelerationStructure&
 
 inline void CommandBufferD3D12::WriteAccelerationStructureSize(
     const AccelerationStructure* const* accelerationStructures, uint32_t accelerationStructureNum, QueryPool& queryPool, uint32_t queryOffset) {
-    D3D12_GPU_VIRTUAL_ADDRESS* virtualAddresses = ALLOCATE_SCRATCH(m_Device, D3D12_GPU_VIRTUAL_ADDRESS, accelerationStructureNum);
+    Scratch<D3D12_GPU_VIRTUAL_ADDRESS> virtualAddresses = AllocateScratch(m_Device, D3D12_GPU_VIRTUAL_ADDRESS, accelerationStructureNum);
 
     for (uint32_t i = 0; i < accelerationStructureNum; i++)
         virtualAddresses[i] = ((AccelerationStructureD3D12&)accelerationStructures[i]).GetHandle();
@@ -901,8 +901,6 @@ inline void CommandBufferD3D12::WriteAccelerationStructureSize(
 
     if (m_Version >= 4)
         m_GraphicsCommandList->EmitRaytracingAccelerationStructurePostbuildInfo(&postbuildInfo, accelerationStructureNum, virtualAddresses);
-
-    FREE_SCRATCH(m_Device, virtualAddresses, accelerationStructureNum);
 }
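Note: Scratch<T> and the AllocateScratch macro are defined elsewhere in the shared sources; the visible effect in these hunks is that the matching FREE_SCRATCH call disappears. The pattern is a small RAII wrapper along these lines (a sketch of the idea under assumed names, not the repository's implementation):

    #include <cstdlib>

    template <typename T>
    class Scratch {
    public:
        explicit Scratch(T* memory) : m_Memory(memory) {}
        ~Scratch() { free(m_Memory); } // the real helper returns memory to the device's allocator
        operator T*() const { return m_Memory; } // decays to T*, hence "desc.Inputs.pGeometryDescs = geometryDescs"

    private:
        T* m_Memory;
    };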
diff --git a/Source/D3D12/CommandBufferD3D12.hpp b/Source/D3D12/CommandBufferD3D12.hpp
index 42bb1dd8..891e374f 100644
--- a/Source/D3D12/CommandBufferD3D12.hpp
+++ b/Source/D3D12/CommandBufferD3D12.hpp
@@ -159,16 +159,6 @@ static void NRI_CALL CmdCopyQueries(CommandBuffer& commandBuffer, const QueryPoo
 static void NRI_CALL CmdResetQueries(CommandBuffer&, const QueryPool&, uint32_t, uint32_t) {
 }
 
-static void NRI_CALL DestroyCommandBuffer(CommandBuffer& commandBuffer) {
-    if (!(&commandBuffer))
-        return;
-
-    CommandBufferD3D12& commandBufferImpl = (CommandBufferD3D12&)commandBuffer;
-    DeviceD3D12& deviceImpl = commandBufferImpl.GetDevice();
-
-    Deallocate(deviceImpl.GetStdAllocator(), &commandBufferImpl);
-}
-
 static void* NRI_CALL GetCommandBufferNativeObject(const CommandBuffer& commandBuffer) {
     if (!(&commandBuffer))
         return nullptr;
diff --git a/Source/D3D12/CommandQueueD3D12.cpp b/Source/D3D12/CommandQueueD3D12.cpp
index f65e52ad..a5c2354f 100644
--- a/Source/D3D12/CommandQueueD3D12.cpp
+++ b/Source/D3D12/CommandQueueD3D12.cpp
@@ -46,7 +46,7 @@ inline void CommandQueueD3D12::Submit(const QueueSubmitDesc& queueSubmitDesc) {
     }
 
     if (queueSubmitDesc.commandBufferNum) {
-        ID3D12CommandList** commandLists = STACK_ALLOC(ID3D12CommandList*, queueSubmitDesc.commandBufferNum);
+        ID3D12CommandList** commandLists = StackAlloc(ID3D12CommandList*, queueSubmitDesc.commandBufferNum);
         for (uint32_t j = 0; j < queueSubmitDesc.commandBufferNum; j++)
             commandLists[j] = *(CommandBufferD3D12*)queueSubmitDesc.commandBuffers[j];
diff --git a/Source/D3D12/DeviceD3D12.cpp b/Source/D3D12/DeviceD3D12.cpp
index e88daa0e..dbf261da 100644
--- a/Source/D3D12/DeviceD3D12.cpp
+++ b/Source/D3D12/DeviceD3D12.cpp
@@ -58,19 +58,32 @@ static inline uint64_t HashRootSignatureAndStride(ID3D12RootSignature* rootSigna
     return ((uint64_t)stride << 52ull) | ((uint64_t)rootSignature & ((1ull << 52) - 1));
 }
 
+static void* vmaAllocate(size_t size, size_t alignment, void* pPrivateData) {
+    StdAllocator<uint8_t>& stdAllocator = *(StdAllocator<uint8_t>*)pPrivateData;
+    const auto& lowLevelAllocator = stdAllocator.GetInterface();
+
+    return lowLevelAllocator.Allocate(lowLevelAllocator.userArg, size, alignment);
+}
+
+static void vmaFree(void* pMemory, void* pPrivateData) {
+    StdAllocator<uint8_t>& stdAllocator = *(StdAllocator<uint8_t>*)pPrivateData;
+    const auto& lowLevelAllocator = stdAllocator.GetInterface();
+
+    return lowLevelAllocator.Free(lowLevelAllocator.userArg, pMemory);
+}
+
 Result CreateDeviceD3D12(const DeviceCreationDesc& deviceCreationDesc, DeviceBase*& device) {
     DeviceCreationD3D12Desc deviceCreationD3D12Desc = {};
 
-    StdAllocator<uint8_t> allocator(deviceCreationDesc.memoryAllocatorInterface);
-    DeviceD3D12* implementation = Allocate<DeviceD3D12>(allocator, deviceCreationDesc.callbackInterface, allocator);
-    Result result = implementation->Create(deviceCreationDesc, deviceCreationD3D12Desc);
-
-    if (result == Result::SUCCESS) {
-        device = (DeviceBase*)implementation;
-        return Result::SUCCESS;
-    }
+    StdAllocator<uint8_t> allocator(deviceCreationDesc.allocationCallbacks);
+    DeviceD3D12* impl = Allocate<DeviceD3D12>(allocator, deviceCreationDesc.callbackInterface, allocator);
+    Result result = impl->Create(deviceCreationDesc, deviceCreationD3D12Desc);
 
-    Deallocate(allocator, implementation);
+    if (result != Result::SUCCESS) {
+        Destroy(allocator, impl);
+        device = nullptr;
+    } else
+        device = (DeviceBase*)impl;
 
     return result;
 }
@@ -87,20 +100,19 @@ Result CreateDeviceD3D12(const DeviceCreationD3D12Desc& deviceCreationD3D12Desc,
     DeviceCreationDesc deviceCreationDesc = {};
     deviceCreationDesc.adapterDesc = &adapterDesc;
     deviceCreationDesc.callbackInterface = deviceCreationD3D12Desc.callbackInterface;
-    deviceCreationDesc.memoryAllocatorInterface = deviceCreationD3D12Desc.memoryAllocatorInterface;
+    deviceCreationDesc.allocationCallbacks = deviceCreationD3D12Desc.allocationCallbacks;
    deviceCreationDesc.enableD3D12DrawParametersEmulation = deviceCreationD3D12Desc.enableD3D12DrawParametersEmulation;
     deviceCreationDesc.graphicsAPI = GraphicsAPI::D3D12;
 
-    StdAllocator<uint8_t> allocator(deviceCreationD3D12Desc.memoryAllocatorInterface);
-    DeviceD3D12* implementation = Allocate<DeviceD3D12>(allocator, deviceCreationD3D12Desc.callbackInterface, allocator);
-    Result result = implementation->Create(deviceCreationDesc, deviceCreationD3D12Desc);
+    StdAllocator<uint8_t> allocator(deviceCreationD3D12Desc.allocationCallbacks);
+    DeviceD3D12* impl = Allocate<DeviceD3D12>(allocator, deviceCreationD3D12Desc.callbackInterface, allocator);
+    Result result = impl->Create(deviceCreationDesc, deviceCreationD3D12Desc);
 
-    if (result == Result::SUCCESS) {
-        device = implementation;
-        return Result::SUCCESS;
-    }
-
-    Deallocate(allocator, implementation);
+    if (result != Result::SUCCESS) {
+        Destroy(allocator, impl);
+        device = nullptr;
+    } else
+        device = (DeviceBase*)impl;
 
     return result;
 }
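Note: vmaAllocate and vmaFree match D3D12MA's ALLOCATE_FUNC_PTR and FREE_FUNC_PTR signatures, so the user-supplied NRI allocator can also service D3D12MA's CPU-side allocations. The registration itself is not in this excerpt (it presumably happens in DeviceD3D12::CreateVma); with the vendored header it would be:

    D3D12MA::ALLOCATION_CALLBACKS allocationCallbacks = {};
    allocationCallbacks.pAllocate = vmaAllocate;
    allocationCallbacks.pFree = vmaFree;
    allocationCallbacks.pPrivateData = &stdAllocator; // handed back as pPrivateData above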
@@ -116,12 +128,15 @@ DeviceD3D12::DeviceD3D12(const CallbackInterface& callbacks, StdAllocator<uint8_t>& stdAllocator)
-template <typename Implementation, typename Interface, typename... Args>
-Result DeviceD3D12::CreateImplementation(Interface*& entity, const Args&... args) {
-    Implementation* implementation = Allocate<Implementation>(GetStdAllocator(), *this);
-    Result result = implementation->Create(args...);
-
-    if (result == Result::SUCCESS) {
-        entity = (Interface*)implementation;
-        return Result::SUCCESS;
-    }
-
-    Deallocate(GetStdAllocator(), implementation);
-    entity = nullptr;
-
-    return result;
-}
-
 Result DeviceD3D12::Create(const DeviceCreationDesc& deviceCreationDesc, const DeviceCreationD3D12Desc& deviceCreationD3D12Desc) {
+    if (!deviceCreationD3D12Desc.d3d12Device && !deviceCreationDesc.disable3rdPartyAllocationCallbacks)
+        m_AllocationCallbackPtr = &m_AllocationCallbacks;
+
     // IMPORTANT: Must be called before the D3D12 device is created, or the D3D12 runtime removes the device
     if (deviceCreationDesc.enableGraphicsAPIValidation) {
         ComPtr<ID3D12Debug> debugController;
@@ -173,7 +175,7 @@ Result DeviceD3D12::Create(const DeviceCreationDesc& deviceCreationDesc, const D
         hr = m_Adapter->GetDesc(&desc);
         RETURN_ON_BAD_HRESULT(this, hr, "IDXGIAdapter::GetDesc()");
 
-        wcstombs(m_Desc.adapterDesc.description, desc.Description, GetCountOf(m_Desc.adapterDesc.description) - 1);
+        wcstombs(m_Desc.adapterDesc.name, desc.Description, GetCountOf(m_Desc.adapterDesc.name) - 1);
         m_Desc.adapterDesc.luid = *(uint64_t*)&desc.AdapterLuid;
         m_Desc.adapterDesc.videoMemorySize = desc.DedicatedVideoMemory;
         m_Desc.adapterDesc.systemMemorySize = desc.DedicatedSystemMemory + desc.SharedSystemMemory;
@@ -226,7 +228,7 @@ Result DeviceD3D12::Create(const DeviceCreationDesc& deviceCreationDesc, const D
         m_IsWrapped = true;
 
     m_Version = QueryLatestDevice(deviceTemp, m_Device);
-    REPORT_INFO(this, "Using ID3D12Device%u...", m_Version);
+    REPORT_INFO(this, "Using ID3D12Device%u", m_Version);
 
     if (deviceCreationDesc.enableGraphicsAPIValidation) {
         ComPtr<ID3D12InfoQueue> pInfoQueue;
@@ -288,7 +290,7 @@ void DeviceD3D12::FillDesc(const DeviceCreationDesc& deviceCreationDesc) {
     hr = m_Device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options));
     if (FAILED(hr))
         REPORT_WARNING(this, "ID3D12Device::CheckFeatureSupport(options) failed, result = 0x%08X!", hr);
-    m_Desc.memoryTier = options.ResourceHeapTier == D3D12_RESOURCE_HEAP_TIER_2 ? MemoryTier::TWO : MemoryTier::ONE;
+    m_Desc.isMemoryTier2Supported = options.ResourceHeapTier == D3D12_RESOURCE_HEAP_TIER_2 ? true : false;
 
     D3D12_FEATURE_DATA_D3D12_OPTIONS1 options1 = {};
     hr = m_Device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS1, &options1, sizeof(options1));
@@ -472,7 +474,7 @@ void DeviceD3D12::FillDesc(const DeviceCreationDesc& deviceCreationDesc) {
     m_Desc.constantBufferMaxRange = D3D12_REQ_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT * 16;
     m_Desc.storageBufferOffsetAlignment = D3D12_RAW_UAV_SRV_BYTE_ALIGNMENT;
     m_Desc.storageBufferMaxRange = (1 << D3D12_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP) - 1;
-    m_Desc.bufferTextureGranularity = D3D12_SMALL_RESOURCE_PLACEMENT_ALIGNMENT; // TODO: 1024 in VK
+    m_Desc.bufferTextureGranularity = D3D12_SMALL_RESOURCE_PLACEMENT_ALIGNMENT;
     m_Desc.bufferMaxSize = D3D12_REQ_RESOURCE_SIZE_IN_MEGABYTES_EXPRESSION_C_TERM * 1024ull * 1024ull;
     m_Desc.pushConstantsMaxSize = D3D12_REQ_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT * 16;
@@ -653,61 +655,72 @@ DescriptorPointerCPU DeviceD3D12::GetDescriptorPointerCPU(const DescriptorHandle
     return descriptorPointer;
 }
 
-constexpr std::array<D3D12_HEAP_TYPE, (size_t)MemoryLocation::MAX_NUM> HEAP_TYPES = {
-    D3D12_HEAP_TYPE_DEFAULT, // DEVICE
-#ifdef NRI_USE_AGILITY_SDK
-    // Prerequisite: D3D12_FEATURE_D3D12_OPTIONS16
-    D3D12_HEAP_TYPE_GPU_UPLOAD, // DEVICE_UPLOAD
-#else
-    D3D12_HEAP_TYPE_UPLOAD, // DEVICE_UPLOAD (silent fallback to HOST_UPLOAD)
-#endif
-    D3D12_HEAP_TYPE_UPLOAD, // HOST_UPLOAD
-    D3D12_HEAP_TYPE_READBACK, // HOST_READBACK
-};
-
-static inline MemoryType ConstructMemoryType(D3D12_HEAP_TYPE heapType, D3D12_HEAP_FLAGS heapFlags) {
-    return ((uint32_t)heapFlags) | ((uint32_t)heapType << 16);
-}
-
 void DeviceD3D12::GetMemoryDesc(MemoryLocation memoryLocation, const D3D12_RESOURCE_DESC& resourceDesc, MemoryDesc& memoryDesc) const {
     if (memoryLocation == MemoryLocation::DEVICE_UPLOAD && m_Desc.deviceUploadHeapSize == 0)
         memoryLocation = MemoryLocation::HOST_UPLOAD;
 
-    D3D12_HEAP_TYPE heapType = HEAP_TYPES[(uint32_t)memoryLocation];
+    D3D12_HEAP_TYPE heapType = GetHeapType(memoryLocation);
 
     D3D12_HEAP_FLAGS heapFlags = D3D12_HEAP_FLAG_ALLOW_ALL_BUFFERS_AND_TEXTURES;
-    if (m_Desc.memoryTier == MemoryTier::ONE) {
+    bool mustBeDedicated = false;
+    if (!m_Desc.isMemoryTier2Supported) {
         if (resourceDesc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER)
             heapFlags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS;
-        else if (resourceDesc.Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL))
+        else if (resourceDesc.Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) {
             heapFlags = D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES;
-        else
+            mustBeDedicated = true;
+        } else
             heapFlags = D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES;
     }
 
     D3D12_RESOURCE_ALLOCATION_INFO resourceAllocationInfo = m_Device->GetResourceAllocationInfo(NRI_NODE_MASK, 1, &resourceDesc);
 
-    MemoryType memoryType = ConstructMemoryType(heapType, heapFlags);
+    MemoryTypeInfo memoryTypeInfo = {};
+    memoryTypeInfo.heapFlags = (uint16_t)heapFlags;
+    memoryTypeInfo.heapType = (uint8_t)heapType;
+    memoryTypeInfo.mustBeDedicated = mustBeDedicated;
+
+    memoryDesc = {};
     memoryDesc.size = resourceAllocationInfo.SizeInBytes;
     memoryDesc.alignment = (uint32_t)resourceAllocationInfo.Alignment;
-    memoryDesc.type = memoryType;
-    memoryDesc.mustBeDedicated = IsDedicated(memoryType);
+    memoryDesc.type = Pack(memoryTypeInfo);
+    memoryDesc.mustBeDedicated = mustBeDedicated;
 }
 
-void DeviceD3D12::GetAccelerationStructureMemoryDesc(uint64_t size, MemoryDesc& memoryDesc) const {
-    D3D12_HEAP_FLAGS heapFlags = m_Desc.memoryTier == MemoryTier::TWO ? D3D12_HEAP_FLAG_ALLOW_ALL_BUFFERS_AND_TEXTURES : D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS;
+void DeviceD3D12::GetAccelerationStructureMemoryDesc(const AccelerationStructureDesc& accelerationStructureDesc, MemoryLocation memoryLocation, MemoryDesc& memoryDesc) {
+    D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO prebuildInfo = {};
+    GetAccelerationStructurePrebuildInfo(accelerationStructureDesc, prebuildInfo);
+
+    D3D12_HEAP_TYPE heapType = GetHeapType(memoryLocation);
+    D3D12_HEAP_FLAGS heapFlags = m_Desc.isMemoryTier2Supported ? D3D12_HEAP_FLAG_ALLOW_ALL_BUFFERS_AND_TEXTURES : D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS;
+
+    MemoryTypeInfo memoryTypeInfo = {};
+    memoryTypeInfo.heapFlags = (uint16_t)heapFlags;
+    memoryTypeInfo.heapType = (uint8_t)heapType;
 
-    memoryDesc.size = size;
+    memoryDesc = {};
+    memoryDesc.size = prebuildInfo.ResultDataMaxSizeInBytes;
     memoryDesc.alignment = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BYTE_ALIGNMENT;
-    memoryDesc.type = ConstructMemoryType(D3D12_HEAP_TYPE_DEFAULT, heapFlags);
-    memoryDesc.mustBeDedicated = false;
+    memoryDesc.type = Pack(memoryTypeInfo);
 }
 
-bool DeviceD3D12::IsDedicated(MemoryType memoryType) const {
-    D3D12_HEAP_FLAGS heapFlags = GetHeapFlags(memoryType);
-    bool isRtDs = (heapFlags & D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES) == D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES;
+void DeviceD3D12::GetAccelerationStructurePrebuildInfo(
+    const AccelerationStructureDesc& accelerationStructureDesc, D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO& prebuildInfo) {
+    D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS accelerationStructureInputs = {};
+    accelerationStructureInputs.Type = GetAccelerationStructureType(accelerationStructureDesc.type);
+    accelerationStructureInputs.Flags = GetAccelerationStructureBuildFlags(accelerationStructureDesc.flags);
+    accelerationStructureInputs.NumDescs = accelerationStructureDesc.instanceOrGeometryObjectNum;
+    accelerationStructureInputs.DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY; // TODO: D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS support?
+
+    uint32_t geometryCount = accelerationStructureDesc.type == AccelerationStructureType::BOTTOM_LEVEL ? accelerationStructureDesc.instanceOrGeometryObjectNum : 0;
+    Scratch<D3D12_RAYTRACING_GEOMETRY_DESC> geometryDescs = AllocateScratch(*this, D3D12_RAYTRACING_GEOMETRY_DESC, geometryCount);
+
+    if (accelerationStructureDesc.type == AccelerationStructureType::BOTTOM_LEVEL && accelerationStructureDesc.instanceOrGeometryObjectNum) {
+        ConvertGeometryDescs(geometryDescs, accelerationStructureDesc.geometryObjects, accelerationStructureDesc.instanceOrGeometryObjectNum);
+        accelerationStructureInputs.pGeometryDescs = geometryDescs;
+    }
 
-    return m_Desc.memoryTier == MemoryTier::ONE && isRtDs;
+    m_Device->GetRaytracingAccelerationStructurePrebuildInfo(&accelerationStructureInputs, &prebuildInfo);
 }
 
 ComPtr<ID3D12CommandSignature> DeviceD3D12::CreateCommandSignature(
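Note: MemoryTypeInfo and Pack replace the old ConstructMemoryType bit twiddling; their definitions live outside this excerpt. Judging from the fields written here, the packing is morally equivalent to the sketch below (an assumption; the real field widths and order may differ):

    #include <cstdint>
    #include <cstring>

    using MemoryType = uint32_t; // the opaque value exposed through MemoryDesc::type

    struct MemoryTypeInfo {
        uint16_t heapFlags;      // truncated D3D12_HEAP_FLAGS
        uint8_t heapType;        // D3D12_HEAP_TYPE
        uint8_t mustBeDedicated; // bool
    };

    inline MemoryType Pack(const MemoryTypeInfo& info) {
        static_assert(sizeof(MemoryTypeInfo) == sizeof(MemoryType), "must stay bit-castable");

        MemoryType memoryType = 0;
        memcpy(&memoryType, &info, sizeof(memoryType));

        return memoryType;
    }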
@@ -788,22 +801,14 @@ ID3D12CommandSignature* DeviceD3D12::GetDispatchCommandSignature() const {
 // DeviceBase
 //================================================================================================================
 
-void DeviceD3D12::Destroy() {
-    Deallocate(GetStdAllocator(), this);
+void DeviceD3D12::Destruct() {
+    Destroy(GetStdAllocator(), this);
 }
 
 //================================================================================================================
 // NRI
 //================================================================================================================
 
-inline Result DeviceD3D12::CreateSwapChain(const SwapChainDesc& swapChainDesc, SwapChain*& swapChain) {
-    return CreateImplementation<SwapChainD3D12>(swapChain, swapChainDesc);
-}
-
-inline void DeviceD3D12::DestroySwapChain(SwapChain& swapChain) {
-    Deallocate(GetStdAllocator(), (SwapChainD3D12*)&swapChain);
-}
-
 inline Result DeviceD3D12::GetCommandQueue(CommandQueueType commandQueueType, CommandQueue*& commandQueue) {
     ExclusiveScope lock(m_QueueLock);
@@ -829,138 +834,10 @@ inline Result DeviceD3D12::GetCommandQueue(CommandQueueType commandQueueType, Co
     return result;
 }
 
-inline Result DeviceD3D12::CreateCommandQueue(CommandQueueType commandQueueType, CommandQueue*& commandQueue) {
-    return CreateImplementation<CommandQueueD3D12>(commandQueue, commandQueueType);
-}
-
 inline Result DeviceD3D12::CreateCommandQueue(void* d3d12commandQueue, CommandQueueD3D12*& commandQueue) {
     return CreateImplementation<CommandQueueD3D12>(commandQueue, (ID3D12CommandQueue*)d3d12commandQueue);
 }
 
-inline Result DeviceD3D12::CreateCommandAllocator(const CommandQueue& commandQueue, CommandAllocator*& commandAllocator) {
-    return CreateImplementation<CommandAllocatorD3D12>(commandAllocator, commandQueue);
-}
-
-inline Result DeviceD3D12::CreateDescriptorPool(const DescriptorPoolDesc& descriptorPoolDesc, DescriptorPool*& descriptorPool) {
-    return CreateImplementation<DescriptorPoolD3D12>(descriptorPool, descriptorPoolDesc);
-}
-
-inline Result DeviceD3D12::CreateBuffer(const BufferDesc& bufferDesc, Buffer*& buffer) {
-    return CreateImplementation<BufferD3D12>(buffer, bufferDesc);
-}
-
-inline Result DeviceD3D12::CreateTexture(const TextureDesc& textureDesc, Texture*& texture) {
-    return CreateImplementation<TextureD3D12>(texture, textureDesc);
-}
-
-inline Result DeviceD3D12::CreateDescriptor(const BufferViewDesc& bufferViewDesc, Descriptor*& bufferView) {
-    return CreateImplementation<DescriptorD3D12>(bufferView, bufferViewDesc);
-}
-
-inline Result DeviceD3D12::CreateDescriptor(const Texture1DViewDesc& textureViewDesc, Descriptor*& textureView) {
-    return CreateImplementation<DescriptorD3D12>(textureView, textureViewDesc);
-}
-
-inline Result DeviceD3D12::CreateDescriptor(const Texture2DViewDesc& textureViewDesc, Descriptor*& textureView) {
-    return CreateImplementation<DescriptorD3D12>(textureView, textureViewDesc);
-}
-
-inline Result DeviceD3D12::CreateDescriptor(const Texture3DViewDesc& textureViewDesc, Descriptor*& textureView) {
-    return CreateImplementation<DescriptorD3D12>(textureView, textureViewDesc);
-}
-
-Result DeviceD3D12::CreateDescriptor(const AccelerationStructure& accelerationStructure, Descriptor*& accelerationStructureView) { // TODO: not inline
-    return CreateImplementation<DescriptorD3D12>(accelerationStructureView, accelerationStructure);
-}
-
-inline Result DeviceD3D12::CreateDescriptor(const SamplerDesc& samplerDesc, Descriptor*& sampler) {
-    return CreateImplementation<DescriptorD3D12>(sampler, samplerDesc);
-}
-
-inline Result DeviceD3D12::CreatePipelineLayout(const PipelineLayoutDesc& pipelineLayoutDesc, PipelineLayout*& pipelineLayout) {
-    return CreateImplementation<PipelineLayoutD3D12>(pipelineLayout, pipelineLayoutDesc);
-}
-
-inline Result DeviceD3D12::CreatePipeline(const GraphicsPipelineDesc& graphicsPipelineDesc, Pipeline*& pipeline) {
-    return CreateImplementation<PipelineD3D12>(pipeline, graphicsPipelineDesc);
-}
-
-inline Result DeviceD3D12::CreatePipeline(const ComputePipelineDesc& computePipelineDesc, Pipeline*& pipeline) {
-    return CreateImplementation<PipelineD3D12>(pipeline, computePipelineDesc);
-}
-
-inline Result DeviceD3D12::CreatePipeline(const RayTracingPipelineDesc& rayTracingPipelineDesc, Pipeline*& pipeline) {
-    return CreateImplementation<PipelineD3D12>(pipeline, rayTracingPipelineDesc);
-}
-
-inline Result DeviceD3D12::CreateFence(uint64_t initialValue, Fence*& fence) {
-    return CreateImplementation<FenceD3D12>(fence, initialValue);
-}
-
-inline Result DeviceD3D12::CreateQueryPool(const QueryPoolDesc& queryPoolDesc, QueryPool*& queryPool) {
-    return CreateImplementation<QueryPoolD3D12>(queryPool, queryPoolDesc);
-}
-
-inline Result DeviceD3D12::CreateCommandBuffer(const CommandBufferD3D12Desc& commandBufferDesc, CommandBuffer*& commandBuffer) {
-    return CreateImplementation<CommandBufferD3D12>(commandBuffer, commandBufferDesc);
-}
-
-inline Result DeviceD3D12::CreateDescriptorPool(const DescriptorPoolD3D12Desc& descriptorPoolD3D12Desc, DescriptorPool*& descriptorPool) {
-    return CreateImplementation<DescriptorPoolD3D12>(descriptorPool, descriptorPoolD3D12Desc);
-}
-
-Result DeviceD3D12::CreateBuffer(const BufferD3D12Desc& bufferDesc, Buffer*& buffer) { // TODO: not inline
-    return CreateImplementation<BufferD3D12>(buffer, bufferDesc);
-}
-
-inline Result DeviceD3D12::CreateTexture(const TextureD3D12Desc& textureDesc, Texture*& texture) {
-    return CreateImplementation<TextureD3D12>(texture, textureDesc);
-}
-
-inline Result DeviceD3D12::CreateMemory(const MemoryD3D12Desc& memoryDesc, Memory*& memory) {
-    return CreateImplementation<MemoryD3D12>(memory, memoryDesc);
-}
-
-inline void DeviceD3D12::DestroyCommandAllocator(CommandAllocator& commandAllocator) {
-    Deallocate(GetStdAllocator(), (CommandAllocatorD3D12*)&commandAllocator);
-}
-
-inline void DeviceD3D12::DestroyDescriptorPool(DescriptorPool& descriptorPool) {
-    Deallocate(GetStdAllocator(), (DescriptorPoolD3D12*)&descriptorPool);
-}
-
-inline void DeviceD3D12::DestroyBuffer(Buffer& buffer) {
-    Deallocate(GetStdAllocator(), (BufferD3D12*)&buffer);
-}
-
-inline void DeviceD3D12::DestroyTexture(Texture& texture) {
-    Deallocate(GetStdAllocator(), (TextureD3D12*)&texture);
-}
-
-inline void DeviceD3D12::DestroyDescriptor(Descriptor& descriptor) {
-    Deallocate(GetStdAllocator(), (DescriptorD3D12*)&descriptor);
-}
-
-inline void DeviceD3D12::DestroyPipelineLayout(PipelineLayout& pipelineLayout) {
-    Deallocate(GetStdAllocator(), (PipelineLayoutD3D12*)&pipelineLayout);
-}
-
-inline void DeviceD3D12::DestroyPipeline(Pipeline& pipeline) {
-    Deallocate(GetStdAllocator(), (PipelineD3D12*)&pipeline);
-}
-
-inline void DeviceD3D12::DestroyFence(Fence& fence) {
-    Deallocate(GetStdAllocator(), (FenceD3D12*)&fence);
-}
-
-inline void DeviceD3D12::DestroyQueryPool(QueryPool& queryPool) {
-    Deallocate(GetStdAllocator(), (QueryPoolD3D12*)&queryPool);
-}
-
-inline Result DeviceD3D12::AllocateMemory(const AllocateMemoryDesc& allocateMemoryDesc, Memory*& memory) {
-    return CreateImplementation<MemoryD3D12>(memory, allocateMemoryDesc);
-}
-
 inline Result DeviceD3D12::BindBufferMemory(const BufferMemoryBindingDesc* memoryBindingDescs, uint32_t memoryBindingDescNum) {
     for (uint32_t i = 0; i < memoryBindingDescNum; i++) {
         Result result = ((BufferD3D12*)memoryBindingDescs[i].buffer)->BindMemory((MemoryD3D12*)memoryBindingDescs[i].memory, memoryBindingDescs[i].offset);
@@ -991,10 +868,6 @@ inline Result DeviceD3D12::BindAccelerationStructureMemory(const AccelerationStr
     return Result::SUCCESS;
 }
 
-inline void DeviceD3D12::FreeMemory(Memory& memory) {
-    Deallocate(GetStdAllocator(), (MemoryD3D12*)&memory);
-}
-
 inline FormatSupportBits DeviceD3D12::GetFormatSupport(Format format) const {
     FormatSupportBits mask = FormatSupportBits::UNSUPPORTED;
@@ -1038,18 +911,6 @@ inline FormatSupportBits DeviceD3D12::GetFormatSupport(Format format) const {
     return mask;
 }
 
-inline Result DeviceD3D12::CreateAccelerationStructure(const AccelerationStructureDesc& accelerationStructureDesc, AccelerationStructure*& accelerationStructure) {
-    return CreateImplementation<AccelerationStructureD3D12>(accelerationStructure, accelerationStructureDesc);
-}
-
-inline Result DeviceD3D12::CreateAccelerationStructure(const AccelerationStructureD3D12Desc& accelerationStructureDesc, AccelerationStructure*& accelerationStructure) {
-    return CreateImplementation<AccelerationStructureD3D12>(accelerationStructure, accelerationStructureDesc);
-}
-
-inline void DeviceD3D12::DestroyAccelerationStructure(AccelerationStructure& accelerationStructure) {
-    Deallocate(GetStdAllocator(), (AccelerationStructureD3D12*)&accelerationStructure);
-}
-
 Result DeviceD3D12::CreateDefaultDrawSignatures(ID3D12RootSignature* rootSignature, bool enableDrawParametersEmulation) {
     bool drawParametersEmulation = m_Desc.isDrawParametersEmulationEnabled && enableDrawParametersEmulation;
     const uint32_t drawStride = drawParametersEmulation ? sizeof(nri::DrawBaseDesc) : sizeof(nri::DrawDesc);
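Note: the patch consistently renames Deallocate to Destroy and, in DeviceD3D12.hpp below, also relies on a one-argument form. Neither definition is part of this excerpt; from the call sites they presumably behave like the following sketch (the allocator entry point and the GetDevice() round trip are assumptions):

    // Two-argument form: run the destructor, then return memory to the given allocator.
    template <typename T, typename Allocator>
    void Destroy(Allocator& allocator, T* object) {
        if (object) {
            object->~T();
            allocator.Free(object); // hypothetical entry point
        }
    }

    // One-argument form: null-safe, recovers the allocator from the object's device.
    template <typename T>
    void Destroy(T* object) {
        if (object)
            Destroy(object->GetDevice().GetStdAllocator(), object);
    }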
diff --git a/Source/D3D12/DeviceD3D12.h b/Source/D3D12/DeviceD3D12.h
index 7ee233f2..cc8885e2 100644
--- a/Source/D3D12/DeviceD3D12.h
+++ b/Source/D3D12/DeviceD3D12.h
@@ -50,6 +50,10 @@ struct DeviceD3D12 final : public DeviceBase {
         return m_CoreInterface;
     }
 
+    inline D3D12MA::Allocator* GetVma() const {
+        return m_Vma;
+    }
+
     inline void FreeDescriptorHandle(D3D12_DESCRIPTOR_HEAP_TYPE type, const DescriptorHandle& descriptorHandle) {
         ExclusiveScope lock(m_FreeDescriptorLocks[type]);
         auto& freeDescriptors = m_FreeDescriptors[type];
@@ -57,7 +61,18 @@ struct DeviceD3D12 final : public DeviceBase {
     template <typename Implementation, typename Interface, typename... Args>
-    Result CreateImplementation(Interface*& entity, const Args&... args);
+    inline Result CreateImplementation(Interface*& entity, const Args&... args) {
+        Implementation* impl = Allocate<Implementation>(GetStdAllocator(), *this);
+        Result result = impl->Create(args...);
+
+        if (result != Result::SUCCESS) {
+            Destroy(GetStdAllocator(), impl);
+            entity = nullptr;
+        } else
+            entity = (Interface*)impl;
+
+        return result;
+    }
 
     Result Create(const DeviceCreationDesc& deviceCreationDesc, const DeviceCreationD3D12Desc& deviceCreationD3D12Desc);
     Result CreateDefaultDrawSignatures(ID3D12RootSignature* rootSignature, bool enableDrawParametersEmulation);
@@ -65,69 +80,29 @@ struct DeviceD3D12 final : public DeviceBase {
     Result GetDescriptorHandle(D3D12_DESCRIPTOR_HEAP_TYPE type, DescriptorHandle& descriptorHandle);
     DescriptorPointerCPU GetDescriptorPointerCPU(const DescriptorHandle& descriptorHandle);
     void GetMemoryDesc(MemoryLocation memoryLocation, const D3D12_RESOURCE_DESC& resourceDesc, MemoryDesc& memoryDesc) const;
-    void GetAccelerationStructureMemoryDesc(uint64_t size, MemoryDesc& memoryDesc) const;
-    bool IsDedicated(MemoryType memoryType) const;
-
+    void GetAccelerationStructureMemoryDesc(const AccelerationStructureDesc& accelerationStructureDesc, MemoryLocation memoryLocation, MemoryDesc& memoryDesc);
+    void GetAccelerationStructurePrebuildInfo(const AccelerationStructureDesc& accelerationStructureDesc, D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO& prebuildInfo);
     ID3D12CommandSignature* GetDrawCommandSignature(uint32_t stride, ID3D12RootSignature* rootSignature);
     ID3D12CommandSignature* GetDrawIndexedCommandSignature(uint32_t stride, ID3D12RootSignature* rootSignature);
     ID3D12CommandSignature* GetDrawMeshCommandSignature(uint32_t stride);
     ID3D12CommandSignature* GetDispatchRaysCommandSignature() const;
     ID3D12CommandSignature* GetDispatchCommandSignature() const;
+    Result CreateVma();
 
     //================================================================================================================
     // NRI
     //================================================================================================================
 
-    inline void SetDebugName(const char* name) {
-        SET_D3D_DEBUG_OBJECT_NAME(m_Device, name);
-    }
-
-    Result CreateSwapChain(const SwapChainDesc& swapChainDesc, SwapChain*& swapChain);
-    void DestroySwapChain(SwapChain& swapChain);
-
-    Result GetCommandQueue(CommandQueueType commandQueueType, CommandQueue*& commandQueue);
-    Result CreateCommandQueue(CommandQueueType commandQueueType, CommandQueue*& commandQueue);
     Result CreateCommandQueue(void* d3d12commandQueue, CommandQueueD3D12*& commandQueue);
-    Result CreateCommandAllocator(const CommandQueue& commandQueue, CommandAllocator*& commandAllocator);
-    Result CreateDescriptorPool(const DescriptorPoolDesc& descriptorPoolDesc, DescriptorPool*& descriptorPool);
-    Result CreateBuffer(const BufferDesc& bufferDesc, Buffer*& buffer);
-    Result CreateTexture(const TextureDesc& textureDesc, Texture*& texture);
-    Result CreateDescriptor(const BufferViewDesc& bufferViewDesc, Descriptor*& bufferView);
-    Result CreateDescriptor(const Texture1DViewDesc& textureViewDesc, Descriptor*& textureView);
-    Result CreateDescriptor(const Texture2DViewDesc& textureViewDesc, Descriptor*& textureView);
-    Result CreateDescriptor(const Texture3DViewDesc& textureViewDesc, Descriptor*& textureView);
-    Result CreateDescriptor(const AccelerationStructure& accelerationStructure, Descriptor*& accelerationStructureView);
-    Result CreateDescriptor(const SamplerDesc& samplerDesc, Descriptor*& sampler);
-    Result CreatePipelineLayout(const PipelineLayoutDesc& pipelineLayoutDesc, PipelineLayout*& pipelineLayout);
-    Result CreatePipeline(const GraphicsPipelineDesc& graphicsPipelineDesc, Pipeline*& pipeline);
-    Result CreatePipeline(const ComputePipelineDesc& computePipelineDesc, Pipeline*& pipeline);
-    Result CreatePipeline(const RayTracingPipelineDesc& rayTracingPipelineDesc, Pipeline*& pipeline);
-    Result CreateFence(uint64_t initialValue, Fence*& fence);
-    Result CreateQueryPool(const QueryPoolDesc& queryPoolDesc, QueryPool*& queryPool);
-    Result CreateCommandBuffer(const CommandBufferD3D12Desc& commandBufferDesc, CommandBuffer*& commandBuffer);
-    Result CreateDescriptorPool(const DescriptorPoolD3D12Desc& descriptorPoolD3D12Desc, DescriptorPool*& descriptorPool);
-    Result CreateBuffer(const BufferD3D12Desc& bufferDesc, Buffer*& buffer);
-    Result CreateTexture(const TextureD3D12Desc& textureDesc, Texture*& texture);
-    Result CreateMemory(const MemoryD3D12Desc& memoryDesc, Memory*& memory);
-    Result CreateAccelerationStructure(const AccelerationStructureD3D12Desc& accelerationStructureDesc, AccelerationStructure*& accelerationStructure);
-    void DestroyCommandAllocator(CommandAllocator& commandAllocator);
-    void DestroyDescriptorPool(DescriptorPool& descriptorPool);
-    void DestroyBuffer(Buffer& buffer);
-    void DestroyTexture(Texture& texture);
-    void DestroyDescriptor(Descriptor& descriptor);
-    void DestroyPipelineLayout(PipelineLayout& pipelineLayout);
-    void DestroyPipeline(Pipeline& pipeline);
-    void DestroyFence(Fence& queueSemaphore);
-    void DestroyQueryPool(QueryPool& queryPool);
-    Result AllocateMemory(const AllocateMemoryDesc& allocateMemoryDesc, Memory*& memory);
+    Result GetCommandQueue(CommandQueueType commandQueueType, CommandQueue*& commandQueue);
     Result BindBufferMemory(const BufferMemoryBindingDesc* memoryBindingDescs, uint32_t memoryBindingDescNum);
     Result BindTextureMemory(const TextureMemoryBindingDesc* memoryBindingDescs, uint32_t memoryBindingDescNum);
-    void FreeMemory(Memory& memory);
+    Result BindAccelerationStructureMemory(const AccelerationStructureMemoryBindingDesc* memoryBindingDescs, uint32_t memoryBindingDescNum);
     FormatSupportBits GetFormatSupport(Format format) const;
-    Result CreateAccelerationStructure(const AccelerationStructureDesc& accelerationStructureDesc, AccelerationStructure*& accelerationStructure);
-    Result BindAccelerationStructureMemory(const AccelerationStructureMemoryBindingDesc* memoryBindingDescs, uint32_t memoryBindingDescNum);
-    void DestroyAccelerationStructure(AccelerationStructure& accelerationStructure);
+
+    inline void SetDebugName(const char* name) {
+        SET_D3D_DEBUG_OBJECT_NAME(m_Device, name);
+    }
 
     //================================================================================================================
     // DeviceBase
@@ -137,15 +112,16 @@ struct DeviceD3D12 final : public DeviceBase {
         return m_Desc;
     }
 
-    void Destroy();
+    void Destruct();
     Result FillFunctionTable(CoreInterface& table) const;
+    Result FillFunctionTable(HelperInterface& table) const;
+    Result FillFunctionTable(LowLatencyInterface& table) const;
+    Result FillFunctionTable(MeshShaderInterface& table) const;
+    Result FillFunctionTable(RayTracingInterface& table) const;
+    Result FillFunctionTable(StreamerInterface& table) const;
     Result FillFunctionTable(SwapChainInterface& table) const;
-    Result FillFunctionTable(WrapperD3D12Interface& wrapperD3D12Interface) const;
-    Result FillFunctionTable(RayTracingInterface& rayTracingInterface) const;
-    Result FillFunctionTable(MeshShaderInterface& meshShaderInterface) const;
-    Result FillFunctionTable(HelperInterface& helperInterface) const;
-    Result FillFunctionTable(LowLatencyInterface& lowLatencyInterface) const;
-    Result FillFunctionTable(StreamerInterface& streamerInterface) const;
+    Result FillFunctionTable(ResourceAllocatorInterface& table) const;
+    Result FillFunctionTable(WrapperD3D12Interface& table) const;
 
 private:
     void FillDesc(const DeviceCreationDesc& deviceCreationDesc);
@@ -165,6 +141,9 @@ struct DeviceD3D12 final : public DeviceBase {
     UnorderedMap<uint32_t, ComPtr<ID3D12CommandSignature>> m_DrawMeshCommandSignatures;
     ComPtr<ID3D12CommandSignature> m_DispatchCommandSignature;
     ComPtr<ID3D12CommandSignature> m_DispatchRaysCommandSignature;
+    ComPtr<D3D12MA::Allocator> m_Vma;
+    D3D12MA::ALLOCATION_CALLBACKS m_AllocationCallbacks = {};
+    D3D12MA::ALLOCATION_CALLBACKS* m_AllocationCallbackPtr = nullptr;
     CoreInterface m_CoreInterface = {};
     uint8_t m_Version = 0;
     bool m_IsWrapped = false;
diff --git a/Source/D3D12/DeviceD3D12.hpp b/Source/D3D12/DeviceD3D12.hpp
index e4242683..0d9e9296 100644
--- a/Source/D3D12/DeviceD3D12.hpp
+++ b/Source/D3D12/DeviceD3D12.hpp
@@ -40,139 +40,107 @@ static Result NRI_CALL GetCommandQueue(Device& device, CommandQueueType commandQ
 
 static Result NRI_CALL CreateCommandAllocator(const CommandQueue& commandQueue, CommandAllocator*& commandAllocator) {
     DeviceD3D12& device = ((CommandQueueD3D12&)commandQueue).GetDevice();
-    return device.CreateCommandAllocator(commandQueue, commandAllocator);
+    return device.CreateImplementation<CommandAllocatorD3D12>(commandAllocator, commandQueue);
 }
 
 static Result NRI_CALL CreateDescriptorPool(Device& device, const DescriptorPoolDesc& descriptorPoolDesc, DescriptorPool*& descriptorPool) {
-    return ((DeviceD3D12&)device).CreateDescriptorPool(descriptorPoolDesc, descriptorPool);
+    return ((DeviceD3D12&)device).CreateImplementation<DescriptorPoolD3D12>(descriptorPool, descriptorPoolDesc);
 }
 
 static Result NRI_CALL CreateBuffer(Device& device, const BufferDesc& bufferDesc, Buffer*& buffer) {
-    return ((DeviceD3D12&)device).CreateBuffer(bufferDesc, buffer);
+    return ((DeviceD3D12&)device).CreateImplementation<BufferD3D12>(buffer, bufferDesc);
 }
 
 static Result NRI_CALL CreateTexture(Device& device, const TextureDesc& textureDesc, Texture*& texture) {
-    return ((DeviceD3D12&)device).CreateTexture(textureDesc, texture);
+    return ((DeviceD3D12&)device).CreateImplementation<TextureD3D12>(texture, textureDesc);
 }
 
 static Result NRI_CALL CreateBufferView(const BufferViewDesc& bufferViewDesc, Descriptor*& bufferView) {
     DeviceD3D12& device = ((const BufferD3D12*)bufferViewDesc.buffer)->GetDevice();
-    return device.CreateDescriptor(bufferViewDesc, bufferView);
+    return device.CreateImplementation<DescriptorD3D12>(bufferView, bufferViewDesc);
 }
 
 static Result NRI_CALL CreateTexture1DView(const Texture1DViewDesc& textureViewDesc, Descriptor*& textureView) {
     DeviceD3D12& device = ((const TextureD3D12*)textureViewDesc.texture)->GetDevice();
-    return device.CreateDescriptor(textureViewDesc, textureView);
+    return device.CreateImplementation<DescriptorD3D12>(textureView, textureViewDesc);
 }
 
 static Result NRI_CALL CreateTexture2DView(const Texture2DViewDesc& textureViewDesc, Descriptor*& textureView) {
     DeviceD3D12& device = ((const TextureD3D12*)textureViewDesc.texture)->GetDevice();
-    return device.CreateDescriptor(textureViewDesc, textureView);
+    return device.CreateImplementation<DescriptorD3D12>(textureView, textureViewDesc);
 }
 
 static Result NRI_CALL CreateTexture3DView(const Texture3DViewDesc& textureViewDesc, Descriptor*& textureView) {
     DeviceD3D12& device = ((const TextureD3D12*)textureViewDesc.texture)->GetDevice();
-    return device.CreateDescriptor(textureViewDesc, textureView);
+    return device.CreateImplementation<DescriptorD3D12>(textureView, textureViewDesc);
 }
 
 static Result NRI_CALL CreateSampler(Device& device, const SamplerDesc& samplerDesc, Descriptor*& sampler) {
-    return ((DeviceD3D12&)device).CreateDescriptor(samplerDesc, sampler);
+    return ((DeviceD3D12&)device).CreateImplementation<DescriptorD3D12>(sampler, samplerDesc);
 }
 
 static Result NRI_CALL CreatePipelineLayout(Device& device, const PipelineLayoutDesc& pipelineLayoutDesc, PipelineLayout*& pipelineLayout) {
-    return ((DeviceD3D12&)device).CreatePipelineLayout(pipelineLayoutDesc, pipelineLayout);
+    return ((DeviceD3D12&)device).CreateImplementation<PipelineLayoutD3D12>(pipelineLayout, pipelineLayoutDesc);
 }
 
 static Result NRI_CALL CreateGraphicsPipeline(Device& device, const GraphicsPipelineDesc& graphicsPipelineDesc, Pipeline*& pipeline) {
-    return ((DeviceD3D12&)device).CreatePipeline(graphicsPipelineDesc, pipeline);
+    return ((DeviceD3D12&)device).CreateImplementation<PipelineD3D12>(pipeline, graphicsPipelineDesc);
 }
 
 static Result NRI_CALL CreateComputePipeline(Device& device, const ComputePipelineDesc& computePipelineDesc, Pipeline*& pipeline) {
-    return ((DeviceD3D12&)device).CreatePipeline(computePipelineDesc, pipeline);
+    return ((DeviceD3D12&)device).CreateImplementation<PipelineD3D12>(pipeline, computePipelineDesc);
 }
 
 static Result NRI_CALL CreateFence(Device& device, uint64_t initialValue, Fence*& fence) {
-    return ((DeviceD3D12&)device).CreateFence(initialValue, fence);
+    return ((DeviceD3D12&)device).CreateImplementation<FenceD3D12>(fence, initialValue);
 }
 
 static Result NRI_CALL CreateQueryPool(Device& device, const QueryPoolDesc& queryPoolDesc, QueryPool*& queryPool) {
-    return ((DeviceD3D12&)device).CreateQueryPool(queryPoolDesc, queryPool);
+    return ((DeviceD3D12&)device).CreateImplementation<QueryPoolD3D12>(queryPool, queryPoolDesc);
 }
 
-static void NRI_CALL DestroyCommandAllocator(CommandAllocator& commandAllocator) {
-    if (!(&commandAllocator))
-        return;
+static void NRI_CALL DestroyCommandBuffer(CommandBuffer& commandBuffer) {
+    Destroy((CommandBufferD3D12*)&commandBuffer);
+}
 
-    DeviceD3D12& device = ((CommandAllocatorD3D12&)commandAllocator).GetDevice();
-    device.DestroyCommandAllocator(commandAllocator);
+static void NRI_CALL DestroyCommandAllocator(CommandAllocator& commandAllocator) {
+    Destroy((CommandAllocatorD3D12*)&commandAllocator);
 }
 
 static void NRI_CALL DestroyDescriptorPool(DescriptorPool& descriptorPool) {
-    if (!(&descriptorPool))
-        return;
-
-    DeviceD3D12& device = ((DescriptorPoolD3D12&)descriptorPool).GetDevice();
-    device.DestroyDescriptorPool(descriptorPool);
+    Destroy((DescriptorPoolD3D12*)&descriptorPool);
 }
 
 static void NRI_CALL DestroyBuffer(Buffer& buffer) {
-    if (!(&buffer))
-        return;
-
-    DeviceD3D12& device = ((BufferD3D12&)buffer).GetDevice();
-    device.DestroyBuffer(buffer);
+    Destroy((BufferD3D12*)&buffer);
 }
 
 static void NRI_CALL DestroyTexture(Texture& texture) {
-    if (!(&texture))
-        return;
-
-    DeviceD3D12& device = ((TextureD3D12&)texture).GetDevice();
-    device.DestroyTexture(texture);
+    Destroy((TextureD3D12*)&texture);
 }
 
 static void NRI_CALL DestroyDescriptor(Descriptor& descriptor) {
-    if (!(&descriptor))
-        return;
-
-    DeviceD3D12& device = ((DescriptorD3D12&)descriptor).GetDevice();
-    device.DestroyDescriptor(descriptor);
+    Destroy((DescriptorD3D12*)&descriptor);
 }
 
 static void NRI_CALL DestroyPipelineLayout(PipelineLayout& pipelineLayout) {
-    if (!(&pipelineLayout))
-        return;
-
-    DeviceD3D12& device = ((PipelineLayoutD3D12&)pipelineLayout).GetDevice();
-    device.DestroyPipelineLayout(pipelineLayout);
+    Destroy((PipelineLayoutD3D12*)&pipelineLayout);
 }
 
 static void NRI_CALL DestroyPipeline(Pipeline& pipeline) {
-    if (!(&pipeline))
-        return;
-
-    DeviceD3D12& device = ((PipelineD3D12&)pipeline).GetDevice();
-    device.DestroyPipeline(pipeline);
-}
-
-static void NRI_CALL DestroyFence(Fence& fence) {
-    if (!(&fence))
-        return;
-
-    DeviceD3D12& device = ((FenceD3D12&)fence).GetDevice();
-    device.DestroyFence(fence);
+    Destroy((PipelineD3D12*)&pipeline);
 }
 
 static void NRI_CALL DestroyQueryPool(QueryPool& queryPool) {
-    if (!(&queryPool))
-        return;
+    Destroy((QueryPoolD3D12*)&queryPool);
+}
 
-    DeviceD3D12& device = ((QueryPoolD3D12&)queryPool).GetDevice();
-    device.DestroyQueryPool(queryPool);
+static void NRI_CALL DestroyFence(Fence& fence) {
+    Destroy((FenceD3D12*)&fence);
 }
 
 static Result NRI_CALL AllocateMemory(Device& device, const AllocateMemoryDesc& allocateMemoryDesc, Memory*& memory) {
-    return ((DeviceD3D12&)device).AllocateMemory(allocateMemoryDesc, memory);
+    return ((DeviceD3D12&)device).CreateImplementation<MemoryD3D12>(memory, allocateMemoryDesc);
 }
 
 static Result NRI_CALL BindBufferMemory(Device& device, const BufferMemoryBindingDesc* memoryBindingDescs, uint32_t memoryBindingDescNum) {
@@ -184,11 +152,7 @@ static Result NRI_CALL BindTextureMemory(Device& device, const TextureMemoryBind
 }
 
 static void NRI_CALL FreeMemory(Memory& memory) {
-    if (!(&memory))
-        return;
-
-    DeviceD3D12& device = ((MemoryD3D12&)memory).GetDevice();
-    device.FreeMemory(memory);
+    Destroy((MemoryD3D12*)&memory);
 }
 
 static void NRI_CALL SetDeviceDebugName(Device& device, const char* name) {
@@ -214,60 +178,21 @@ static void* NRI_CALL GetDeviceNativeObject(const Device& device) {
     return ((DeviceD3D12&)device).GetNativeObject();
 }
 
-Result DeviceD3D12::FillFunctionTable(CoreInterface& coreInterface) const {
-    coreInterface = {};
-    coreInterface.GetDeviceDesc = ::GetDeviceDesc;
-    coreInterface.GetBufferDesc = ::GetBufferDesc;
-    coreInterface.GetTextureDesc = ::GetTextureDesc;
-    coreInterface.GetFormatSupport = ::GetFormatSupport;
-    coreInterface.GetBufferMemoryDesc = ::GetBufferMemoryDesc;
-    coreInterface.GetTextureMemoryDesc = ::GetTextureMemoryDesc;
-    coreInterface.GetCommandQueue = ::GetCommandQueue;
-    coreInterface.CreateCommandAllocator = ::CreateCommandAllocator;
-    coreInterface.CreateDescriptorPool = ::CreateDescriptorPool;
-    coreInterface.CreateBuffer = ::CreateBuffer;
-    coreInterface.CreateTexture = ::CreateTexture;
-    coreInterface.CreateBufferView = ::CreateBufferView;
-    coreInterface.CreateTexture1DView = ::CreateTexture1DView;
-    coreInterface.CreateTexture2DView = ::CreateTexture2DView;
-    coreInterface.CreateTexture3DView = ::CreateTexture3DView;
-    coreInterface.CreateSampler = ::CreateSampler;
-    coreInterface.CreatePipelineLayout = ::CreatePipelineLayout;
-    coreInterface.CreateGraphicsPipeline = ::CreateGraphicsPipeline;
-    coreInterface.CreateComputePipeline = ::CreateComputePipeline;
-    coreInterface.CreateQueryPool = ::CreateQueryPool;
-    coreInterface.CreateFence = ::CreateFence;
-    coreInterface.DestroyCommandAllocator = ::DestroyCommandAllocator;
-    coreInterface.DestroyDescriptorPool = ::DestroyDescriptorPool;
-    coreInterface.DestroyBuffer = ::DestroyBuffer;
-    coreInterface.DestroyTexture = ::DestroyTexture;
-    coreInterface.DestroyDescriptor = ::DestroyDescriptor;
-    coreInterface.DestroyPipelineLayout = ::DestroyPipelineLayout;
-    coreInterface.DestroyPipeline = ::DestroyPipeline;
-    coreInterface.DestroyFence = ::DestroyFence;
-    coreInterface.DestroyQueryPool = ::DestroyQueryPool;
-    coreInterface.AllocateMemory = ::AllocateMemory;
-    coreInterface.BindBufferMemory = ::BindBufferMemory;
-    coreInterface.BindTextureMemory = ::BindTextureMemory;
-    coreInterface.FreeMemory = ::FreeMemory;
-    coreInterface.SetDeviceDebugName = ::SetDeviceDebugName;
-    coreInterface.SetPipelineDebugName = ::SetPipelineDebugName;
-    coreInterface.SetPipelineLayoutDebugName = ::SetPipelineLayoutDebugName;
-    coreInterface.SetMemoryDebugName = ::SetMemoryDebugName;
-    coreInterface.GetDeviceNativeObject = ::GetDeviceNativeObject;
-
-    Core_Buffer_PartiallyFillFunctionTableD3D12(coreInterface);
-    Core_CommandAllocator_PartiallyFillFunctionTableD3D12(coreInterface);
-    Core_CommandBuffer_PartiallyFillFunctionTableD3D12(coreInterface);
-    Core_CommandQueue_PartiallyFillFunctionTableD3D12(coreInterface);
-    Core_Descriptor_PartiallyFillFunctionTableD3D12(coreInterface);
-    Core_DescriptorPool_PartiallyFillFunctionTableD3D12(coreInterface);
-    Core_DescriptorSet_PartiallyFillFunctionTableD3D12(coreInterface);
-    Core_Fence_PartiallyFillFunctionTableD3D12(coreInterface);
-    Core_QueryPool_PartiallyFillFunctionTableD3D12(coreInterface);
-    Core_Texture_PartiallyFillFunctionTableD3D12(coreInterface);
-
-    return ValidateFunctionTable(coreInterface);
+Result DeviceD3D12::FillFunctionTable(CoreInterface& table) const {
+    table = {};
+    Core_Device_PartiallyFillFunctionTableD3D12(table);
+    Core_Buffer_PartiallyFillFunctionTableD3D12(table);
+    Core_CommandAllocator_PartiallyFillFunctionTableD3D12(table);
+    Core_CommandBuffer_PartiallyFillFunctionTableD3D12(table);
+    Core_CommandQueue_PartiallyFillFunctionTableD3D12(table);
+    Core_Descriptor_PartiallyFillFunctionTableD3D12(table);
+    Core_DescriptorPool_PartiallyFillFunctionTableD3D12(table);
+    Core_DescriptorSet_PartiallyFillFunctionTableD3D12(table);
+    Core_Fence_PartiallyFillFunctionTableD3D12(table);
+    Core_QueryPool_PartiallyFillFunctionTableD3D12(table);
+    Core_Texture_PartiallyFillFunctionTableD3D12(table);
+
+    return ValidateFunctionTable(table);
 }
 
 #pragma endregion
@@ -294,156 +219,104 @@ static Result NRI_CALL QueryVideoMemoryInfo(const Device& device, MemoryLocation
     return QueryVideoMemoryInfoDXGI(luid, memoryLocation, videoMemoryInfo);
 }
 
-Result DeviceD3D12::FillFunctionTable(HelperInterface& helperInterface) const {
-    helperInterface = {};
-    helperInterface.CalculateAllocationNumber = ::CalculateAllocationNumber;
-    helperInterface.AllocateAndBindMemory = ::AllocateAndBindMemory;
-    helperInterface.QueryVideoMemoryInfo = ::QueryVideoMemoryInfo;
-
-    Helper_CommandQueue_PartiallyFillFunctionTableD3D12(helperInterface);
+Result DeviceD3D12::FillFunctionTable(HelperInterface& table) const {
+    table = {};
+    Helper_CommandQueue_PartiallyFillFunctionTableD3D12(table);
+    Helper_Device_PartiallyFillFunctionTableD3D12(table);
 
-    return ValidateFunctionTable(helperInterface);
+    return ValidateFunctionTable(table);
 }
 
 #pragma endregion
 
-#pragma region[ WrapperD3D12 ]
-
-static Result NRI_CALL CreateCommandBuffer(Device& device, const CommandBufferD3D12Desc& commandBufferD3D12Desc, CommandBuffer*& commandBuffer) {
-    return ((DeviceD3D12&)device).CreateCommandBuffer(commandBufferD3D12Desc, commandBuffer);
-}
-
-static Result NRI_CALL CreateDescriptorPool(Device& device, const DescriptorPoolD3D12Desc& descriptorPoolD3D12Desc, DescriptorPool*& descriptorPool) {
-    return ((DeviceD3D12&)device).CreateDescriptorPool(descriptorPoolD3D12Desc, descriptorPool);
-}
-
-static Result NRI_CALL CreateBuffer(Device& device, const BufferD3D12Desc& bufferD3D12Desc, Buffer*& buffer) {
-    return ((DeviceD3D12&)device).CreateBuffer(bufferD3D12Desc, buffer);
-}
-
-static Result NRI_CALL CreateTexture(Device& device, const TextureD3D12Desc& textureD3D12Desc, Texture*& texture) {
-    return 
((DeviceD3D12&)device).CreateTexture(textureD3D12Desc, texture); -} - -static Result NRI_CALL CreateMemory(Device& device, const MemoryD3D12Desc& memoryD3D12Desc, Memory*& memory) { - return ((DeviceD3D12&)device).CreateMemory(memoryD3D12Desc, memory); -} +#pragma region[ LowLatency ] -static Result NRI_CALL CreateAccelerationStructure(Device& device, const AccelerationStructureD3D12Desc& accelerationStructureD3D12Desc, AccelerationStructure*& memory) { - return ((DeviceD3D12&)device).CreateAccelerationStructure(accelerationStructureD3D12Desc, memory); -} +Result DeviceD3D12::FillFunctionTable(LowLatencyInterface& table) const { + table = {}; + if (!m_Desc.isLowLatencySupported) + return Result::UNSUPPORTED; -Result DeviceD3D12::FillFunctionTable(WrapperD3D12Interface& wrapperD3D12Interface) const { - wrapperD3D12Interface = {}; - wrapperD3D12Interface.CreateCommandBufferD3D12 = ::CreateCommandBuffer; - wrapperD3D12Interface.CreateDescriptorPoolD3D12 = ::CreateDescriptorPool; - wrapperD3D12Interface.CreateBufferD3D12 = ::CreateBuffer; - wrapperD3D12Interface.CreateTextureD3D12 = ::CreateTexture; - wrapperD3D12Interface.CreateMemoryD3D12 = ::CreateMemory; - wrapperD3D12Interface.CreateAccelerationStructureD3D12 = ::CreateAccelerationStructure; + LowLatency_CommandQueue_PartiallyFillFunctionTableD3D12(table); + LowLatency_SwapChain_SwapChain_PartiallyFillFunctionTableD3D12(table); - return ValidateFunctionTable(wrapperD3D12Interface); + return ValidateFunctionTable(table); } #pragma endregion -#pragma region[ SwapChain ] - -static Result NRI_CALL CreateSwapChain(Device& device, const SwapChainDesc& swapChainDesc, SwapChain*& swapChain) { - return ((DeviceD3D12&)device).CreateSwapChain(swapChainDesc, swapChain); -} - -static void NRI_CALL DestroySwapChain(SwapChain& swapChain) { - if (!(&swapChain)) - return; - - DeviceD3D12& device = ((SwapChainD3D12&)swapChain).GetDevice(); - device.DestroySwapChain(swapChain); -} +#pragma region[ MeshShader ] -Result DeviceD3D12::FillFunctionTable(SwapChainInterface& swapChainInterface) const { - swapChainInterface = {}; - if (!m_Desc.isSwapChainSupported) +Result DeviceD3D12::FillFunctionTable(MeshShaderInterface& table) const { + table = {}; + if (!m_Desc.isMeshShaderSupported) return Result::UNSUPPORTED; - swapChainInterface.CreateSwapChain = ::CreateSwapChain; - swapChainInterface.DestroySwapChain = ::DestroySwapChain; + MeshShader_CommandBuffer_PartiallyFillFunctionTableD3D12(table); - SwapChain_PartiallyFillFunctionTableD3D12(swapChainInterface); - - return ValidateFunctionTable(swapChainInterface); + return ValidateFunctionTable(table); } #pragma endregion #pragma region[ RayTracing ] +static void NRI_CALL GetAccelerationStructureMemoryDesc( + const Device& device, const AccelerationStructureDesc& accelerationStructureDesc, MemoryLocation memoryLocation, MemoryDesc& memoryDesc) { + ((DeviceD3D12&)device).GetAccelerationStructureMemoryDesc(accelerationStructureDesc, memoryLocation, memoryDesc); +} + static Result NRI_CALL CreateRayTracingPipeline(Device& device, const RayTracingPipelineDesc& rayTracingPipelineDesc, Pipeline*& pipeline) { - return ((DeviceD3D12&)device).CreatePipeline(rayTracingPipelineDesc, pipeline); + return ((DeviceD3D12&)device).CreateImplementation(pipeline, rayTracingPipelineDesc); } static Result NRI_CALL CreateAccelerationStructure(Device& device, const AccelerationStructureDesc& accelerationStructureDesc, AccelerationStructure*& accelerationStructure) { - return 
((DeviceD3D12&)device).CreateAccelerationStructure(accelerationStructureDesc, accelerationStructure); + return ((DeviceD3D12&)device).CreateImplementation(accelerationStructure, accelerationStructureDesc); } static void NRI_CALL DestroyAccelerationStructure(AccelerationStructure& accelerationStructure) { - if (!(&accelerationStructure)) - return; - - DeviceD3D12& device = ((AccelerationStructureD3D12&)accelerationStructure).GetDevice(); - device.DestroyAccelerationStructure(accelerationStructure); + Destroy((AccelerationStructureD3D12*)&accelerationStructure); } static Result NRI_CALL BindAccelerationStructureMemory(Device& device, const AccelerationStructureMemoryBindingDesc* memoryBindingDescs, uint32_t memoryBindingDescNum) { return ((DeviceD3D12&)device).BindAccelerationStructureMemory(memoryBindingDescs, memoryBindingDescNum); } -void FillFunctionTablePipelineD3D12(RayTracingInterface& rayTracingInterface); +void FillFunctionTablePipelineD3D12(RayTracingInterface& table); -Result DeviceD3D12::FillFunctionTable(RayTracingInterface& rayTracingInterface) const { - rayTracingInterface = {}; +Result DeviceD3D12::FillFunctionTable(RayTracingInterface& table) const { + table = {}; if (!m_Desc.isRayTracingSupported) return Result::UNSUPPORTED; - FillFunctionTablePipelineD3D12(rayTracingInterface); - RayTracing_AccelerationStructure_PartiallyFillFunctionTableD3D12(rayTracingInterface); - RayTracing_CommandBuffer_PartiallyFillFunctionTableD3D12(rayTracingInterface); - - rayTracingInterface.CreateRayTracingPipeline = ::CreateRayTracingPipeline; - rayTracingInterface.CreateAccelerationStructure = ::CreateAccelerationStructure; - rayTracingInterface.BindAccelerationStructureMemory = ::BindAccelerationStructureMemory; - rayTracingInterface.DestroyAccelerationStructure = ::DestroyAccelerationStructure; + FillFunctionTablePipelineD3D12(table); + RayTracing_AccelerationStructure_PartiallyFillFunctionTableD3D12(table); + RayTracing_CommandBuffer_PartiallyFillFunctionTableD3D12(table); + RayTracing_Device_PartiallyFillFunctionTableD3D12(table); - return ValidateFunctionTable(rayTracingInterface); + return ValidateFunctionTable(table); } #pragma endregion -#pragma region[ MeshShader ] +#pragma region[ ResourceAllocator ] -Result DeviceD3D12::FillFunctionTable(MeshShaderInterface& meshShaderInterface) const { - meshShaderInterface = {}; - if (!m_Desc.isMeshShaderSupported) - return Result::UNSUPPORTED; - - MeshShader_CommandBuffer_PartiallyFillFunctionTableD3D12(meshShaderInterface); - - return ValidateFunctionTable(meshShaderInterface); +static Result AllocateBuffer(Device& device, const AllocateBufferDesc& bufferDesc, Buffer*& buffer) { + return ((DeviceD3D12&)device).CreateImplementation(buffer, bufferDesc); } -#pragma endregion - -#pragma region[ LowLatency ] +static Result AllocateTexture(Device& device, const AllocateTextureDesc& textureDesc, Texture*& texture) { + return ((DeviceD3D12&)device).CreateImplementation(texture, textureDesc); +} -Result DeviceD3D12::FillFunctionTable(LowLatencyInterface& lowLatencyInterface) const { - lowLatencyInterface = {}; - if (!m_Desc.isLowLatencySupported) - return Result::UNSUPPORTED; +static Result AllocateAccelerationStructure(Device& device, const AllocateAccelerationStructureDesc& accelerationStructureDesc, AccelerationStructure*& accelerationStructure) { + return ((DeviceD3D12&)device).CreateImplementation(accelerationStructure, accelerationStructureDesc); +} - LowLatency_CommandQueue_PartiallyFillFunctionTableD3D12(lowLatencyInterface); - 
LowLatency_SwapChain_PartiallyFillFunctionTableD3D12(lowLatencyInterface); +Result DeviceD3D12::FillFunctionTable(ResourceAllocatorInterface& table) const { + table = {}; + ResourceAllocator_Device_PartiallyFillFunctionTableD3D12(table); - return ValidateFunctionTable(lowLatencyInterface); + return ValidateFunctionTable(table); } #pragma endregion @@ -452,22 +325,20 @@ Result DeviceD3D12::FillFunctionTable(LowLatencyInterface& lowLatencyInterface) static Result CreateStreamer(Device& device, const StreamerDesc& streamerDesc, Streamer*& streamer) { DeviceD3D12& deviceD3D12 = (DeviceD3D12&)device; + StreamerImpl* impl = Allocate(deviceD3D12.GetStdAllocator(), device, deviceD3D12.GetCoreInterface()); + Result result = impl->Create(streamerDesc); - StreamerImpl* implementation = Allocate(deviceD3D12.GetStdAllocator(), device, deviceD3D12.GetCoreInterface()); - Result res = implementation->Create(streamerDesc); - - if (res == Result::SUCCESS) { - streamer = (Streamer*)implementation; - return Result::SUCCESS; - } - - Deallocate(deviceD3D12.GetStdAllocator(), implementation); + if (result != Result::SUCCESS) { + Destroy(deviceD3D12.GetStdAllocator(), impl); + streamer = nullptr; + } else + streamer = (Streamer*)impl; - return res; + return result; } static void DestroyStreamer(Streamer& streamer) { - Deallocate(((DeviceBase&)((StreamerImpl&)streamer).GetDevice()).GetStdAllocator(), (StreamerImpl*)&streamer); + Destroy(((DeviceBase&)((StreamerImpl&)streamer).GetDevice()).GetStdAllocator(), (StreamerImpl*)&streamer); } static Buffer* GetStreamerConstantBuffer(Streamer& streamer) { @@ -498,19 +369,78 @@ static void CmdUploadStreamerUpdateRequests(CommandBuffer& commandBuffer, Stream ((StreamerImpl&)streamer).CmdUploadStreamerUpdateRequests(commandBuffer); } -Result DeviceD3D12::FillFunctionTable(StreamerInterface& streamerInterface) const { - streamerInterface = {}; - streamerInterface.CreateStreamer = ::CreateStreamer; - streamerInterface.DestroyStreamer = ::DestroyStreamer; - streamerInterface.GetStreamerConstantBuffer = ::GetStreamerConstantBuffer; - streamerInterface.UpdateStreamerConstantBuffer = ::UpdateStreamerConstantBuffer; - streamerInterface.AddStreamerBufferUpdateRequest = ::AddStreamerBufferUpdateRequest; - streamerInterface.AddStreamerTextureUpdateRequest = ::AddStreamerTextureUpdateRequest; - streamerInterface.CopyStreamerUpdateRequests = ::CopyStreamerUpdateRequests; - streamerInterface.GetStreamerDynamicBuffer = ::GetStreamerDynamicBuffer; - streamerInterface.CmdUploadStreamerUpdateRequests = ::CmdUploadStreamerUpdateRequests; +Result DeviceD3D12::FillFunctionTable(StreamerInterface& table) const { + table = {}; + Streamer_Device_PartiallyFillFunctionTableD3D12(table); - return ValidateFunctionTable(streamerInterface); + return ValidateFunctionTable(table); } #pragma endregion + +#pragma region[ SwapChain ] + +static Result NRI_CALL CreateSwapChain(Device& device, const SwapChainDesc& swapChainDesc, SwapChain*& swapChain) { + return ((DeviceD3D12&)device).CreateImplementation(swapChain, swapChainDesc); +} + +static void NRI_CALL DestroySwapChain(SwapChain& swapChain) { + Destroy((SwapChainD3D12*)&swapChain); +} + +Result DeviceD3D12::FillFunctionTable(SwapChainInterface& table) const { + table = {}; + if (!m_Desc.isSwapChainSupported) + return Result::UNSUPPORTED; + + SwapChain_Device_PartiallyFillFunctionTableD3D12(table); + SwapChain_SwapChain_PartiallyFillFunctionTableD3D12(table); + + return ValidateFunctionTable(table); +} + +#pragma endregion + +#pragma region[ WrapperD3D12 ] 
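Every factory in this file now funnels through a single DeviceD3D12::CreateImplementation template, and every destroy entry point through the Destroy helper, so allocation, Create() error handling and deallocation live in one place instead of being repeated per type. The diff only shows the call sites; as a rough sketch (an illustration under assumptions, not the verbatim NRI helper), the template presumably looks like this:

    // Hypothetical sketch of the helper the call sites above rely on; the exact
    // signature and allocator plumbing inside DeviceD3D12 are assumptions.
    template <typename Implementation, typename Interface, typename... Args>
    Result DeviceD3D12::CreateImplementation(Interface*& entity, const Args&... args) {
        Implementation* impl = Allocate<Implementation>(GetStdAllocator(), *this);
        Result result = impl->Create(args...);

        if (result != Result::SUCCESS) {
            Destroy(GetStdAllocator(), impl); // runs the destructor, then frees via the same allocator
            entity = nullptr;
        } else
            entity = (Interface*)impl;

        return result;
    }

The hand-written CreateStreamer above follows the same shape: allocate, Create(), and on failure destroy the half-built object and null the out-pointer, so every creation path reports errors identically.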
+
+#pragma region[ WrapperD3D12 ]
+
+static Result NRI_CALL CreateCommandBufferD3D12(Device& device, const CommandBufferD3D12Desc& commandBufferDesc, CommandBuffer*& commandBuffer) {
+    return ((DeviceD3D12&)device).CreateImplementation<CommandBufferD3D12>(commandBuffer, commandBufferDesc);
+}
+
+static Result NRI_CALL CreateDescriptorPoolD3D12(Device& device, const DescriptorPoolD3D12Desc& descriptorPoolDesc, DescriptorPool*& descriptorPool) {
+    return ((DeviceD3D12&)device).CreateImplementation<DescriptorPoolD3D12>(descriptorPool, descriptorPoolDesc);
+}
+
+static Result NRI_CALL CreateBufferD3D12(Device& device, const BufferD3D12Desc& bufferDesc, Buffer*& buffer) {
+    return ((DeviceD3D12&)device).CreateImplementation<BufferD3D12>(buffer, bufferDesc);
+}
+
+static Result NRI_CALL CreateTextureD3D12(Device& device, const TextureD3D12Desc& textureDesc, Texture*& texture) {
+    return ((DeviceD3D12&)device).CreateImplementation<TextureD3D12>(texture, textureDesc);
+}
+
+static Result NRI_CALL CreateMemoryD3D12(Device& device, const MemoryD3D12Desc& memoryDesc, Memory*& memory) {
+    return ((DeviceD3D12&)device).CreateImplementation<MemoryD3D12>(memory, memoryDesc);
+}
+
+static Result NRI_CALL CreateAccelerationStructureD3D12(
+    Device& device, const AccelerationStructureD3D12Desc& accelerationStructureDesc, AccelerationStructure*& accelerationStructure) {
+    return ((DeviceD3D12&)device).CreateImplementation<AccelerationStructureD3D12>(accelerationStructure, accelerationStructureDesc);
+}
+
+Result DeviceD3D12::FillFunctionTable(WrapperD3D12Interface& table) const {
+    table = {};
+    WrapperD3D12_Device_PartiallyFillFunctionTableD3D12(table);
+
+    return ValidateFunctionTable(table);
+}
+
+#pragma endregion
+
+Define_Core_Device_PartiallyFillFunctionTable(D3D12);
+Define_Helper_Device_PartiallyFillFunctionTable(D3D12);
+Define_RayTracing_Device_PartiallyFillFunctionTable(D3D12);
+Define_Streamer_Device_PartiallyFillFunctionTable(D3D12);
+Define_SwapChain_Device_PartiallyFillFunctionTable(D3D12);
+Define_ResourceAllocator_Device_PartiallyFillFunctionTable(D3D12);
+Define_WrapperD3D12_Device_PartiallyFillFunctionTable(D3D12);
diff --git a/Source/D3D12/MemoryD3D12.cpp b/Source/D3D12/MemoryD3D12.cpp
index f6bbb78b..9e91538f 100644
--- a/Source/D3D12/MemoryD3D12.cpp
+++ b/Source/D3D12/MemoryD3D12.cpp
@@ -6,42 +6,12 @@
 
 using namespace nri;
 
-static inline D3D12_HEAP_TYPE GetHeapType(MemoryType memoryType) {
-    return (D3D12_HEAP_TYPE)(memoryType >> 16);
-}
-
 Result MemoryD3D12::Create(const AllocateMemoryDesc& allocateMemoryDesc) {
-    if (allocateMemoryDesc.priority != 0.0f) {
-        float p = allocateMemoryDesc.priority * 0.5f + 0.5f;
-        float level = 0.0f;
-
-        if (p < 0.2f) {
-            m_Priority = (uint32_t)D3D12_RESIDENCY_PRIORITY_MINIMUM;
-            level = 0.0f;
-        } else if (p < 0.4f) {
-            m_Priority = (uint32_t)D3D12_RESIDENCY_PRIORITY_LOW;
-            level = 0.2f;
-        } else if (p < 0.6f) {
-            m_Priority = (uint32_t)D3D12_RESIDENCY_PRIORITY_NORMAL;
-            level = 0.4f;
-        } else if (p < 0.8f) {
-            m_Priority = (uint32_t)D3D12_RESIDENCY_PRIORITY_HIGH;
-            level = 0.6f;
-        } else {
-            m_Priority = (uint32_t)D3D12_RESIDENCY_PRIORITY_MAXIMUM;
-            level = 0.8f;
-        }
-
-        uint32_t bonus = uint32_t(((p - level) / 0.2f) * 65535.0f);
-        if (bonus > 0xFFFF)
-            bonus = 0xFFFF;
-
-        m_Priority |= bonus;
-    }
+    MemoryTypeInfo memoryTypeInfo = Unpack(allocateMemoryDesc.type);
 
     D3D12_HEAP_DESC heapDesc = {};
     heapDesc.SizeInBytes = allocateMemoryDesc.size;
-    heapDesc.Properties.Type = GetHeapType(allocateMemoryDesc.type);
+    heapDesc.Properties.Type = (D3D12_HEAP_TYPE)memoryTypeInfo.heapType;
     heapDesc.Properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
     heapDesc.Properties.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
     heapDesc.Properties.CreationNodeMask = NRI_NODE_MASK;
@@ -49,27 +19,27 @@ Result MemoryD3D12::Create(const AllocateMemoryDesc& allocateMemoryDesc) {
     heapDesc.Alignment = 0;
     heapDesc.Flags = (allocateMemoryDesc.size ? GetHeapFlags(allocateMemoryDesc.type) : D3D12_HEAP_FLAG_NONE) | D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
 
-    if (!m_Device.IsDedicated(allocateMemoryDesc.type)) {
+    if (!memoryTypeInfo.mustBeDedicated) {
         HRESULT hr = m_Device->CreateHeap(&heapDesc, IID_PPV_ARGS(&m_Heap));
         RETURN_ON_BAD_HRESULT(&m_Device, hr, "ID3D12Device::CreateHeap()");
 
-        SetPriority(m_Heap.GetInterface());
+        D3D12_RESIDENCY_PRIORITY residencyPriority = (D3D12_RESIDENCY_PRIORITY)ConvertPriority(allocateMemoryDesc.priority);
+        if (m_Device.GetVersion() >= 1 && residencyPriority != 0) {
+            ID3D12Pageable* obj = m_Heap.GetInterface();
+            hr = m_Device->SetResidencyPriority(1, &obj, &residencyPriority);
+            RETURN_ON_BAD_HRESULT(&m_Device, hr, "ID3D12Device1::SetResidencyPriority()");
+        }
     }
 
     m_HeapDesc = heapDesc;
+    m_Priority = allocateMemoryDesc.priority;
 
     return Result::SUCCESS;
 }
 
 Result MemoryD3D12::Create(const MemoryD3D12Desc& memoryDesc) {
     m_Heap = memoryDesc.d3d12Heap;
-    m_HeapDesc = m_Heap ? m_Heap->GetDesc() : *memoryDesc.d3d12HeapDesc;
+    m_HeapDesc = m_Heap->GetDesc();
 
     return Result::SUCCESS;
 }
-
-void MemoryD3D12::SetPriority(ID3D12Pageable* obj) const {
-    D3D12_RESIDENCY_PRIORITY priority = (D3D12_RESIDENCY_PRIORITY)m_Priority;
-    if (m_Device.GetVersion() >= 1 && m_Priority)
-        m_Device->SetResidencyPriority(1, &obj, &priority);
-}
diff --git a/Source/D3D12/MemoryD3D12.h b/Source/D3D12/MemoryD3D12.h
index 3d3efd1a..bcd40bde 100644
--- a/Source/D3D12/MemoryD3D12.h
+++ b/Source/D3D12/MemoryD3D12.h
@@ -21,17 +21,20 @@ struct MemoryD3D12 {
         return m_HeapDesc;
     }
 
-    inline bool RequiresDedicatedAllocation() const {
-        return m_Heap.GetInterface() ? false : true;
-    }
-
     inline DeviceD3D12& GetDevice() const {
         return m_Device;
     }
 
+    inline bool IsDummy() const {
+        return m_Heap.GetInterface() == nullptr;
+    }
+
+    inline float GetPriority() const {
+        return m_Priority;
+    }
+
     Result Create(const AllocateMemoryDesc& allocateMemoryDesc);
     Result Create(const MemoryD3D12Desc& memoryDesc);
-    void SetPriority(ID3D12Pageable* obj) const;
 
 //================================================================================================================
 // NRI
@@ -45,7 +48,7 @@ struct MemoryD3D12 {
     DeviceD3D12& m_Device;
     ComPtr<ID3D12Heap> m_Heap;
     D3D12_HEAP_DESC m_HeapDesc = {};
-    uint32_t m_Priority = 0;
+    float m_Priority = 0.0f;
 };
 
 } // namespace nri
diff --git a/Source/D3D12/PipelineD3D12.cpp b/Source/D3D12/PipelineD3D12.cpp
index dd356329..8b5ab564 100644
--- a/Source/D3D12/PipelineD3D12.cpp
+++ b/Source/D3D12/PipelineD3D12.cpp
@@ -242,7 +242,7 @@ Result PipelineD3D12::CreateFromStream(const GraphicsPipelineDesc& graphicsPipel
     // Vertex input
     uint32_t attributeNum = graphicsPipelineDesc.vertexInput ? graphicsPipelineDesc.vertexInput->attributeNum : 0;
 
-    stream.inputLayout.desc.pInputElementDescs = STACK_ALLOC(D3D12_INPUT_ELEMENT_DESC, attributeNum);
+    stream.inputLayout.desc.pInputElementDescs = StackAlloc(D3D12_INPUT_ELEMENT_DESC, attributeNum);
     if (graphicsPipelineDesc.vertexInput) {
         const VertexInputDesc& vi = *graphicsPipelineDesc.vertexInput;
 
@@ -330,7 +330,7 @@ Result PipelineD3D12::Create(const GraphicsPipelineDesc& graphicsPipelineDesc) {
     // Vertex input
     uint32_t attributeNum = graphicsPipelineDesc.vertexInput ? graphicsPipelineDesc.vertexInput->attributeNum : 0;
 
-    graphicsPipleineStateDesc.InputLayout.pInputElementDescs = STACK_ALLOC(D3D12_INPUT_ELEMENT_DESC, attributeNum);
+    graphicsPipleineStateDesc.InputLayout.pInputElementDescs = StackAlloc(D3D12_INPUT_ELEMENT_DESC, attributeNum);
     if (graphicsPipelineDesc.vertexInput) {
         const VertexInputDesc& vi = *graphicsPipelineDesc.vertexInput;
 
diff --git a/Source/D3D12/PipelineD3D12.hpp b/Source/D3D12/PipelineD3D12.hpp
index 33a37603..66dc7200 100644
--- a/Source/D3D12/PipelineD3D12.hpp
+++ b/Source/D3D12/PipelineD3D12.hpp
@@ -6,8 +6,8 @@ static Result NRI_CALL WriteShaderGroupIdentifiers(const Pipeline& pipeline, uin
     return ((const PipelineD3D12&)pipeline).WriteShaderGroupIdentifiers(baseShaderGroupIndex, shaderGroupNum, buffer);
 }
 
-void FillFunctionTablePipelineD3D12(RayTracingInterface& rayTracingInterface) {
-    rayTracingInterface.WriteShaderGroupIdentifiers = ::WriteShaderGroupIdentifiers;
+void FillFunctionTablePipelineD3D12(RayTracingInterface& table) {
+    table.WriteShaderGroupIdentifiers = ::WriteShaderGroupIdentifiers;
 }
 
 #pragma endregion
diff --git a/Source/D3D12/ResourceAllocatorD3D12.cpp b/Source/D3D12/ResourceAllocatorD3D12.cpp
new file mode 100644
index 00000000..88207295
--- /dev/null
+++ b/Source/D3D12/ResourceAllocatorD3D12.cpp
@@ -0,0 +1,150 @@
+// © 2021 NVIDIA Corporation
+
+#include "SharedD3D12.h"
+
+#include "AccelerationStructureD3D12.h"
+#include "BufferD3D12.h"
+#include "TextureD3D12.h"
+
+using namespace nri;
+
+Result DeviceD3D12::CreateVma() {
+    if (m_Vma)
+        return Result::SUCCESS;
+
+    D3D12MA::ALLOCATOR_DESC allocatorDesc = {};
+    allocatorDesc.pDevice = m_Device;
+    allocatorDesc.pAdapter = m_Adapter;
+    allocatorDesc.pAllocationCallbacks = m_AllocationCallbackPtr;
+    allocatorDesc.Flags = (D3D12MA::ALLOCATOR_FLAGS)(
+        D3D12MA::ALLOCATOR_FLAG_DEFAULT_POOLS_NOT_ZEROED | D3D12MA::ALLOCATOR_FLAG_MSAA_TEXTURES_ALWAYS_COMMITTED | D3D12MA::ALLOCATOR_FLAG_DONT_PREFER_SMALL_BUFFERS_COMMITTED);
+
+    HRESULT hr = D3D12MA::CreateAllocator(&allocatorDesc, &m_Vma);
+    RETURN_ON_BAD_HRESULT(this, hr, "D3D12MA::CreateAllocator()");
+
+    return Result::SUCCESS;
+}
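CreateVma() is invoked lazily by each allocation path below, so an application that never touches the ResourceAllocator extension never pays for a D3D12MA instance. From the application side, usage presumably looks like the following sketch (the "resourceAllocator" interface variable and the SHADER_RESOURCE usage bit are assumptions for illustration; the AllocateBufferDesc fields match this diff):

    // Hypothetical usage; "resourceAllocator" is a ResourceAllocatorInterface
    // obtained via the usual NRI interface query, which is assumed here.
    nri::AllocateBufferDesc bufferDesc = {};
    bufferDesc.memoryLocation = nri::MemoryLocation::HOST_UPLOAD;
    bufferDesc.memoryPriority = 0.0f; // 0 = leave residency priority untouched
    bufferDesc.desc.size = 64 * 1024;
    bufferDesc.desc.usageMask = nri::BufferUsageBits::SHADER_RESOURCE;

    nri::Buffer* buffer = nullptr;
    nri::Result result = resourceAllocator.AllocateBuffer(device, bufferDesc, buffer);
    // On success the buffer owns its D3D12MA allocation and is released by DestroyBuffer.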
+
+Result BufferD3D12::Create(const AllocateBufferDesc& bufferDesc, bool isAccelerationStructureBuffer) {
+    Result nriResult = m_Device.CreateVma();
+    if (nriResult != Result::SUCCESS)
+        return nriResult;
+
+    D3D12MA::ALLOCATION_DESC allocationDesc = {};
+    allocationDesc.HeapType = GetHeapType(bufferDesc.memoryLocation);
+    allocationDesc.Flags = D3D12MA::ALLOCATION_FLAG_CAN_ALIAS;
+
+#ifdef NRI_USE_AGILITY_SDK
+    if (m_Device.GetVersion() >= 10) {
+        D3D12_RESOURCE_DESC1 desc1 = {};
+        GetResourceDesc((D3D12_RESOURCE_DESC*)&desc1, bufferDesc.desc);
+
+        uint32_t castableFormatNum = 0;
+        DXGI_FORMAT* castableFormats = nullptr; // TODO: add castable formats, see options12.RelaxedFormatCastingSupported
+
+        HRESULT hr = m_Device.GetVma()->CreateResource3(
+            &allocationDesc, &desc1, D3D12_BARRIER_LAYOUT_UNDEFINED, nullptr, castableFormatNum, castableFormats, &m_VmaAllocation, IID_PPV_ARGS(&m_Buffer));
+
+        RETURN_ON_BAD_HRESULT(&m_Device, hr, "D3D12MA::CreateResource3()");
+    } else
+#endif
+    {
+        D3D12_RESOURCE_DESC desc = {};
+        GetResourceDesc(&desc, bufferDesc.desc);
+
+        D3D12_RESOURCE_STATES initialState = D3D12_RESOURCE_STATE_COMMON;
+        if (bufferDesc.memoryLocation == MemoryLocation::HOST_UPLOAD || bufferDesc.memoryLocation == MemoryLocation::DEVICE_UPLOAD)
+            initialState |= D3D12_RESOURCE_STATE_GENERIC_READ;
+        else if (bufferDesc.memoryLocation == MemoryLocation::HOST_READBACK)
+            initialState |= D3D12_RESOURCE_STATE_COPY_DEST;
+
+        if (isAccelerationStructureBuffer)
+            initialState |= D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE;
+
+        HRESULT hr = m_Device.GetVma()->CreateResource(&allocationDesc, &desc, initialState, nullptr, &m_VmaAllocation, IID_PPV_ARGS(&m_Buffer));
+
+        RETURN_ON_BAD_HRESULT(&m_Device, hr, "D3D12MA::CreateResource()");
+    }
+
+    // Priority
+    D3D12_RESIDENCY_PRIORITY residencyPriority = (D3D12_RESIDENCY_PRIORITY)ConvertPriority(bufferDesc.memoryPriority);
+    if (m_Device.GetVersion() >= 1 && residencyPriority != 0) {
+        ID3D12Pageable* obj = m_Buffer.GetInterface();
+        HRESULT hr = m_Device->SetResidencyPriority(1, &obj, &residencyPriority);
+        RETURN_ON_BAD_HRESULT(&m_Device, hr, "ID3D12Device1::SetResidencyPriority()");
+    }
+
+    m_Desc = bufferDesc.desc;
+
+    return Result::SUCCESS;
+}
+
+Result TextureD3D12::Create(const AllocateTextureDesc& textureDesc) {
+    Result nriResult = m_Device.CreateVma();
+    if (nriResult != Result::SUCCESS)
+        return nriResult;
+
+    D3D12_CLEAR_VALUE clearValue = {GetDxgiFormat(textureDesc.desc.format).typed};
+
+    D3D12MA::ALLOCATION_DESC allocationDesc = {};
+    allocationDesc.HeapType = GetHeapType(textureDesc.memoryLocation);
+    allocationDesc.Flags = D3D12MA::ALLOCATION_FLAG_CAN_ALIAS;
+
+#ifdef NRI_USE_AGILITY_SDK
+    if (m_Device.GetVersion() >= 10) {
+        D3D12_RESOURCE_DESC1 desc1 = {};
+        GetResourceDesc((D3D12_RESOURCE_DESC*)&desc1, textureDesc.desc);
+
+        bool isRenderableSurface = desc1.Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL);
+        uint32_t castableFormatNum = 0;
+        DXGI_FORMAT* castableFormats = nullptr; // TODO: add castable formats, see options12.RelaxedFormatCastingSupported
+
+        HRESULT hr = m_Device.GetVma()->CreateResource3(&allocationDesc, &desc1, D3D12_BARRIER_LAYOUT_COMMON, isRenderableSurface ? &clearValue : nullptr, castableFormatNum,
+            castableFormats, &m_VmaAllocation, IID_PPV_ARGS(&m_Texture));
+
+        RETURN_ON_BAD_HRESULT(&m_Device, hr, "D3D12MA::CreateResource3()");
+    } else
+#endif
+    {
+        D3D12_RESOURCE_DESC desc = {};
+        GetResourceDesc(&desc, textureDesc.desc);
+
+        bool isRenderableSurface = desc.Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL);
+
+        HRESULT hr = m_Device.GetVma()->CreateResource(
+            &allocationDesc, &desc, D3D12_RESOURCE_STATE_COMMON, isRenderableSurface ? &clearValue : nullptr, &m_VmaAllocation, IID_PPV_ARGS(&m_Texture));
+
+        RETURN_ON_BAD_HRESULT(&m_Device, hr, "D3D12MA::CreateResource()");
+    }
+
+    // Priority
+    D3D12_RESIDENCY_PRIORITY residencyPriority = (D3D12_RESIDENCY_PRIORITY)ConvertPriority(textureDesc.memoryPriority);
+    if (m_Device.GetVersion() >= 1 && residencyPriority != 0) {
+        ID3D12Pageable* obj = m_Texture.GetInterface();
+        HRESULT hr = m_Device->SetResidencyPriority(1, &obj, &residencyPriority);
+        RETURN_ON_BAD_HRESULT(&m_Device, hr, "ID3D12Device1::SetResidencyPriority()");
+    }
+
+    m_Desc = textureDesc.desc;
+
+    return Result::SUCCESS;
+}
+
+Result AccelerationStructureD3D12::Create(const AllocateAccelerationStructureDesc& accelerationStructureDesc) {
+    if (m_Device.GetVersion() < 5)
+        return Result::UNSUPPORTED;
+
+    Result nriResult = m_Device.CreateVma();
+    if (nriResult != Result::SUCCESS)
+        return nriResult;
+
+    m_Device.GetAccelerationStructurePrebuildInfo(accelerationStructureDesc.desc, m_PrebuildInfo);
+
+    AllocateBufferDesc bufferDesc = {};
+    bufferDesc.memoryLocation = accelerationStructureDesc.memoryLocation;
+    bufferDesc.memoryPriority = accelerationStructureDesc.memoryPriority;
+    bufferDesc.desc.size = m_PrebuildInfo.ResultDataMaxSizeInBytes;
+    bufferDesc.desc.usageMask = BufferUsageBits::RAY_TRACING_BUFFER;
+
+    return m_Device.CreateImplementation<BufferD3D12>((Buffer*&)m_Buffer, bufferDesc, ACCELERATION_STRUCTURE_BUFFER);
+}
diff --git a/Source/D3D12/SharedD3D12.cpp b/Source/D3D12/SharedD3D12.cpp
index 57ede475..7f48ab54 100644
--- a/Source/D3D12/SharedD3D12.cpp
+++ b/Source/D3D12/SharedD3D12.cpp
@@ -28,7 +28,7 @@ D3D12_HEAP_FLAGS nri::GetHeapFlags(MemoryType memoryType) {
 D3D12_RESOURCE_FLAGS nri::GetBufferFlags(BufferUsageBits bufferUsageMask) {
     D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE;
 
-    if (bufferUsageMask & BufferUsageBits::SHADER_RESOURCE_STORAGE)
+    if (bufferUsageMask & (BufferUsageBits::SHADER_RESOURCE_STORAGE | BufferUsageBits::RAY_TRACING_BUFFER))
         flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
 
     return flags;
@@ -320,7 +320,41 @@ void nri::ConvertRects(D3D12_RECT* rectsD3D12, const Rect* rects, uint32_t rectN
 }
 
 uint64_t nri::GetMemorySizeD3D12(const MemoryD3D12Desc& memoryD3D12Desc) {
-    return memoryD3D12Desc.d3d12Heap ? memoryD3D12Desc.d3d12Heap->GetDesc().SizeInBytes : memoryD3D12Desc.d3d12HeapDesc->SizeInBytes;
+    return memoryD3D12Desc.d3d12Heap->GetDesc().SizeInBytes;
+}
+
+D3D12_RESIDENCY_PRIORITY nri::ConvertPriority(float priority) {
+    if (priority == 0.0f)
+        return (D3D12_RESIDENCY_PRIORITY)0;
+
+    float p = priority * 0.5f + 0.5f;
+    float level = 0.0f;
+
+    uint32_t result = 0;
+    if (p < 0.2f) {
+        result = (uint32_t)D3D12_RESIDENCY_PRIORITY_MINIMUM;
+        level = 0.0f;
+    } else if (p < 0.4f) {
+        result = (uint32_t)D3D12_RESIDENCY_PRIORITY_LOW;
+        level = 0.2f;
+    } else if (p < 0.6f) {
+        result = (uint32_t)D3D12_RESIDENCY_PRIORITY_NORMAL;
+        level = 0.4f;
+    } else if (p < 0.8f) {
+        result = (uint32_t)D3D12_RESIDENCY_PRIORITY_HIGH;
+        level = 0.6f;
+    } else {
+        result = (uint32_t)D3D12_RESIDENCY_PRIORITY_MAXIMUM;
+        level = 0.8f;
+    }
+
+    uint32_t bonus = uint32_t(((p - level) / 0.2f) * 65535.0f);
+    if (bonus > 0xFFFF)
+        bonus = 0xFFFF;
+
+    result |= bonus;
+
+    return (D3D12_RESIDENCY_PRIORITY)result;
 }
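A worked example makes the quantization concrete: the [-1..1] input is remapped to p in [0..1], the five residency buckets each cover a 0.2-wide slice of that range, and the remainder becomes a 16-bit intra-bucket bonus:

    // ConvertPriority(0.5f), step by step (pure arithmetic from the code above):
    //   p      = 0.5f * 0.5f + 0.5f = 0.75f                   -> bucket [0.6, 0.8)
    //   result = D3D12_RESIDENCY_PRIORITY_HIGH, level = 0.6f
    //   bonus  = uint32_t(((0.75f - 0.6f) / 0.2f) * 65535.0f) = 49151 (<= 0xFFFF)
    //   return = D3D12_RESIDENCY_PRIORITY_HIGH | 49151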
memoryD3D12Desc.d3d12Heap->GetDesc().SizeInBytes : memoryD3D12Desc.d3d12HeapDesc->SizeInBytes; + return memoryD3D12Desc.d3d12Heap->GetDesc().SizeInBytes; +} + +D3D12_RESIDENCY_PRIORITY nri::ConvertPriority(float priority) { + if (priority == 0.0f) + return (D3D12_RESIDENCY_PRIORITY)0; + + float p = priority * 0.5f + 0.5f; + float level = 0.0f; + + uint32_t result = 0; + if (p < 0.2f) { + result = (uint32_t)D3D12_RESIDENCY_PRIORITY_MINIMUM; + level = 0.0f; + } else if (p < 0.4f) { + result = (uint32_t)D3D12_RESIDENCY_PRIORITY_LOW; + level = 0.2f; + } else if (p < 0.6f) { + result = (uint32_t)D3D12_RESIDENCY_PRIORITY_NORMAL; + level = 0.4f; + } else if (p < 0.8f) { + result = (uint32_t)D3D12_RESIDENCY_PRIORITY_HIGH; + level = 0.6f; + } else { + result = (uint32_t)D3D12_RESIDENCY_PRIORITY_MAXIMUM; + level = 0.8f; + } + + uint32_t bonus = uint32_t(((p - level) / 0.2f) * 65535.0f); + if (bonus > 0xFFFF) + bonus = 0xFFFF; + + result |= bonus; + + return (D3D12_RESIDENCY_PRIORITY)result; } bool nri::GetTextureDesc(const TextureD3D12Desc& textureD3D12Desc, TextureDesc& textureDesc) { @@ -381,6 +415,21 @@ bool nri::GetBufferDesc(const BufferD3D12Desc& bufferD3D12Desc, BufferDesc& buff return true; } +constexpr std::array HEAP_TYPES = { + D3D12_HEAP_TYPE_DEFAULT, // DEVICE +#ifdef NRI_USE_AGILITY_SDK + D3D12_HEAP_TYPE_GPU_UPLOAD, // DEVICE_UPLOAD (Prerequisite: D3D12_FEATURE_D3D12_OPTIONS16) +#else + D3D12_HEAP_TYPE_UPLOAD, // DEVICE_UPLOAD (silent fallback to HOST_UPLOAD) +#endif + D3D12_HEAP_TYPE_UPLOAD, // HOST_UPLOAD + D3D12_HEAP_TYPE_READBACK, // HOST_READBACK +}; + +D3D12_HEAP_TYPE nri::GetHeapType(MemoryLocation memoryLocation) { + return HEAP_TYPES[(size_t)memoryLocation]; +} + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE nri::GetAccelerationStructureType(AccelerationStructureType accelerationStructureType) { static_assert(D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL == (uint32_t)AccelerationStructureType::TOP_LEVEL, "Unsupported AccelerationStructureType."); static_assert(D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL == (uint32_t)AccelerationStructureType::BOTTOM_LEVEL, "Unsupported AccelerationStructureType."); diff --git a/Source/D3D12/SharedD3D12.h b/Source/D3D12/SharedD3D12.h index 9b55aa3b..3966cb49 100644 --- a/Source/D3D12/SharedD3D12.h +++ b/Source/D3D12/SharedD3D12.h @@ -13,6 +13,22 @@ typedef uint16_t HeapOffsetType; #define BASE_ATTRIBUTES_EMULATION_SPACE 999 // see NRI_ENABLE_DRAW_PARAMETERS +struct MemoryTypeInfo { + uint16_t heapFlags; + uint8_t heapType; + bool mustBeDedicated; +}; + +inline nri::MemoryType Pack(const MemoryTypeInfo& memoryTypeInfo) { + return *(nri::MemoryType*)&memoryTypeInfo; +} + +inline MemoryTypeInfo Unpack(const nri::MemoryType& memoryType) { + return *(MemoryTypeInfo*)&memoryType; +} + +static_assert(sizeof(MemoryTypeInfo) == sizeof(nri::MemoryType), "Must be equal"); + namespace nri { enum DescriptorHeapType : uint32_t { RESOURCE = 0, @@ -32,22 +48,21 @@ struct DescriptorHeapDesc { uint32_t descriptorSize; }; -D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE GetAccelerationStructureType(AccelerationStructureType accelerationStructureType); -D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS GetAccelerationStructureBuildFlags(AccelerationStructureBuildBits accelerationStructureBuildFlags); -D3D12_RAYTRACING_ACCELERATION_STRUCTURE_COPY_MODE GetCopyMode(CopyMode copyMode); - void GetResourceDesc(D3D12_RESOURCE_DESC* desc, const BufferDesc& bufferDesc); void GetResourceDesc(D3D12_RESOURCE_DESC* desc, const TextureDesc& textureDesc); - 
-D3D12_RESOURCE_FLAGS GetBufferFlags(BufferUsageBits bufferUsageMask); -D3D12_RESOURCE_FLAGS GetTextureFlags(TextureUsageBits textureUsageMask); - void ConvertGeometryDescs(D3D12_RAYTRACING_GEOMETRY_DESC* geometryDescs, const GeometryObject* geometryObjects, uint32_t geometryObjectNum); void ConvertRects(D3D12_RECT* rectsD3D12, const Rect* rects, uint32_t rectNum); bool GetTextureDesc(const TextureD3D12Desc& textureD3D12Desc, TextureDesc& textureDesc); bool GetBufferDesc(const BufferD3D12Desc& bufferD3D12Desc, BufferDesc& bufferDesc); uint64_t GetMemorySizeD3D12(const MemoryD3D12Desc& memoryD3D12Desc); +D3D12_RESIDENCY_PRIORITY ConvertPriority(float priority); +D3D12_HEAP_TYPE GetHeapType(MemoryLocation memoryLocation); +D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE GetAccelerationStructureType(AccelerationStructureType accelerationStructureType); +D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS GetAccelerationStructureBuildFlags(AccelerationStructureBuildBits accelerationStructureBuildFlags); +D3D12_RAYTRACING_ACCELERATION_STRUCTURE_COPY_MODE GetCopyMode(CopyMode copyMode); +D3D12_RESOURCE_FLAGS GetBufferFlags(BufferUsageBits bufferUsageMask); +D3D12_RESOURCE_FLAGS GetTextureFlags(TextureUsageBits textureUsageMask); D3D12_FILTER GetFilterIsotropic(Filter mip, Filter magnification, Filter minification, FilterExt filterExt, bool useComparison); D3D12_FILTER GetFilterAnisotropic(FilterExt filterExt, bool useComparison); D3D12_TEXTURE_ADDRESS_MODE GetAddressMode(AddressMode addressMode); @@ -79,4 +94,27 @@ namespace d3d12 { #include "D3DExt.h" } +// VMA +#if defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wunused-parameter" +#elif defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wunused-parameter" +#else +# pragma warning(push) +# pragma warning(disable : 4100) // unreferenced formal parameter +#endif + +#define D3D12MA_D3D12_HEADERS_ALREADY_INCLUDED +#include "memalloc/D3D12MemAlloc.h" + +#if defined(__GNUC__) +# pragma GCC diagnostic pop +#elif defined(__clang__) +# pragma clang diagnostic pop +#elif defined(MSVC) +# pragma warning(pop) +#endif + #include "DeviceD3D12.h" diff --git a/Source/D3D12/SwapChainD3D12.cpp b/Source/D3D12/SwapChainD3D12.cpp index 1fe60101..0ed267af 100644 --- a/Source/D3D12/SwapChainD3D12.cpp +++ b/Source/D3D12/SwapChainD3D12.cpp @@ -47,7 +47,7 @@ SwapChainD3D12::~SwapChainD3D12() { CloseHandle(m_FrameLatencyWaitableObject); for (TextureD3D12* texture : m_Textures) - Deallocate(m_Device.GetStdAllocator(), texture); + Destroy(m_Device.GetStdAllocator(), texture); } Result SwapChainD3D12::Create(const SwapChainDesc& swapChainDesc) { diff --git a/Source/D3D12/SwapChainD3D12.hpp b/Source/D3D12/SwapChainD3D12.hpp index cf055574..a37665cd 100644 --- a/Source/D3D12/SwapChainD3D12.hpp +++ b/Source/D3D12/SwapChainD3D12.hpp @@ -48,5 +48,5 @@ static Result GetLatencyReport(const SwapChain& swapChain, LatencyReport& latenc #pragma endregion -Define_SwapChain_PartiallyFillFunctionTable(D3D12); -Define_LowLatency_SwapChain_PartiallyFillFunctionTable(D3D12); +Define_SwapChain_SwapChain_PartiallyFillFunctionTable(D3D12); +Define_LowLatency_SwapChain_SwapChain_PartiallyFillFunctionTable(D3D12); diff --git a/Source/D3D12/TextureD3D12.cpp b/Source/D3D12/TextureD3D12.cpp index db561762..b9e16404 100644 --- a/Source/D3D12/TextureD3D12.cpp +++ b/Source/D3D12/TextureD3D12.cpp @@ -45,8 +45,8 @@ Result TextureD3D12::BindMemory(const MemoryD3D12* memory, uint64_t offset) { return Result::SUCCESS; D3D12_CLEAR_VALUE 
clearValue = {GetDxgiFormat(m_Desc.format).typed}; - const D3D12_HEAP_DESC& heapDesc = memory->GetHeapDesc(); + const D3D12_HEAP_DESC& heapDesc = memory->GetHeapDesc(); // STATE_CREATION ERROR #640: CREATERESOURCEANDHEAP_INVALIDHEAPMISCFLAGS D3D12_HEAP_FLAGS heapFlagsFixed = heapDesc.Flags & ~(D3D12_HEAP_FLAG_DENY_NON_RT_DS_TEXTURES | D3D12_HEAP_FLAG_DENY_RT_DS_TEXTURES | D3D12_HEAP_FLAG_DENY_BUFFERS); @@ -55,12 +55,12 @@ Result TextureD3D12::BindMemory(const MemoryD3D12* memory, uint64_t offset) { D3D12_RESOURCE_DESC1 desc1 = {}; GetResourceDesc((D3D12_RESOURCE_DESC*)&desc1, m_Desc); + const D3D12_BARRIER_LAYOUT initialLayout = D3D12_BARRIER_LAYOUT_COMMON; bool isRenderableSurface = desc1.Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL); - D3D12_BARRIER_LAYOUT initialLayout = D3D12_BARRIER_LAYOUT_COMMON; uint32_t castableFormatNum = 0; DXGI_FORMAT* castableFormats = nullptr; // TODO: add castable formats, see options12.RelaxedFormatCastingSupported - if (memory->RequiresDedicatedAllocation()) { + if (memory->IsDummy()) { HRESULT hr = m_Device->CreateCommittedResource3(&heapDesc.Properties, heapFlagsFixed, &desc1, initialLayout, isRenderableSurface ? &clearValue : nullptr, nullptr, castableFormatNum, castableFormats, IID_PPV_ARGS(&m_Texture)); RETURN_ON_BAD_HRESULT(&m_Device, hr, "ID3D12Device10::CreateCommittedResource3()"); @@ -75,20 +75,26 @@ Result TextureD3D12::BindMemory(const MemoryD3D12* memory, uint64_t offset) { D3D12_RESOURCE_DESC desc = {}; GetResourceDesc(&desc, m_Desc); + const D3D12_RESOURCE_STATES initialState = D3D12_RESOURCE_STATE_COMMON; bool isRenderableSurface = desc.Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL); - if (memory->RequiresDedicatedAllocation()) { - HRESULT hr = m_Device->CreateCommittedResource( - &heapDesc.Properties, heapFlagsFixed, &desc, D3D12_RESOURCE_STATE_COMMON, isRenderableSurface ? &clearValue : nullptr, IID_PPV_ARGS(&m_Texture)); + if (memory->IsDummy()) { + HRESULT hr = + m_Device->CreateCommittedResource(&heapDesc.Properties, heapFlagsFixed, &desc, initialState, isRenderableSurface ? &clearValue : nullptr, IID_PPV_ARGS(&m_Texture)); RETURN_ON_BAD_HRESULT(&m_Device, hr, "ID3D12Device::CreateCommittedResource()"); } else { - HRESULT hr = m_Device->CreatePlacedResource(*memory, offset, &desc, D3D12_RESOURCE_STATE_COMMON, isRenderableSurface ? &clearValue : nullptr, IID_PPV_ARGS(&m_Texture)); + HRESULT hr = m_Device->CreatePlacedResource(*memory, offset, &desc, initialState, isRenderableSurface ? 
&clearValue : nullptr, IID_PPV_ARGS(&m_Texture)); RETURN_ON_BAD_HRESULT(&m_Device, hr, "ID3D12Device::CreatePlacedResource()"); } } - if (memory->RequiresDedicatedAllocation()) - memory->SetPriority(m_Texture.GetInterface()); + // Priority + D3D12_RESIDENCY_PRIORITY residencyPriority = (D3D12_RESIDENCY_PRIORITY)ConvertPriority(memory->GetPriority()); + if (m_Device.GetVersion() >= 1 && residencyPriority != 0) { + ID3D12Pageable* obj = m_Texture.GetInterface(); + HRESULT hr = m_Device->SetResidencyPriority(1, &obj, &residencyPriority); + RETURN_ON_BAD_HRESULT(&m_Device, hr, "ID3D12Device1::SetResidencyPriority()"); + } return Result::SUCCESS; } diff --git a/Source/D3D12/TextureD3D12.h b/Source/D3D12/TextureD3D12.h index 445e4ace..b6f84d2f 100644 --- a/Source/D3D12/TextureD3D12.h +++ b/Source/D3D12/TextureD3D12.h @@ -2,8 +2,6 @@ #pragma once -#include "SharedD3D12.h" - #ifdef NRI_USE_AGILITY_SDK struct ID3D12Resource2; typedef ID3D12Resource2 ID3D12ResourceBest; @@ -46,6 +44,7 @@ struct TextureD3D12 { Result Create(const TextureDesc& textureDesc); Result Create(const TextureD3D12Desc& textureDesc); + Result Create(const AllocateTextureDesc& textureDesc); Result BindMemory(const MemoryD3D12* memory, uint64_t offset); //================================================================================================================ @@ -58,8 +57,9 @@ struct TextureD3D12 { private: DeviceD3D12& m_Device; - TextureDesc m_Desc = {}; ComPtr m_Texture; + ComPtr m_VmaAllocation = nullptr; + TextureDesc m_Desc = {}; }; } // namespace nri diff --git a/Source/Shared/DeviceBase.h b/Source/Shared/DeviceBase.h index c8c9fa48..50b9ed09 100644 --- a/Source/Shared/DeviceBase.h +++ b/Source/Shared/DeviceBase.h @@ -31,7 +31,7 @@ struct DeviceBase { } virtual const DeviceDesc& GetDesc() const = 0; - virtual void Destroy() = 0; + virtual void Destruct() = 0; virtual Result FillFunctionTable(CoreInterface& table) const { table = {}; @@ -43,22 +43,22 @@ struct DeviceBase { return Result::UNSUPPORTED; } - virtual Result FillFunctionTable(StreamerInterface& table) const { + virtual Result FillFunctionTable(LowLatencyInterface& table) const { table = {}; return Result::UNSUPPORTED; } - virtual Result FillFunctionTable(WrapperD3D11Interface& table) const { + virtual Result FillFunctionTable(MeshShaderInterface& table) const { table = {}; return Result::UNSUPPORTED; } - virtual Result FillFunctionTable(WrapperD3D12Interface& table) const { + virtual Result FillFunctionTable(RayTracingInterface& table) const { table = {}; return Result::UNSUPPORTED; } - virtual Result FillFunctionTable(WrapperVKInterface& table) const { + virtual Result FillFunctionTable(StreamerInterface& table) const { table = {}; return Result::UNSUPPORTED; } @@ -68,17 +68,22 @@ struct DeviceBase { return Result::UNSUPPORTED; } - virtual Result FillFunctionTable(RayTracingInterface& table) const { + virtual Result FillFunctionTable(ResourceAllocatorInterface& table) const { table = {}; return Result::UNSUPPORTED; } - virtual Result FillFunctionTable(MeshShaderInterface& table) const { + virtual Result FillFunctionTable(WrapperD3D11Interface& table) const { table = {}; return Result::UNSUPPORTED; } - virtual Result FillFunctionTable(LowLatencyInterface& table) const { + virtual Result FillFunctionTable(WrapperD3D12Interface& table) const { + table = {}; + return Result::UNSUPPORTED; + } + + virtual Result FillFunctionTable(WrapperVKInterface& table) const { table = {}; return Result::UNSUPPORTED; } @@ -97,16 +102,25 @@ struct DeviceBase { void 
Core_Descriptor_PartiallyFillFunctionTable##API(CoreInterface& table); \ void Core_DescriptorPool_PartiallyFillFunctionTable##API(CoreInterface& table); \ void Core_DescriptorSet_PartiallyFillFunctionTable##API(CoreInterface& table); \ + void Core_Device_PartiallyFillFunctionTable##API(CoreInterface& table); \ void Core_Fence_PartiallyFillFunctionTable##API(CoreInterface& table); \ void Core_QueryPool_PartiallyFillFunctionTable##API(CoreInterface& table); \ void Core_Texture_PartiallyFillFunctionTable##API(CoreInterface& table); \ - void SwapChain_PartiallyFillFunctionTable##API(SwapChainInterface& table); \ - void RayTracing_CommandBuffer_PartiallyFillFunctionTable##API(RayTracingInterface& table); \ - void RayTracing_AccelerationStructure_PartiallyFillFunctionTable##API(RayTracingInterface& table); \ - void MeshShader_CommandBuffer_PartiallyFillFunctionTable##API(MeshShaderInterface& table); \ void Helper_CommandQueue_PartiallyFillFunctionTable##API(HelperInterface& table); \ - void LowLatency_SwapChain_PartiallyFillFunctionTable##API(LowLatencyInterface& table); \ - void LowLatency_CommandQueue_PartiallyFillFunctionTable##API(LowLatencyInterface& table) + void Helper_Device_PartiallyFillFunctionTable##API(HelperInterface& table); \ + void LowLatency_CommandQueue_PartiallyFillFunctionTable##API(LowLatencyInterface& table); \ + void LowLatency_SwapChain_SwapChain_PartiallyFillFunctionTable##API(LowLatencyInterface& table); \ + void MeshShader_CommandBuffer_PartiallyFillFunctionTable##API(MeshShaderInterface& table); \ + void RayTracing_AccelerationStructure_PartiallyFillFunctionTable##API(RayTracingInterface& table); \ + void RayTracing_CommandBuffer_PartiallyFillFunctionTable##API(RayTracingInterface& table); \ + void RayTracing_Device_PartiallyFillFunctionTable##API(RayTracingInterface& table); \ + void Streamer_Device_PartiallyFillFunctionTable##API(StreamerInterface& table); \ + void SwapChain_SwapChain_PartiallyFillFunctionTable##API(SwapChainInterface& table); \ + void SwapChain_Device_PartiallyFillFunctionTable##API(SwapChainInterface& table); \ + void ResourceAllocator_Device_PartiallyFillFunctionTable##API(ResourceAllocatorInterface& table); \ + void WrapperD3D11_Device_PartiallyFillFunctionTable##API(WrapperD3D11Interface& table); \ + void WrapperD3D12_Device_PartiallyFillFunctionTable##API(WrapperD3D12Interface& table); \ + void WrapperVK_Device_PartiallyFillFunctionTable##API(WrapperVKInterface& table) #define Define_Core_Buffer_PartiallyFillFunctionTable(API) \ void Core_Buffer_PartiallyFillFunctionTable##API(CoreInterface& table) { \ @@ -125,7 +139,6 @@ struct DeviceBase { #define Define_Core_CommandBuffer_PartiallyFillFunctionTable(API) \ void Core_CommandBuffer_PartiallyFillFunctionTable##API(CoreInterface& table) { \ - table.DestroyCommandBuffer = ::DestroyCommandBuffer; \ table.BeginCommandBuffer = ::BeginCommandBuffer; \ table.CmdSetDescriptorPool = ::CmdSetDescriptorPool; \ table.CmdSetDescriptorSet = ::CmdSetDescriptorSet; \ @@ -194,6 +207,50 @@ struct DeviceBase { table.CopyDescriptorSet = ::CopyDescriptorSet; \ } +#define Define_Core_Device_PartiallyFillFunctionTable(API) \ + void Core_Device_PartiallyFillFunctionTable##API(CoreInterface& table) { \ + table.GetDeviceDesc = ::GetDeviceDesc; \ + table.GetBufferDesc = ::GetBufferDesc; \ + table.GetTextureDesc = ::GetTextureDesc; \ + table.GetFormatSupport = ::GetFormatSupport; \ + table.GetBufferMemoryDesc = ::GetBufferMemoryDesc; \ + table.GetTextureMemoryDesc = ::GetTextureMemoryDesc; \ + table.GetCommandQueue = 
::GetCommandQueue; \ + table.CreateCommandAllocator = ::CreateCommandAllocator; \ + table.CreateDescriptorPool = ::CreateDescriptorPool; \ + table.CreateBuffer = ::CreateBuffer; \ + table.CreateTexture = ::CreateTexture; \ + table.CreateBufferView = ::CreateBufferView; \ + table.CreateTexture1DView = ::CreateTexture1DView; \ + table.CreateTexture2DView = ::CreateTexture2DView; \ + table.CreateTexture3DView = ::CreateTexture3DView; \ + table.CreateSampler = ::CreateSampler; \ + table.CreatePipelineLayout = ::CreatePipelineLayout; \ + table.CreateGraphicsPipeline = ::CreateGraphicsPipeline; \ + table.CreateComputePipeline = ::CreateComputePipeline; \ + table.CreateQueryPool = ::CreateQueryPool; \ + table.CreateFence = ::CreateFence; \ + table.DestroyCommandBuffer = ::DestroyCommandBuffer; \ + table.DestroyCommandAllocator = ::DestroyCommandAllocator; \ + table.DestroyDescriptorPool = ::DestroyDescriptorPool; \ + table.DestroyBuffer = ::DestroyBuffer; \ + table.DestroyTexture = ::DestroyTexture; \ + table.DestroyDescriptor = ::DestroyDescriptor; \ + table.DestroyPipelineLayout = ::DestroyPipelineLayout; \ + table.DestroyPipeline = ::DestroyPipeline; \ + table.DestroyQueryPool = ::DestroyQueryPool; \ + table.DestroyFence = ::DestroyFence; \ + table.AllocateMemory = ::AllocateMemory; \ + table.BindBufferMemory = ::BindBufferMemory; \ + table.BindTextureMemory = ::BindTextureMemory; \ + table.FreeMemory = ::FreeMemory; \ + table.SetDeviceDebugName = ::SetDeviceDebugName; \ + table.SetPipelineDebugName = ::SetPipelineDebugName; \ + table.SetPipelineLayoutDebugName = ::SetPipelineLayoutDebugName; \ + table.SetMemoryDebugName = ::SetMemoryDebugName; \ + table.GetDeviceNativeObject = ::GetDeviceNativeObject; \ + } + #define Define_Core_Fence_PartiallyFillFunctionTable(API) \ void Core_Fence_PartiallyFillFunctionTable##API(CoreInterface& table) { \ table.GetFenceValue = ::GetFenceValue; \ @@ -213,14 +270,46 @@ struct DeviceBase { table.GetTextureNativeObject = ::GetTextureNativeObject; \ } -#define Define_SwapChain_PartiallyFillFunctionTable(API) \ - void SwapChain_PartiallyFillFunctionTable##API(SwapChainInterface& table) { \ - table.SetSwapChainDebugName = ::SetSwapChainDebugName; \ - table.GetSwapChainTextures = ::GetSwapChainTextures; \ - table.AcquireNextSwapChainTexture = ::AcquireNextSwapChainTexture; \ - table.WaitForPresent = ::WaitForPresent; \ - table.QueuePresent = ::QueuePresent; \ - table.GetDisplayDesc = ::GetDisplayDesc; \ +#define Define_Helper_CommandQueue_PartiallyFillFunctionTable(API) \ + void Helper_CommandQueue_PartiallyFillFunctionTable##API(HelperInterface& table) { \ + table.UploadData = ::UploadData; \ + table.WaitForIdle = ::WaitForIdle; \ + } + +#define Define_Helper_Device_PartiallyFillFunctionTable(API) \ + void Helper_Device_PartiallyFillFunctionTable##API(HelperInterface& table) { \ + table.CalculateAllocationNumber = ::CalculateAllocationNumber; \ + table.AllocateAndBindMemory = ::AllocateAndBindMemory; \ + table.QueryVideoMemoryInfo = ::QueryVideoMemoryInfo; \ + } + +#define Define_LowLatency_CommandQueue_PartiallyFillFunctionTable(API) \ + void LowLatency_CommandQueue_PartiallyFillFunctionTable##API(LowLatencyInterface& table) { \ + table.QueueSubmitTrackable = ::QueueSubmitTrackable; \ + } + +#define Define_LowLatency_SwapChain_SwapChain_PartiallyFillFunctionTable(API) \ + void LowLatency_SwapChain_SwapChain_PartiallyFillFunctionTable##API(LowLatencyInterface& table) { \ + table.SetLatencySleepMode = ::SetLatencySleepMode; \ + table.SetLatencyMarker = 
::SetLatencyMarker; \ + table.LatencySleep = ::LatencySleep; \ + table.GetLatencyReport = ::GetLatencyReport; \ + } + +#define Define_MeshShader_CommandBuffer_PartiallyFillFunctionTable(API) \ + void MeshShader_CommandBuffer_PartiallyFillFunctionTable##API(MeshShaderInterface& table) { \ + table.CmdDrawMeshTasks = ::CmdDrawMeshTasks; \ + table.CmdDrawMeshTasksIndirect = ::CmdDrawMeshTasksIndirect; \ + } + +#define Define_RayTracing_AccelerationStructure_PartiallyFillFunctionTable(API) \ + void RayTracing_AccelerationStructure_PartiallyFillFunctionTable##API(RayTracingInterface& table) { \ + table.CreateAccelerationStructureDescriptor = ::CreateAccelerationStructureDescriptor; \ + table.GetAccelerationStructureUpdateScratchBufferSize = ::GetAccelerationStructureUpdateScratchBufferSize; \ + table.GetAccelerationStructureBuildScratchBufferSize = ::GetAccelerationStructureBuildScratchBufferSize; \ + table.GetAccelerationStructureHandle = ::GetAccelerationStructureHandle; \ + table.SetAccelerationStructureDebugName = ::SetAccelerationStructureDebugName; \ + table.GetAccelerationStructureNativeObject = ::GetAccelerationStructureNativeObject; \ } #define Define_RayTracing_CommandBuffer_PartiallyFillFunctionTable(API) \ @@ -235,38 +324,83 @@ struct DeviceBase { table.CmdDispatchRaysIndirect = ::CmdDispatchRaysIndirect; \ } -#define Define_RayTracing_AccelerationStructure_PartiallyFillFunctionTable(API) \ - void RayTracing_AccelerationStructure_PartiallyFillFunctionTable##API(RayTracingInterface& table) { \ - table.CreateAccelerationStructureDescriptor = ::CreateAccelerationStructureDescriptor; \ +#define Define_RayTracing_Device_PartiallyFillFunctionTable(API) \ + void RayTracing_Device_PartiallyFillFunctionTable##API(RayTracingInterface& table) { \ table.GetAccelerationStructureMemoryDesc = ::GetAccelerationStructureMemoryDesc; \ - table.GetAccelerationStructureUpdateScratchBufferSize = ::GetAccelerationStructureUpdateScratchBufferSize; \ - table.GetAccelerationStructureBuildScratchBufferSize = ::GetAccelerationStructureBuildScratchBufferSize; \ - table.GetAccelerationStructureHandle = ::GetAccelerationStructureHandle; \ - table.SetAccelerationStructureDebugName = ::SetAccelerationStructureDebugName; \ - table.GetAccelerationStructureNativeObject = ::GetAccelerationStructureNativeObject; \ + table.CreateRayTracingPipeline = ::CreateRayTracingPipeline; \ + table.CreateAccelerationStructure = ::CreateAccelerationStructure; \ + table.BindAccelerationStructureMemory = ::BindAccelerationStructureMemory; \ + table.DestroyAccelerationStructure = ::DestroyAccelerationStructure; \ } -#define Define_MeshShader_CommandBuffer_PartiallyFillFunctionTable(API) \ - void MeshShader_CommandBuffer_PartiallyFillFunctionTable##API(MeshShaderInterface& table) { \ - table.CmdDrawMeshTasks = ::CmdDrawMeshTasks; \ - table.CmdDrawMeshTasksIndirect = ::CmdDrawMeshTasksIndirect; \ +#define Define_Streamer_Device_PartiallyFillFunctionTable(API) \ + void Streamer_Device_PartiallyFillFunctionTable##API(StreamerInterface& table) { \ + table.CreateStreamer = ::CreateStreamer; \ + table.DestroyStreamer = ::DestroyStreamer; \ + table.GetStreamerConstantBuffer = ::GetStreamerConstantBuffer; \ + table.UpdateStreamerConstantBuffer = ::UpdateStreamerConstantBuffer; \ + table.AddStreamerBufferUpdateRequest = ::AddStreamerBufferUpdateRequest; \ + table.AddStreamerTextureUpdateRequest = ::AddStreamerTextureUpdateRequest; \ + table.CopyStreamerUpdateRequests = ::CopyStreamerUpdateRequests; \ + table.GetStreamerDynamicBuffer = 
::GetStreamerDynamicBuffer; \ + table.CmdUploadStreamerUpdateRequests = ::CmdUploadStreamerUpdateRequests; \ } -#define Define_Helper_CommandQueue_PartiallyFillFunctionTable(API) \ - void Helper_CommandQueue_PartiallyFillFunctionTable##API(HelperInterface& table) { \ - table.UploadData = ::UploadData; \ - table.WaitForIdle = ::WaitForIdle; \ +#define Define_SwapChain_Device_PartiallyFillFunctionTable(API) \ + void SwapChain_Device_PartiallyFillFunctionTable##API(SwapChainInterface& table) { \ + table.CreateSwapChain = ::CreateSwapChain; \ + table.DestroySwapChain = ::DestroySwapChain; \ } -#define Define_LowLatency_SwapChain_PartiallyFillFunctionTable(API) \ - void LowLatency_SwapChain_PartiallyFillFunctionTable##API(LowLatencyInterface& table) { \ - table.SetLatencySleepMode = ::SetLatencySleepMode; \ - table.SetLatencyMarker = ::SetLatencyMarker; \ - table.LatencySleep = ::LatencySleep; \ - table.GetLatencyReport = ::GetLatencyReport; \ +#define Define_SwapChain_SwapChain_PartiallyFillFunctionTable(API) \ + void SwapChain_SwapChain_PartiallyFillFunctionTable##API(SwapChainInterface& table) { \ + table.SetSwapChainDebugName = ::SetSwapChainDebugName; \ + table.GetSwapChainTextures = ::GetSwapChainTextures; \ + table.AcquireNextSwapChainTexture = ::AcquireNextSwapChainTexture; \ + table.WaitForPresent = ::WaitForPresent; \ + table.QueuePresent = ::QueuePresent; \ + table.GetDisplayDesc = ::GetDisplayDesc; \ } -#define Define_LowLatency_CommandQueue_PartiallyFillFunctionTable(API) \ - void LowLatency_CommandQueue_PartiallyFillFunctionTable##API(LowLatencyInterface& table) { \ - table.QueueSubmitTrackable = ::QueueSubmitTrackable; \ +#define Define_ResourceAllocator_Device_PartiallyFillFunctionTable(API) \ + void ResourceAllocator_Device_PartiallyFillFunctionTable##API(ResourceAllocatorInterface& table) { \ + table.AllocateBuffer = ::AllocateBuffer; \ + table.AllocateTexture = ::AllocateTexture; \ + table.AllocateAccelerationStructure = ::AllocateAccelerationStructure; \ + } + +#define Define_WrapperD3D11_Device_PartiallyFillFunctionTable(API) \ + void WrapperD3D11_Device_PartiallyFillFunctionTable##API(WrapperD3D11Interface& table) { \ + table.CreateCommandBufferD3D11 = ::CreateCommandBufferD3D11; \ + table.CreateTextureD3D11 = ::CreateTextureD3D11; \ + table.CreateBufferD3D11 = ::CreateBufferD3D11; \ + } + +#define Define_WrapperD3D12_Device_PartiallyFillFunctionTable(API) \ + void WrapperD3D12_Device_PartiallyFillFunctionTable##API(WrapperD3D12Interface& table) { \ + table.CreateCommandBufferD3D12 = ::CreateCommandBufferD3D12; \ + table.CreateDescriptorPoolD3D12 = ::CreateDescriptorPoolD3D12; \ + table.CreateBufferD3D12 = ::CreateBufferD3D12; \ + table.CreateTextureD3D12 = ::CreateTextureD3D12; \ + table.CreateMemoryD3D12 = ::CreateMemoryD3D12; \ + table.CreateAccelerationStructureD3D12 = ::CreateAccelerationStructureD3D12; \ + } + +#define Define_WrapperVK_Device_PartiallyFillFunctionTable(API) \ + void WrapperVK_Device_PartiallyFillFunctionTable##API(WrapperVKInterface& table) { \ + table.CreateCommandQueueVK = ::CreateCommandQueueVK; \ + table.CreateCommandAllocatorVK = ::CreateCommandAllocatorVK; \ + table.CreateCommandBufferVK = ::CreateCommandBufferVK; \ + table.CreateDescriptorPoolVK = ::CreateDescriptorPoolVK; \ + table.CreateBufferVK = ::CreateBufferVK; \ + table.CreateTextureVK = ::CreateTextureVK; \ + table.CreateMemoryVK = ::CreateMemoryVK; \ + table.CreateGraphicsPipelineVK = ::CreateGraphicsPipelineVK; \ + table.CreateComputePipelineVK = ::CreateComputePipelineVK; \ + 
table.CreateQueryPoolVK = ::CreateQueryPoolVK; \ + table.CreateAccelerationStructureVK = ::CreateAccelerationStructureVK; \ + table.GetPhysicalDeviceVK = ::GetPhysicalDeviceVK; \ + table.GetInstanceVK = ::GetInstanceVK; \ + table.GetDeviceProcAddrVK = ::GetDeviceProcAddrVK; \ + table.GetInstanceProcAddrVK = ::GetInstanceProcAddrVK; \ } diff --git a/Source/Shared/HelperDataUpload.cpp b/Source/Shared/HelperDataUpload.cpp index 5bd73a29..b6389098 100644 --- a/Source/Shared/HelperDataUpload.cpp +++ b/Source/Shared/HelperDataUpload.cpp @@ -93,7 +93,11 @@ Result HelperDataUpload::UploadData( result = UploadBuffers(bufferUploadDescs, bufferUploadDescNum); } - Destroy(); + NRI.DestroyCommandBuffer(*m_CommandBuffer); + NRI.DestroyCommandAllocator(*m_CommandAllocators); + NRI.DestroyFence(*m_Fence); + NRI.DestroyBuffer(*m_UploadBuffer); + NRI.FreeMemory(*m_UploadBufferMemory); return result; } @@ -137,14 +141,6 @@ Result HelperDataUpload::Create() { return result; } -void HelperDataUpload::Destroy() { - NRI.DestroyCommandBuffer(*m_CommandBuffer); - NRI.DestroyCommandAllocator(*m_CommandAllocators); - NRI.DestroyFence(*m_Fence); - NRI.DestroyBuffer(*m_UploadBuffer); - NRI.FreeMemory(*m_UploadBufferMemory); -} - Result HelperDataUpload::UploadTextures(const TextureUploadDesc* textureUploadDescs, uint32_t textureDataDescNum) { if (!textureDataDescNum) return Result::SUCCESS; diff --git a/Source/Shared/HelperDataUpload.h b/Source/Shared/HelperDataUpload.h index b9a320de..adf1cc15 100644 --- a/Source/Shared/HelperDataUpload.h +++ b/Source/Shared/HelperDataUpload.h @@ -11,7 +11,6 @@ struct HelperDataUpload { private: nri::Result Create(); - void Destroy(); nri::Result UploadTextures(const nri::TextureUploadDesc* textureDataDescs, uint32_t textureDataDescNum); nri::Result UploadBuffers(const nri::BufferUploadDesc* bufferDataDescs, uint32_t bufferDataDescNum); nri::Result EndCommandBuffersAndSubmit(); diff --git a/Source/Shared/HelperDeviceMemoryAllocator.cpp b/Source/Shared/HelperDeviceMemoryAllocator.cpp index d51ecbf7..1c3f3642 100644 --- a/Source/Shared/HelperDeviceMemoryAllocator.cpp +++ b/Source/Shared/HelperDeviceMemoryAllocator.cpp @@ -125,7 +125,7 @@ HelperDeviceMemoryAllocator::MemoryHeap& HelperDeviceMemoryAllocator::FindOrCrea size_t j = 0; for (; j < m_Heaps.size(); j++) { const MemoryHeap& heap = m_Heaps[j]; - + uint64_t offset = Align(heap.size, memoryDesc.alignment); uint64_t newSize = offset + memoryDesc.size; diff --git a/Source/Shared/SharedExternal.cpp b/Source/Shared/SharedExternal.cpp index cd10fb87..d8ad2121 100644 --- a/Source/Shared/SharedExternal.cpp +++ b/Source/Shared/SharedExternal.cpp @@ -853,10 +853,10 @@ void nri::DeviceBase::ReportMessage(nri::Message messageType, const char* file, char message[4096]; int32_t written = 0; - if (desc.adapterDesc.description[0] == '\0') + if (desc.adapterDesc.name[0] == '\0') written = snprintf(message, GetCountOf(message), "NRI::%s(%s:%u) - %s::Unknown - ", messageTypeName, file, line, graphicsAPIName); else - written = snprintf(message, GetCountOf(message), "NRI::%s(%s:%u) - %s::%s - ", messageTypeName, file, line, graphicsAPIName, desc.adapterDesc.description); + written = snprintf(message, GetCountOf(message), "NRI::%s(%s:%u) - %s::%s - ", messageTypeName, file, line, graphicsAPIName, desc.adapterDesc.name); va_list argptr; va_start(argptr, format); diff --git a/Source/Shared/SharedExternal.h b/Source/Shared/SharedExternal.h index b7eec38d..d3e12e7d 100644 --- a/Source/Shared/SharedExternal.h +++ b/Source/Shared/SharedExternal.h @@ -9,6 
+9,7 @@ #include "Extensions/NRILowLatency.h" #include "Extensions/NRIMeshShader.h" #include "Extensions/NRIRayTracing.h" +#include "Extensions/NRIResourceAllocator.h" #include "Extensions/NRIStreamer.h" #include "Extensions/NRISwapChain.h" #include "Extensions/NRIWrapperD3D11.h" @@ -23,7 +24,7 @@ #include "Lock.h" -typedef nri::MemoryAllocatorInterface MemoryAllocatorInterface; +typedef nri::AllocationCallbacks AllocationCallbacks; #include "StdAllocator.h" #ifdef _WIN32 @@ -147,8 +148,7 @@ void* GetSharedLibraryFunction(Library& library, const char* name); void UnloadSharedLibrary(Library& library); extern const char* VULKAN_LOADER_NAME; -//================================================================================================================ -// TODO: This code is Windows/D3D specific, so it's probably better to move it into a separate header +// Windows/D3D specific #ifdef _WIN32 struct IUnknown; diff --git a/Source/Shared/StdAllocator.h b/Source/Shared/StdAllocator.h index fe9ec25f..c4ca5f5f 100644 --- a/Source/Shared/StdAllocator.h +++ b/Source/Shared/StdAllocator.h @@ -43,8 +43,7 @@ inline void* AlignedMalloc(void* userArg, size_t size, size_t alignment) { StdAllocator_MaybeUnused(userArg); uint8_t* memory = (uint8_t*)malloc(size + sizeof(uint8_t*) + alignment - 1); - - if (memory == nullptr) + if (!memory) return nullptr; uint8_t* alignedMemory = Align(memory + sizeof(uint8_t*), alignment); @@ -55,14 +54,14 @@ inline void* AlignedMalloc(void* userArg, size_t size, size_t alignment) { } inline void* AlignedRealloc(void* userArg, void* memory, size_t size, size_t alignment) { - if (memory == nullptr) + if (!memory) return AlignedMalloc(userArg, size, alignment); uint8_t** memoryHeader = (uint8_t**)memory - 1; uint8_t* oldMemory = *memoryHeader; - uint8_t* newMemory = (uint8_t*)realloc(oldMemory, size + sizeof(uint8_t*) + alignment - 1); - if (newMemory == nullptr) + uint8_t* newMemory = (uint8_t*)realloc(oldMemory, size + sizeof(uint8_t*) + alignment - 1); + if (!newMemory) return nullptr; if (newMemory == oldMemory) @@ -78,7 +77,7 @@ inline void* AlignedRealloc(void* userArg, void* memory, size_t size, size_t ali inline void AlignedFree(void* userArg, void* memory) { StdAllocator_MaybeUnused(userArg); - if (memory == nullptr) + if (!memory) return; uint8_t** memoryHeader = (uint8_t**)memory - 1; @@ -88,13 +87,13 @@ inline void AlignedFree(void* userArg, void* memory) { #endif -inline void CheckAndSetDefaultAllocator(MemoryAllocatorInterface& memoryAllocatorInterface) { - if (memoryAllocatorInterface.Allocate != nullptr) +inline void CheckAndSetDefaultAllocator(AllocationCallbacks& allocationCallbacks) { + if (allocationCallbacks.Allocate) return; - memoryAllocatorInterface.Allocate = AlignedMalloc; - memoryAllocatorInterface.Reallocate = AlignedRealloc; - memoryAllocatorInterface.Free = AlignedFree; + allocationCallbacks.Allocate = AlignedMalloc; + allocationCallbacks.Reallocate = AlignedRealloc; + allocationCallbacks.Free = AlignedFree; } template @@ -105,7 +104,7 @@ struct StdAllocator { typedef std::true_type propagate_on_container_move_assignment; typedef std::false_type is_always_equal; - StdAllocator(const MemoryAllocatorInterface& memoryAllocatorInterface) : m_Interface(memoryAllocatorInterface) { + StdAllocator(const AllocationCallbacks& allocationCallbacks) : m_Interface(allocationCallbacks) { } StdAllocator(const StdAllocator& allocator) : m_Interface(allocator.GetInterface()) { @@ -128,7 +127,7 @@ struct StdAllocator { m_Interface.Free(m_Interface.userArg, 
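For context on the `AlignedMalloc`/`AlignedRealloc`/`AlignedFree` trio touched in StdAllocator.h: they over-allocate and stash the original `malloc` pointer immediately below the aligned block. A standalone sketch of that header-pointer scheme (NRI's versions additionally thread a `userArg` through; this assumes a power-of-two alignment at least pointer-sized):

```cpp
#include <cstdint>
#include <cstdlib>

static void* AlignedMallocSketch(size_t size, size_t alignment) {
    // Over-allocate: room for the original pointer plus worst-case alignment slack.
    uint8_t* memory = (uint8_t*)malloc(size + sizeof(uint8_t*) + alignment - 1);
    if (!memory)
        return nullptr;

    // Align past the stored header (power-of-two alignment assumed).
    uintptr_t raw = (uintptr_t)(memory + sizeof(uint8_t*));
    uint8_t* aligned = (uint8_t*)((raw + alignment - 1) & ~(uintptr_t)(alignment - 1));

    ((uint8_t**)aligned)[-1] = memory; // stash the malloc'ed pointer just below
    return aligned;
}

static void AlignedFreeSketch(void* memory) {
    if (memory)
        free(((uint8_t**)memory)[-1]); // recover the original pointer
}
```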
memory);
     }

-    const MemoryAllocatorInterface& GetInterface() const {
+    const AllocationCallbacks& GetInterface() const {
         return m_Interface;
     }

@@ -136,7 +135,7 @@
     using other = StdAllocator<U>;

 private:
-    MemoryAllocatorInterface m_Interface = {};
+    AllocationCallbacks m_Interface = {};
 };

 template <typename T>
@@ -186,40 +185,24 @@ inline T* Allocate(StdAllocator<uint8_t>& allocator, Args&&... args) {
     return object;
 }

-template <typename T, typename... Args>
-inline T* AllocateArray(StdAllocator<uint8_t>& allocator, size_t arraySize, Args&&... args) {
-    const auto& lowLevelAllocator = allocator.GetInterface();
-    T* array = (T*)lowLevelAllocator.Allocate(lowLevelAllocator.userArg, arraySize * sizeof(T), alignof(T));
-
-    if (array) {
-        for (size_t i = 0; i < arraySize; i++)
-            new (array + i) T(std::forward<Args>(args)...);
-    }
-
-    return array;
-}
-
 template <typename T>
-inline void Deallocate(StdAllocator<uint8_t>& allocator, T* object) {
-    if (!object)
-        return;
-
-    object->~T();
+inline void Destroy(StdAllocator<uint8_t>& allocator, T* object) {
+    if (object) {
+        object->~T();

-    const auto& lowLevelAllocator = allocator.GetInterface();
-    lowLevelAllocator.Free(lowLevelAllocator.userArg, object);
+        const auto& lowLevelAllocator = allocator.GetInterface();
+        lowLevelAllocator.Free(lowLevelAllocator.userArg, object);
+    }
 }

 template <typename T>
-inline void DeallocateArray(StdAllocator<uint8_t>& allocator, T* array, size_t arraySize) {
-    if (!array)
-        return;
+inline void Destroy(T* object) {
+    if (object) {
+        object->~T();

-    for (size_t i = 0; i < arraySize; i++)
-        (array + i)->~T();
-
-    const auto& lowLevelAllocator = allocator.GetInterface();
-    lowLevelAllocator.Free(lowLevelAllocator.userArg, array);
+        const auto& lowLevelAllocator = ((nri::DeviceBase&)object->GetDevice()).GetStdAllocator().GetInterface();
+        lowLevelAllocator.Free(lowLevelAllocator.userArg, object);
+    }
 }

 //================================================================================================================
@@ -234,19 +217,28 @@
 using String = std::basic_string<char, std::char_traits<char>, StdAllocator<char>>;

 template <typename T>
-constexpr size_t CountStackAllocationSize(size_t arraySize) {
-    return arraySize * sizeof(T) + alignof(T);
-}
+struct Scratch {
+    T* mem;
+    const AllocationCallbacks& allocator;
+    bool isHeap;
+
+    ~Scratch() {
+        if (isHeap)
+            allocator.Free(allocator.userArg, mem);
+    }
+
+    inline operator T*() const {
+        return mem;
+    }
+};

-#define ALLOCATE_SCRATCH(device, T, arraySize) \
-    (CountStackAllocationSize<T>(arraySize) <= STACK_ALLOC_MAX_SIZE) ? Align(((arraySize) ? (T*)_alloca(CountStackAllocationSize<T>(arraySize)) : nullptr), alignof(T)) \
-        : AllocateArray<T>((device).GetStdAllocator(), arraySize);
+constexpr size_t MAX_STACK_ALLOC_SIZE = 128 * 1024;

-#define FREE_SCRATCH(device, array, arraySize) \
-    if (array != nullptr && CountStackAllocationSize(arraySize) > STACK_ALLOC_MAX_SIZE) \
-        DeallocateArray((device).GetStdAllocator(), array, arraySize);
+#define AllocateScratch(device, T, elementNum) \
+    {((elementNum) * sizeof(T) + alignof(T)) > MAX_STACK_ALLOC_SIZE \
+            ? (T*)(device).GetStdAllocator().GetInterface().Allocate((device).GetStdAllocator().GetInterface().userArg, (elementNum) * sizeof(T), alignof(T)) \
+            : (T*)Align((elementNum) ? (T*)_alloca(((elementNum) * sizeof(T) + alignof(T))) : nullptr, alignof(T)), \
+        (device).GetStdAllocator().GetInterface(), ((elementNum) * sizeof(T) + alignof(T)) > MAX_STACK_ALLOC_SIZE}

-#define STACK_ALLOC(T, arraySize) Align(((arraySize) ? (T*)_alloca(CountStackAllocationSize<T>(arraySize)) : nullptr), alignof(T))
+#define StackAlloc(T, elementNum) Align(((elementNum) ?
(T*)_alloca((elementNum) * sizeof(T) + alignof(T)) : nullptr), alignof(T)) diff --git a/Source/Shared/Streamer.cpp b/Source/Shared/Streamer.cpp index 930a97ef..f0b9686e 100644 --- a/Source/Shared/Streamer.cpp +++ b/Source/Shared/Streamer.cpp @@ -103,7 +103,7 @@ uint64_t StreamerImpl::AddStreamerTextureUpdateRequest(const TextureUpdateReques uint64_t alignedSize = alignedSlicePitch * d; uint64_t offset = m_DynamicDataOffsetBase + m_DynamicDataOffset; - m_TextureRequests.push_back({textureUpdateRequestDesc, m_DynamicDataOffset}); // store local offset + m_TextureRequests.push_back({textureUpdateRequestDesc, m_DynamicDataOffset}); // store local offset m_DynamicDataOffset += alignedSize; return offset; diff --git a/Source/VK/AccelerationStructureVK.cpp b/Source/VK/AccelerationStructureVK.cpp index 822c31e9..0ce96b90 100644 --- a/Source/VK/AccelerationStructureVK.cpp +++ b/Source/VK/AccelerationStructureVK.cpp @@ -11,112 +11,54 @@ using namespace nri; AccelerationStructureVK::~AccelerationStructureVK() { - const auto& vk = m_Device.GetDispatchTable(); - - if (!m_OwnsNativeObjects) - return; - - vk.DestroyAccelerationStructureKHR(m_Device, m_Handle, m_Device.GetAllocationCallbacks()); + if (m_OwnsNativeObjects) { + const auto& vk = m_Device.GetDispatchTable(); + vk.DestroyAccelerationStructureKHR(m_Device, m_Handle, m_Device.GetAllocationCallbacks()); - if (m_Buffer) - m_Device.DestroyBuffer(*(Buffer*)m_Buffer); + Destroy(m_Buffer); + } } Result AccelerationStructureVK::Create(const AccelerationStructureDesc& accelerationStructureDesc) { - m_OwnsNativeObjects = true; - m_Type = GetAccelerationStructureType(accelerationStructureDesc.type); - m_BuildFlags = GetAccelerationStructureBuildFlags(accelerationStructureDesc.flags); - - if (accelerationStructureDesc.type == AccelerationStructureType::BOTTOM_LEVEL) - PrecreateBottomLevel(accelerationStructureDesc); - else - PrecreateTopLevel(accelerationStructureDesc); + VkAccelerationStructureBuildSizesInfoKHR sizesInfo = {VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR}; + m_Device.GetAccelerationStructureBuildSizesInfo(accelerationStructureDesc, sizesInfo); BufferDesc bufferDesc = {}; - bufferDesc.size = m_AccelerationStructureSize; + bufferDesc.size = sizesInfo.accelerationStructureSize; bufferDesc.usageMask = BufferUsageBits::RAY_TRACING_BUFFER; Buffer* buffer = nullptr; - const Result result = m_Device.CreateBuffer(bufferDesc, buffer); - m_Buffer = (BufferVK*)buffer; + Result result = m_Device.CreateImplementation(buffer, bufferDesc); + if (result == Result::SUCCESS) { + m_Buffer = (BufferVK*)buffer; + m_BuildScratchSize = sizesInfo.buildScratchSize; + m_UpdateScratchSize = sizesInfo.updateScratchSize; + m_Type = GetAccelerationStructureType(accelerationStructureDesc.type); + m_AccelerationStructureSize = sizesInfo.accelerationStructureSize; + } return result; } Result AccelerationStructureVK::Create(const AccelerationStructureVKDesc& accelerationStructureDesc) { - m_OwnsNativeObjects = false; - - VkAccelerationStructureDeviceAddressInfoKHR deviceAddressInfo = {VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR}; - deviceAddressInfo.accelerationStructure = (VkAccelerationStructureKHR)accelerationStructureDesc.vkAccelerationStructure; + if (!accelerationStructureDesc.vkAccelerationStructure) + return Result::INVALID_ARGUMENT; - const auto& vk = m_Device.GetDispatchTable(); - const uint64_t deviceAddress = vk.GetAccelerationStructureDeviceAddressKHR(m_Device, &deviceAddressInfo); - - if (deviceAddress == 0) - return 
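The `Scratch` + `AllocateScratch` pair introduced in StdAllocator.h above replaces the `ALLOCATE_SCRATCH`/`FREE_SCRATCH` macro pair: small requests come from `_alloca`, large ones from the device allocator, and the destructor frees only the heap path. A hypothetical call site, assuming the NRI internals above are in scope (`FillViewports` is illustrative, not an NRI function):

```cpp
void FillViewports(DeviceVK& device, uint32_t viewportNum) {
    // Expands to a braced Scratch<VkViewport> initializer: requests over
    // MAX_STACK_ALLOC_SIZE go through the device allocator, smaller ones
    // through _alloca (alive until this function returns).
    Scratch<VkViewport> viewports = AllocateScratch(device, VkViewport, viewportNum);

    for (uint32_t i = 0; i < viewportNum; i++)
        viewports[i] = {0.0f, 0.0f, 1280.0f, 720.0f, 0.0f, 1.0f};

    // operator T*() lets Scratch decay to VkViewport* at API call sites;
    // ~Scratch frees the allocation only if the heap path was taken, which
    // is why the explicit FREE_SCRATCH calls disappear throughout this diff.
}
```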
Result::FAILURE; - - m_Type = VK_ACCELERATION_STRUCTURE_TYPE_MAX_ENUM_KHR; - m_BuildFlags = 0; + m_OwnsNativeObjects = false; m_Handle = (VkAccelerationStructureKHR)accelerationStructureDesc.vkAccelerationStructure; - m_DeviceAddress = deviceAddress; m_BuildScratchSize = accelerationStructureDesc.buildScratchSize; m_UpdateScratchSize = accelerationStructureDesc.updateScratchSize; - return Result::SUCCESS; -} - -void AccelerationStructureVK::PrecreateBottomLevel(const AccelerationStructureDesc& accelerationStructureDesc) { - const auto buildType = VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR; - - VkAccelerationStructureBuildGeometryInfoKHR buildInfo = {VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR}; - buildInfo.type = m_Type; - buildInfo.flags = m_BuildFlags; - buildInfo.geometryCount = accelerationStructureDesc.instanceOrGeometryObjectNum; - - VkAccelerationStructureBuildSizesInfoKHR sizeInfo = {VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR}; + // Device address + if (m_Device.m_IsDeviceAddressSupported) { + VkAccelerationStructureDeviceAddressInfoKHR deviceAddressInfo = {VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR}; + deviceAddressInfo.accelerationStructure = (VkAccelerationStructureKHR)accelerationStructureDesc.vkAccelerationStructure; - Vector geometries(m_Device.GetStdAllocator()); - uint32_t* primitiveMaxNums = ALLOCATE_SCRATCH(m_Device, uint32_t, accelerationStructureDesc.instanceOrGeometryObjectNum); - - geometries.resize(accelerationStructureDesc.instanceOrGeometryObjectNum); - ConvertGeometryObjectSizesVK(geometries.data(), primitiveMaxNums, accelerationStructureDesc.geometryObjects, (uint32_t)geometries.size()); - buildInfo.pGeometries = geometries.data(); - - const auto& vk = m_Device.GetDispatchTable(); - vk.GetAccelerationStructureBuildSizesKHR(m_Device, buildType, &buildInfo, primitiveMaxNums, &sizeInfo); - - m_BuildScratchSize = sizeInfo.buildScratchSize; - m_UpdateScratchSize = sizeInfo.updateScratchSize; - m_AccelerationStructureSize = sizeInfo.accelerationStructureSize; - - FREE_SCRATCH(m_Device, primitiveMaxNums, accelerationStructureDesc.instanceOrGeometryObjectNum); -} - -void AccelerationStructureVK::PrecreateTopLevel(const AccelerationStructureDesc& accelerationStructureDesc) { - const auto buildType = VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR; - - VkAccelerationStructureGeometryKHR geometry = {VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR}; - geometry.geometryType = VK_GEOMETRY_TYPE_INSTANCES_KHR; - geometry.geometry.triangles.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR; - geometry.geometry.aabbs.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_AABBS_DATA_KHR; - geometry.geometry.instances.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR; - - VkAccelerationStructureBuildGeometryInfoKHR buildInfo = {VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR}; - buildInfo.type = m_Type; - buildInfo.flags = m_BuildFlags; - buildInfo.geometryCount = 1; - buildInfo.pGeometries = &geometry; - - const uint32_t instanceMaxNum = accelerationStructureDesc.instanceOrGeometryObjectNum; - - VkAccelerationStructureBuildSizesInfoKHR sizeInfo = {VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR}; - - const auto& vk = m_Device.GetDispatchTable(); - vk.GetAccelerationStructureBuildSizesKHR(m_Device, buildType, &buildInfo, &instanceMaxNum, &sizeInfo); + const auto& vk = m_Device.GetDispatchTable(); + m_DeviceAddress = 
vk.GetAccelerationStructureDeviceAddressKHR(m_Device, &deviceAddressInfo); + } - m_BuildScratchSize = sizeInfo.buildScratchSize; - m_UpdateScratchSize = sizeInfo.updateScratchSize; - m_AccelerationStructureSize = sizeInfo.accelerationStructureSize; + return m_DeviceAddress ? Result::SUCCESS : Result::FAILURE; } Result AccelerationStructureVK::FinishCreation() { @@ -132,12 +74,14 @@ Result AccelerationStructureVK::FinishCreation() { VkResult result = vk.CreateAccelerationStructureKHR(m_Device, &accelerationStructureCreateInfo, m_Device.GetAllocationCallbacks(), &m_Handle); RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkCreateAccelerationStructureKHR returned %d", (int32_t)result); - VkAccelerationStructureDeviceAddressInfoKHR deviceAddressInfo = {VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR}; - deviceAddressInfo.accelerationStructure = m_Handle; - - m_DeviceAddress = vk.GetAccelerationStructureDeviceAddressKHR(m_Device, &deviceAddressInfo); + // Device address + if (m_Device.m_IsDeviceAddressSupported) { + VkAccelerationStructureDeviceAddressInfoKHR deviceAddressInfo = {VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR}; + deviceAddressInfo.accelerationStructure = m_Handle; + m_DeviceAddress = vk.GetAccelerationStructureDeviceAddressKHR(m_Device, &deviceAddressInfo); + } - return Result::SUCCESS; + return m_DeviceAddress ? Result::SUCCESS : Result::FAILURE; } //================================================================================================================ @@ -146,16 +90,14 @@ Result AccelerationStructureVK::FinishCreation() { inline void AccelerationStructureVK::SetDebugName(const char* name) { m_Device.SetDebugNameToTrivialObject(VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR, (uint64_t)m_Handle, name); - m_Buffer->SetDebugName(name); -} -inline void AccelerationStructureVK::GetMemoryDesc(MemoryDesc& memoryDesc) const { - m_Device.GetMemoryDesc(m_Buffer->GetDesc(), MemoryLocation::DEVICE, memoryDesc); + if (m_Buffer) + m_Buffer->SetDebugName(name); } inline Result AccelerationStructureVK::CreateDescriptor(Descriptor*& descriptor) const { DescriptorVK* descriptorImpl = Allocate(m_Device.GetStdAllocator(), m_Device); - + Result result = descriptorImpl->Create(m_Handle); if (result == Result::SUCCESS) { @@ -163,7 +105,7 @@ inline Result AccelerationStructureVK::CreateDescriptor(Descriptor*& descriptor) return Result::SUCCESS; } - Deallocate(m_Device.GetStdAllocator(), descriptorImpl); + Destroy(m_Device.GetStdAllocator(), descriptorImpl); return Result::SUCCESS; } diff --git a/Source/VK/AccelerationStructureVK.h b/Source/VK/AccelerationStructureVK.h index e800b4f7..233c1d26 100644 --- a/Source/VK/AccelerationStructureVK.h +++ b/Source/VK/AccelerationStructureVK.h @@ -23,14 +23,11 @@ struct AccelerationStructureVK { return m_Buffer; } - inline bool OwnsNativeObjects() const { - return m_OwnsNativeObjects; - } - ~AccelerationStructureVK(); Result Create(const AccelerationStructureDesc& accelerationStructureDesc); Result Create(const AccelerationStructureVKDesc& accelerationStructureDesc); + Result Create(const AllocateAccelerationStructureDesc& accelerationStructureDesc); Result FinishCreation(); //================================================================================================================ @@ -38,25 +35,20 @@ struct AccelerationStructureVK { //================================================================================================================ inline uint64_t GetUpdateScratchBufferSize() 
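`AccelerationStructureVK::Create` now delegates sizing to `DeviceVK::GetAccelerationStructureBuildSizesInfo` instead of the removed `PrecreateBottomLevel`/`PrecreateTopLevel` helpers. The underlying Vulkan query looks roughly like this; a raw-Vulkan sketch where the extension entry point is passed in explicitly (in real code it comes from `vkGetDeviceProcAddr`):

```cpp
#include <vulkan/vulkan.h>

VkAccelerationStructureBuildSizesInfoKHR QueryBlasSizes(
    PFN_vkGetAccelerationStructureBuildSizesKHR getSizes, VkDevice device,
    const VkAccelerationStructureGeometryKHR& geometry, uint32_t primitiveMaxNum) {
    VkAccelerationStructureBuildGeometryInfoKHR buildInfo = {VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR};
    buildInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR;
    buildInfo.geometryCount = 1;
    buildInfo.pGeometries = &geometry;

    // Out: accelerationStructureSize, buildScratchSize, updateScratchSize,
    // exactly the three values Create() caches on success.
    VkAccelerationStructureBuildSizesInfoKHR sizesInfo = {VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR};
    getSizes(device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &buildInfo, &primitiveMaxNum, &sizesInfo);

    return sizesInfo;
}
```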
const { - return Align(m_UpdateScratchSize, m_Device.GetDesc().rayTracingScratchAlignment); + return m_UpdateScratchSize; } inline uint64_t GetBuildScratchBufferSize() const { - return Align(m_BuildScratchSize, m_Device.GetDesc().rayTracingScratchAlignment); + return m_BuildScratchSize; } - inline VkDeviceAddress GetNativeHandle() const { + inline VkDeviceAddress GetDeviceAddress() const { return m_DeviceAddress; } void SetDebugName(const char* name); - void GetMemoryDesc(MemoryDesc& memoryDesc) const; Result CreateDescriptor(Descriptor*& descriptor) const; -private: - void PrecreateBottomLevel(const AccelerationStructureDesc& accelerationStructureDesc); - void PrecreateTopLevel(const AccelerationStructureDesc& accelerationStructureDesc); - private: DeviceVK& m_Device; VkAccelerationStructureKHR m_Handle = VK_NULL_HANDLE; @@ -64,10 +56,9 @@ struct AccelerationStructureVK { BufferVK* m_Buffer = nullptr; uint64_t m_BuildScratchSize = 0; uint64_t m_UpdateScratchSize = 0; - uint64_t m_AccelerationStructureSize = 0; - VkAccelerationStructureTypeKHR m_Type = (VkAccelerationStructureTypeKHR)0; - VkBuildAccelerationStructureFlagsKHR m_BuildFlags = (VkBuildAccelerationStructureFlagsKHR)0; - bool m_OwnsNativeObjects = false; + uint64_t m_AccelerationStructureSize = 0; // needed only for FinishCreation + VkAccelerationStructureTypeKHR m_Type = (VkAccelerationStructureTypeKHR)0; // needed only for FinishCreation + bool m_OwnsNativeObjects = true; }; } // namespace nri \ No newline at end of file diff --git a/Source/VK/AcceleratrionStructureVK.hpp b/Source/VK/AcceleratrionStructureVK.hpp index 3ec44d34..d51d0029 100644 --- a/Source/VK/AcceleratrionStructureVK.hpp +++ b/Source/VK/AcceleratrionStructureVK.hpp @@ -2,10 +2,6 @@ #pragma region[ RayTracing ] -static void NRI_CALL GetAccelerationStructureMemoryDesc(const AccelerationStructure& accelerationStructure, MemoryDesc& memoryDesc) { - ((AccelerationStructureVK&)accelerationStructure).GetMemoryDesc(memoryDesc); -} - static uint64_t NRI_CALL GetAccelerationStructureUpdateScratchBufferSize(const AccelerationStructure& accelerationStructure) { return ((AccelerationStructureVK&)accelerationStructure).GetUpdateScratchBufferSize(); } @@ -16,7 +12,7 @@ static uint64_t NRI_CALL GetAccelerationStructureBuildScratchBufferSize(const Ac static uint64_t NRI_CALL GetAccelerationStructureHandle(const AccelerationStructure& accelerationStructure) { static_assert(sizeof(uint64_t) == sizeof(VkDeviceAddress), "type mismatch"); - return (uint64_t)((AccelerationStructureVK&)accelerationStructure).GetNativeHandle(); + return (uint64_t)((AccelerationStructureVK&)accelerationStructure).GetDeviceAddress(); } static void NRI_CALL SetAccelerationStructureDebugName(AccelerationStructure& accelerationStructure, const char* name) { diff --git a/Source/VK/BufferVK.cpp b/Source/VK/BufferVK.cpp index 898667b9..05e78f2f 100644 --- a/Source/VK/BufferVK.cpp +++ b/Source/VK/BufferVK.cpp @@ -9,14 +9,17 @@ using namespace nri; BufferVK::~BufferVK() { - const auto& vk = m_Device.GetDispatchTable(); - - if (m_OwnsNativeObjects) - vk.DestroyBuffer(m_Device, m_Handle, m_Device.GetAllocationCallbacks()); + if (m_OwnsNativeObjects) { + const auto& vk = m_Device.GetDispatchTable(); + + if (m_VmaAllocation) + DestroyVma(); + else + vk.DestroyBuffer(m_Device, m_Handle, m_Device.GetAllocationCallbacks()); + } } Result BufferVK::Create(const BufferDesc& bufferDesc) { - m_OwnsNativeObjects = true; m_Desc = bufferDesc; VkBufferCreateInfo info = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO}; @@ -30,33 +33,42 
@@ Result BufferVK::Create(const BufferDesc& bufferDesc) { } Result BufferVK::Create(const BufferVKDesc& bufferDesc) { + if (!bufferDesc.vkBuffer) + return Result::INVALID_ARGUMENT; + m_OwnsNativeObjects = false; - m_Memory = (MemoryVK*)bufferDesc.memory; - m_MappedMemoryOffset = bufferDesc.memoryOffset; + m_Handle = (VkBuffer)bufferDesc.vkBuffer; + m_MappedMemory = bufferDesc.mappedMemory; + m_NonCoherentDeviceMemory = (VkDeviceMemory)bufferDesc.vkDeviceMemory; + m_DeviceAddress = (VkDeviceAddress)bufferDesc.deviceAddress; m_Desc.size = bufferDesc.size; m_Desc.structureStride = bufferDesc.structureStride; - m_Handle = (VkBuffer)bufferDesc.vkBuffer; - m_DeviceAddress = (VkDeviceAddress)bufferDesc.deviceAddress; - return Result::SUCCESS; } -void BufferVK::SetHostMemory(MemoryVK& memory, uint64_t memoryOffset) { - m_Memory = &memory; - m_MappedMemoryOffset = memoryOffset; -} +void BufferVK::FinishMemoryBinding(MemoryVK& memory, uint64_t memoryOffset) { + CHECK(m_OwnsNativeObjects, "Not for wrapped objects"); -void BufferVK::ReadDeviceAddress() { - if (!m_Device.m_IsDeviceAddressSupported) - return; + // Mapped memory + MemoryTypeInfo memoryTypeInfo = Unpack(memory.GetType()); + if (IsHostVisibleMemory(memoryTypeInfo.location)) { + m_MappedMemory = memory.GetMappedMemory(); + m_MappedMemoryOffset = memoryOffset; - VkBufferDeviceAddressInfo bufferDeviceAddressInfo = {VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO}; - bufferDeviceAddressInfo.buffer = m_Handle; + if (!m_Device.IsHostCoherentMemory(memoryTypeInfo.index)) + m_NonCoherentDeviceMemory = memory.GetHandle(); + } - const auto& vk = m_Device.GetDispatchTable(); - m_DeviceAddress = vk.GetBufferDeviceAddress(m_Device, &bufferDeviceAddressInfo); + // Device address + if (m_Device.m_IsDeviceAddressSupported) { + VkBufferDeviceAddressInfo bufferDeviceAddressInfo = {VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO}; + bufferDeviceAddressInfo.buffer = m_Handle; + + const auto& vk = m_Device.GetDispatchTable(); + m_DeviceAddress = vk.GetBufferDeviceAddress(m_Device, &bufferDeviceAddressInfo); + } } //================================================================================================================ @@ -68,21 +80,30 @@ void BufferVK::SetDebugName(const char* name) { } inline void* BufferVK::Map(uint64_t offset, uint64_t size) { - CHECK(m_Memory != nullptr, "The buffer does not support memory mapping."); - - m_MappedRangeOffset = offset; - m_MappedRangeSize = size; + CHECK(m_MappedMemory, "The buffer does not support memory mapping"); if (size == WHOLE_SIZE) size = m_Desc.size; - return m_Memory->GetMappedMemory() + m_MappedMemoryOffset + offset; + m_MappedMemoryRangeSize = size; + m_MappedMemoryRangeOffset = offset; + + offset += m_MappedMemoryOffset; + + return m_MappedMemory + offset; } inline void BufferVK::Unmap() { - // TODO: flush memory range if not HOST_COHERENT - // if (m_Memory->IsHostCoherent()) - // m_Memory->FlushMemoryRange(m_MappedMemoryOffset + m_MappedRangeOffset, m_MappedRangeSize); + if (m_NonCoherentDeviceMemory) { + VkMappedMemoryRange memoryRange = {VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE}; + memoryRange.memory = m_NonCoherentDeviceMemory; + memoryRange.offset = m_MappedMemoryOffset + m_MappedMemoryRangeOffset; + memoryRange.size = m_MappedMemoryRangeSize; + + const auto& vk = m_Device.GetDispatchTable(); + VkResult result = vk.FlushMappedMemoryRanges(m_Device, 1, &memoryRange); + RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, ReturnVoid(), "vkFlushMappedMemoryRanges returned %d", (int32_t)result); + } } #include 
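`FinishMemoryBinding` and the new `Unmap` above hinge on one Vulkan rule: writes through a mapped pointer are only implicitly visible on `HOST_COHERENT` memory types; otherwise the written range must be flushed. A short sketch of the check that decides whether `m_NonCoherentDeviceMemory` needs to be remembered:

```cpp
#include <vulkan/vulkan.h>

// A mapped, HOST_VISIBLE but not HOST_COHERENT memory type needs explicit
// vkFlushMappedMemoryRanges calls, so the VkDeviceMemory handle must be kept
// around for Unmap (as FinishMemoryBinding does above).
bool IsHostCoherent(const VkPhysicalDeviceMemoryProperties& props, uint32_t memoryTypeIndex) {
    VkMemoryPropertyFlags flags = props.memoryTypes[memoryTypeIndex].propertyFlags;
    return (flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0;
}
```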
"BufferVK.hpp" \ No newline at end of file diff --git a/Source/VK/BufferVK.h b/Source/VK/BufferVK.h index debcd6d4..0945f722 100644 --- a/Source/VK/BufferVK.h +++ b/Source/VK/BufferVK.h @@ -27,16 +27,13 @@ struct BufferVK { return m_Desc; } - inline bool OwnsNativeObjects() const { - return m_OwnsNativeObjects; - } - ~BufferVK(); Result Create(const BufferDesc& bufferDesc); Result Create(const BufferVKDesc& bufferDesc); - void SetHostMemory(MemoryVK& memory, uint64_t memoryOffset); - void ReadDeviceAddress(); + Result Create(const AllocateBufferDesc& bufferDesc); + void FinishMemoryBinding(MemoryVK& memory, uint64_t memoryOffset); + void DestroyVma(); //================================================================================================================ // NRI @@ -49,13 +46,15 @@ struct BufferVK { private: DeviceVK& m_Device; VkBuffer m_Handle = VK_NULL_HANDLE; - VkDeviceAddress m_DeviceAddress = {}; - BufferDesc m_Desc = {}; - MemoryVK* m_Memory = nullptr; + VkDeviceAddress m_DeviceAddress = 0; + uint8_t* m_MappedMemory = nullptr; + VkDeviceMemory m_NonCoherentDeviceMemory = VK_NULL_HANDLE; uint64_t m_MappedMemoryOffset = 0; - uint64_t m_MappedRangeOffset = 0; - uint64_t m_MappedRangeSize = 0; - bool m_OwnsNativeObjects = false; + uint64_t m_MappedMemoryRangeSize = 0; + uint64_t m_MappedMemoryRangeOffset = 0; + BufferDesc m_Desc = {}; + VmaAllocation_T* m_VmaAllocation = nullptr; + bool m_OwnsNativeObjects = true; }; inline VkDeviceAddress GetBufferDeviceAddress(const Buffer* buffer) { diff --git a/Source/VK/CommandAllocatorVK.cpp b/Source/VK/CommandAllocatorVK.cpp index 3b4fe806..ec55e633 100644 --- a/Source/VK/CommandAllocatorVK.cpp +++ b/Source/VK/CommandAllocatorVK.cpp @@ -9,13 +9,13 @@ using namespace nri; CommandAllocatorVK::~CommandAllocatorVK() { - const auto& vk = m_Device.GetDispatchTable(); - if (m_Handle != VK_NULL_HANDLE && m_OwnsNativeObjects) + if (m_OwnsNativeObjects) { + const auto& vk = m_Device.GetDispatchTable(); vk.DestroyCommandPool(m_Device, m_Handle, m_Device.GetAllocationCallbacks()); + } } Result CommandAllocatorVK::Create(const CommandQueue& commandQueue) { - m_OwnsNativeObjects = true; const CommandQueueVK& commandQueueImpl = (CommandQueueVK&)commandQueue; m_Type = commandQueueImpl.GetType(); @@ -30,6 +30,9 @@ Result CommandAllocatorVK::Create(const CommandQueue& commandQueue) { } Result CommandAllocatorVK::Create(const CommandAllocatorVKDesc& commandAllocatorDesc) { + if (!commandAllocatorDesc.vkCommandPool) + return Result::INVALID_ARGUMENT; + m_OwnsNativeObjects = false; m_Handle = (VkCommandPool)commandAllocatorDesc.vkCommandPool; m_Type = commandAllocatorDesc.commandQueueType; diff --git a/Source/VK/CommandAllocatorVK.h b/Source/VK/CommandAllocatorVK.h index fce19e76..7c3887a9 100644 --- a/Source/VK/CommandAllocatorVK.h +++ b/Source/VK/CommandAllocatorVK.h @@ -35,7 +35,7 @@ struct CommandAllocatorVK { DeviceVK& m_Device; VkCommandPool m_Handle = VK_NULL_HANDLE; CommandQueueType m_Type = (CommandQueueType)0; - bool m_OwnsNativeObjects = false; + bool m_OwnsNativeObjects = true; }; } // namespace nri diff --git a/Source/VK/CommandBufferVK.cpp b/Source/VK/CommandBufferVK.cpp index 50e8be55..8b82baad 100644 --- a/Source/VK/CommandBufferVK.cpp +++ b/Source/VK/CommandBufferVK.cpp @@ -73,7 +73,7 @@ inline Result CommandBufferVK::End() { } inline void CommandBufferVK::SetViewports(const Viewport* viewports, uint32_t viewportNum) { - VkViewport* vkViewports = STACK_ALLOC(VkViewport, viewportNum); + VkViewport* vkViewports = StackAlloc(VkViewport, 
viewportNum); for (uint32_t i = 0; i < viewportNum; i++) { const Viewport& viewport = viewports[i]; VkViewport& vkViewport = vkViewports[i]; @@ -94,7 +94,7 @@ inline void CommandBufferVK::SetViewports(const Viewport* viewports, uint32_t vi } inline void CommandBufferVK::SetScissors(const Rect* rects, uint32_t rectNum) { - VkRect2D* vkRects = STACK_ALLOC(VkRect2D, rectNum); + VkRect2D* vkRects = StackAlloc(VkRect2D, rectNum); for (uint32_t i = 0; i < rectNum; i++) { const Rect& viewport = rects[i]; VkRect2D& vkRect = vkRects[i]; @@ -125,7 +125,7 @@ inline void CommandBufferVK::SetStencilReference(uint8_t frontRef, uint8_t backR } inline void CommandBufferVK::SetSamplePositions(const SamplePosition* positions, Sample_t positionNum, Sample_t sampleNum) { - VkSampleLocationEXT* locations = STACK_ALLOC(VkSampleLocationEXT, positionNum); + VkSampleLocationEXT* locations = StackAlloc(VkSampleLocationEXT, positionNum); for (uint32_t i = 0; i < positionNum; i++) locations[i] = {(float)(positions[i].x + 8) / 16.0f, (float)(positions[i].y + 8) / 16.0f}; @@ -147,7 +147,7 @@ inline void CommandBufferVK::SetBlendConstants(const Color32f& color) { } inline void CommandBufferVK::ClearAttachments(const ClearDesc* clearDescs, uint32_t clearDescNum, const Rect* rects, uint32_t rectNum) { - VkClearAttachment* attachments = STACK_ALLOC(VkClearAttachment, clearDescNum); + VkClearAttachment* attachments = StackAlloc(VkClearAttachment, clearDescNum); for (uint32_t i = 0; i < clearDescNum; i++) { const ClearDesc& desc = clearDescs[i]; @@ -177,7 +177,7 @@ inline void CommandBufferVK::ClearAttachments(const ClearDesc* clearDescs, uint3 VkClearRect* clearRects = nullptr; if (rectNum == 0) { - clearRects = STACK_ALLOC(VkClearRect, clearDescNum); + clearRects = StackAlloc(VkClearRect, clearDescNum); rectNum = clearDescNum; for (uint32_t i = 0; i < clearDescNum; i++) { @@ -187,7 +187,7 @@ inline void CommandBufferVK::ClearAttachments(const ClearDesc* clearDescs, uint3 clearRect.rect = {{0, 0}, {m_RenderWidth, m_RenderHeight}}; } } else { - clearRects = STACK_ALLOC(VkClearRect, rectNum); + clearRects = StackAlloc(VkClearRect, rectNum); for (uint32_t i = 0; i < rectNum; i++) { const Rect& rect = rects[i]; @@ -227,7 +227,7 @@ inline void CommandBufferVK::BeginRendering(const AttachmentsDesc& attachmentsDe VkRenderingAttachmentInfo* colors = nullptr; if (attachmentsDesc.colorNum) { - colors = STACK_ALLOC(VkRenderingAttachmentInfo, attachmentsDesc.colorNum); + colors = StackAlloc(VkRenderingAttachmentInfo, attachmentsDesc.colorNum); for (uint32_t i = 0; i < attachmentsDesc.colorNum; i++) { const DescriptorVK& descriptor = *(DescriptorVK*)attachmentsDesc.colors[i]; @@ -306,7 +306,7 @@ inline void CommandBufferVK::EndRendering() { } inline void CommandBufferVK::SetVertexBuffers(uint32_t baseSlot, uint32_t bufferNum, const Buffer* const* buffers, const uint64_t* offsets) { - VkBuffer* bufferHandles = STACK_ALLOC(VkBuffer, bufferNum); + VkBuffer* bufferHandles = StackAlloc(VkBuffer, bufferNum); for (uint32_t i = 0; i < bufferNum; i++) bufferHandles[i] = GetHandle(buffers[i]); @@ -568,7 +568,7 @@ static inline VkAccessFlags2 GetAccessFlags(AccessBits accessBits) { inline void CommandBufferVK::Barrier(const BarrierGroupDesc& barrierGroupDesc) { // Global - VkMemoryBarrier2* memoryBarriers = STACK_ALLOC(VkMemoryBarrier2, barrierGroupDesc.globalNum); + VkMemoryBarrier2* memoryBarriers = StackAlloc(VkMemoryBarrier2, barrierGroupDesc.globalNum); for (uint16_t i = 0; i < barrierGroupDesc.globalNum; i++) { const GlobalBarrierDesc& in = 
barrierGroupDesc.globals[i]; @@ -581,7 +581,7 @@ inline void CommandBufferVK::Barrier(const BarrierGroupDesc& barrierGroupDesc) { } // Buffer - VkBufferMemoryBarrier2* bufferBarriers = STACK_ALLOC(VkBufferMemoryBarrier2, barrierGroupDesc.bufferNum); + VkBufferMemoryBarrier2* bufferBarriers = StackAlloc(VkBufferMemoryBarrier2, barrierGroupDesc.bufferNum); for (uint16_t i = 0; i < barrierGroupDesc.bufferNum; i++) { const BufferBarrierDesc& in = barrierGroupDesc.buffers[i]; const BufferVK& bufferImpl = *(const BufferVK*)in.buffer; @@ -600,7 +600,7 @@ inline void CommandBufferVK::Barrier(const BarrierGroupDesc& barrierGroupDesc) { } // Texture - VkImageMemoryBarrier2* textureBarriers = STACK_ALLOC(VkImageMemoryBarrier2, barrierGroupDesc.textureNum); + VkImageMemoryBarrier2* textureBarriers = StackAlloc(VkImageMemoryBarrier2, barrierGroupDesc.textureNum); for (uint16_t i = 0; i < barrierGroupDesc.textureNum; i++) { const TextureBarrierDesc& in = barrierGroupDesc.textures[i]; const TextureVK& textureImpl = *(const TextureVK*)in.texture; @@ -695,7 +695,7 @@ inline void CommandBufferVK::CopyWholeTexture(const TextureVK& dstTexture, const const TextureDesc& dstTextureDesc = dstTexture.GetDesc(); const TextureDesc& srcTextureDesc = srcTexture.GetDesc(); - VkImageCopy* regions = STACK_ALLOC(VkImageCopy, dstTextureDesc.mipNum); + VkImageCopy* regions = StackAlloc(VkImageCopy, dstTextureDesc.mipNum); for (Mip_t i = 0; i < dstTextureDesc.mipNum; i++) { regions[i].srcSubresource = {srcTexture.GetImageAspectFlags(), i, 0, srcTextureDesc.arraySize}; regions[i].dstSubresource = {dstTexture.GetImageAspectFlags(), i, 0, dstTextureDesc.arraySize}; @@ -744,8 +744,8 @@ inline void CommandBufferVK::BuildBottomLevelAccelerationStructure( const VkAccelerationStructureKHR dstASHandle = ((const AccelerationStructureVK&)dst).GetHandle(); const VkDeviceAddress scratchAddress = ((BufferVK&)scratch).GetDeviceAddress() + scratchOffset; - VkAccelerationStructureGeometryKHR* geometries = ALLOCATE_SCRATCH(m_Device, VkAccelerationStructureGeometryKHR, geometryObjectNum); - VkAccelerationStructureBuildRangeInfoKHR* ranges = ALLOCATE_SCRATCH(m_Device, VkAccelerationStructureBuildRangeInfoKHR, geometryObjectNum); + Scratch geometries = AllocateScratch(m_Device, VkAccelerationStructureGeometryKHR, geometryObjectNum); + Scratch ranges = AllocateScratch(m_Device, VkAccelerationStructureBuildRangeInfoKHR, geometryObjectNum); ConvertGeometryObjectsVK(geometries, ranges, geometryObjects, geometryObjectNum); @@ -762,9 +762,6 @@ inline void CommandBufferVK::BuildBottomLevelAccelerationStructure( const auto& vk = m_Device.GetDispatchTable(); vk.CmdBuildAccelerationStructuresKHR(m_Handle, 1, &buildGeometryInfo, rangeArrays); - - FREE_SCRATCH(m_Device, ranges, geometryObjectNum); - FREE_SCRATCH(m_Device, geometries, geometryObjectNum); } inline void CommandBufferVK::UpdateTopLevelAccelerationStructure(uint32_t instanceNum, const Buffer& buffer, uint64_t bufferOffset, AccelerationStructureBuildBits flags, @@ -806,8 +803,8 @@ inline void CommandBufferVK::UpdateBottomLevelAccelerationStructure(uint32_t geo const VkAccelerationStructureKHR dstASHandle = ((const AccelerationStructureVK&)dst).GetHandle(); const VkDeviceAddress scratchAddress = ((BufferVK&)scratch).GetDeviceAddress() + scratchOffset; - VkAccelerationStructureGeometryKHR* geometries = ALLOCATE_SCRATCH(m_Device, VkAccelerationStructureGeometryKHR, geometryObjectNum); - VkAccelerationStructureBuildRangeInfoKHR* ranges = ALLOCATE_SCRATCH(m_Device, 
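For reference, the `Scratch`-converted geometry and range arrays end up in a single `vkCmdBuildAccelerationStructuresKHR` call. A condensed sketch of that final step; the function and parameter names here are illustrative, and the entry point is assumed to be loaded into a dispatch table:

```cpp
#include <vulkan/vulkan.h>

void BuildBlas(PFN_vkCmdBuildAccelerationStructuresKHR cmdBuild, VkCommandBuffer cmd,
    VkAccelerationStructureKHR dst, VkDeviceAddress scratchAddress,
    const VkAccelerationStructureGeometryKHR* geometries,
    const VkAccelerationStructureBuildRangeInfoKHR* ranges, uint32_t geometryNum) {
    VkAccelerationStructureBuildGeometryInfoKHR buildInfo = {VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR};
    buildInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR;
    buildInfo.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR;
    buildInfo.dstAccelerationStructure = dst;
    buildInfo.geometryCount = geometryNum;
    buildInfo.pGeometries = geometries;
    buildInfo.scratchData.deviceAddress = scratchAddress;

    // One range array per build info; entry i covers geometries[i].
    const VkAccelerationStructureBuildRangeInfoKHR* rangeArrays[1] = {ranges};
    cmdBuild(cmd, 1, &buildInfo, rangeArrays);
}
```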
VkAccelerationStructureBuildRangeInfoKHR, geometryObjectNum); + Scratch geometries = AllocateScratch(m_Device, VkAccelerationStructureGeometryKHR, geometryObjectNum); + Scratch ranges = AllocateScratch(m_Device, VkAccelerationStructureBuildRangeInfoKHR, geometryObjectNum); ConvertGeometryObjectsVK(geometries, ranges, geometryObjects, geometryObjectNum); @@ -825,9 +822,6 @@ inline void CommandBufferVK::UpdateBottomLevelAccelerationStructure(uint32_t geo const auto& vk = m_Device.GetDispatchTable(); vk.CmdBuildAccelerationStructuresKHR(m_Handle, 1, &buildGeometryInfo, rangeArrays); - - FREE_SCRATCH(m_Device, ranges, geometryObjectNum); - FREE_SCRATCH(m_Device, geometries, geometryObjectNum); } inline void CommandBufferVK::CopyAccelerationStructure(AccelerationStructure& dst, AccelerationStructure& src, CopyMode copyMode) { @@ -845,7 +839,7 @@ inline void CommandBufferVK::CopyAccelerationStructure(AccelerationStructure& ds inline void CommandBufferVK::WriteAccelerationStructureSize( const AccelerationStructure* const* accelerationStructures, uint32_t accelerationStructureNum, QueryPool& queryPool, uint32_t queryPoolOffset) { - VkAccelerationStructureKHR* ASes = ALLOCATE_SCRATCH(m_Device, VkAccelerationStructureKHR, accelerationStructureNum); + Scratch ASes = AllocateScratch(m_Device, VkAccelerationStructureKHR, accelerationStructureNum); for (uint32_t i = 0; i < accelerationStructureNum; i++) ASes[i] = ((const AccelerationStructureVK*)accelerationStructures[i])->GetHandle(); @@ -855,8 +849,6 @@ inline void CommandBufferVK::WriteAccelerationStructureSize( const auto& vk = m_Device.GetDispatchTable(); vk.CmdWriteAccelerationStructuresPropertiesKHR( m_Handle, accelerationStructureNum, ASes, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR, queryPoolHandle, queryPoolOffset); - - FREE_SCRATCH(m_Device, ASes, accelerationStructureNum); } inline void CommandBufferVK::DispatchRays(const DispatchRaysDesc& dispatchRaysDesc) { diff --git a/Source/VK/CommandBufferVK.hpp b/Source/VK/CommandBufferVK.hpp index ee42aa97..04c3bff0 100644 --- a/Source/VK/CommandBufferVK.hpp +++ b/Source/VK/CommandBufferVK.hpp @@ -160,14 +160,6 @@ static void NRI_CALL CmdResetQueries(CommandBuffer& commandBuffer, const QueryPo ((CommandBufferVK&)commandBuffer).ResetQueries(queryPool, offset, num); } -static void NRI_CALL DestroyCommandBuffer(CommandBuffer& commandBuffer) { - if (!(&commandBuffer)) - return; - - StdAllocator& allocator = ((CommandBufferVK&)commandBuffer).GetDevice().GetStdAllocator(); - Deallocate(allocator, (CommandBufferVK*)&commandBuffer); -} - static void* NRI_CALL GetCommandBufferNativeObject(const CommandBuffer& commandBuffer) { if (!(&commandBuffer)) return nullptr; diff --git a/Source/VK/CommandQueueVK.cpp b/Source/VK/CommandQueueVK.cpp index 67c01203..857061eb 100644 --- a/Source/VK/CommandQueueVK.cpp +++ b/Source/VK/CommandQueueVK.cpp @@ -29,7 +29,7 @@ inline void CommandQueueVK::SetDebugName(const char* name) { inline void CommandQueueVK::Submit(const QueueSubmitDesc& queueSubmitDesc, const SwapChain* swapChain) { ExclusiveScope lock(m_Lock); - VkSemaphoreSubmitInfo* waitSemaphores = STACK_ALLOC(VkSemaphoreSubmitInfo, queueSubmitDesc.waitFenceNum); + VkSemaphoreSubmitInfo* waitSemaphores = StackAlloc(VkSemaphoreSubmitInfo, queueSubmitDesc.waitFenceNum); for (uint32_t i = 0; i < queueSubmitDesc.waitFenceNum; i++) { waitSemaphores[i] = {VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO}; waitSemaphores[i].semaphore = *(FenceVK*)queueSubmitDesc.waitFences[i].fence; @@ -37,13 +37,13 @@ inline void 
CommandQueueVK::Submit(const QueueSubmitDesc& queueSubmitDesc, const waitSemaphores[i].stageMask = GetPipelineStageFlags(queueSubmitDesc.waitFences[i].stages); } - VkCommandBufferSubmitInfo* commandBuffers = STACK_ALLOC(VkCommandBufferSubmitInfo, queueSubmitDesc.commandBufferNum); + VkCommandBufferSubmitInfo* commandBuffers = StackAlloc(VkCommandBufferSubmitInfo, queueSubmitDesc.commandBufferNum); for (uint32_t i = 0; i < queueSubmitDesc.commandBufferNum; i++) { commandBuffers[i] = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO}; commandBuffers[i].commandBuffer = *(CommandBufferVK*)queueSubmitDesc.commandBuffers[i]; } - VkSemaphoreSubmitInfo* signalSemaphores = STACK_ALLOC(VkSemaphoreSubmitInfo, queueSubmitDesc.signalFenceNum); + VkSemaphoreSubmitInfo* signalSemaphores = StackAlloc(VkSemaphoreSubmitInfo, queueSubmitDesc.signalFenceNum); for (uint32_t i = 0; i < queueSubmitDesc.signalFenceNum; i++) { signalSemaphores[i] = {VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO}; signalSemaphores[i].semaphore = *(FenceVK*)queueSubmitDesc.signalFences[i].fence; diff --git a/Source/VK/ConversionVK.cpp b/Source/VK/ConversionVK.cpp index 0a369d5b..4bad0aeb 100644 --- a/Source/VK/ConversionVK.cpp +++ b/Source/VK/ConversionVK.cpp @@ -10,13 +10,10 @@ void nri::ConvertGeometryObjectSizesVK(VkAccelerationStructureGeometryKHR* destO for (uint32_t i = 0; i < objectNum; i++) { const GeometryObject& geometrySrc = sourceObjects[i]; - uint32_t triangleNum = geometrySrc.triangles.indexNum / 3; - triangleNum += (geometrySrc.triangles.indexNum == 0) ? geometrySrc.triangles.vertexNum / 3 : 0; + uint32_t triangleNum = (geometrySrc.triangles.indexNum ? geometrySrc.triangles.indexNum : geometrySrc.triangles.vertexNum) / 3; + VkDeviceAddress transform = GetBufferDeviceAddress(geometrySrc.triangles.transformBuffer) + geometrySrc.triangles.transformOffset; - const uint32_t boxNum = geometrySrc.boxes.boxNum; - primitiveNums[i] = geometrySrc.type == GeometryType::TRIANGLES ? triangleNum : boxNum; - - const VkDeviceAddress transform = GetBufferDeviceAddress(geometrySrc.triangles.transformBuffer) + geometrySrc.triangles.transformOffset; + primitiveNums[i] = geometrySrc.type == GeometryType::TRIANGLES ? triangleNum : geometrySrc.boxes.boxNum; VkAccelerationStructureGeometryKHR& geometryDst = destObjects[i]; geometryDst = {VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR}; @@ -37,18 +34,15 @@ void nri::ConvertGeometryObjectsVK( for (uint32_t i = 0; i < objectNum; i++) { const GeometryObject& geometrySrc = sourceObjects[i]; - uint32_t triangleNum = geometrySrc.triangles.indexNum / 3; - triangleNum += (geometrySrc.triangles.indexNum == 0) ? geometrySrc.triangles.vertexNum / 3 : 0; + uint32_t triangleNum = (geometrySrc.triangles.indexNum ? geometrySrc.triangles.indexNum : geometrySrc.triangles.vertexNum) / 3; - const uint32_t boxNum = geometrySrc.boxes.boxNum; + VkDeviceAddress aabbs = GetBufferDeviceAddress(geometrySrc.boxes.buffer) + geometrySrc.boxes.offset; + VkDeviceAddress vertices = GetBufferDeviceAddress(geometrySrc.triangles.vertexBuffer) + geometrySrc.triangles.vertexOffset; + VkDeviceAddress indices = GetBufferDeviceAddress(geometrySrc.triangles.indexBuffer) + geometrySrc.triangles.indexOffset; + VkDeviceAddress transform = GetBufferDeviceAddress(geometrySrc.triangles.transformBuffer) + geometrySrc.triangles.transformOffset; ranges[i] = {}; - ranges[i].primitiveCount = geometrySrc.type == GeometryType::TRIANGLES ? 
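The `CommandQueueVK::Submit` hunk above only swaps the scratch allocator; the surrounding logic keeps packing timeline-semaphore waits/signals and command buffers into one `vkQueueSubmit2` call. A single-element sketch of that packing (NRI fences map to timeline semaphores, so each wait/signal carries a value):

```cpp
#include <vulkan/vulkan.h>

void SubmitOne(PFN_vkQueueSubmit2 queueSubmit2, VkQueue queue, VkCommandBuffer cmd,
    VkSemaphore wait, uint64_t waitValue, VkSemaphore signal, uint64_t signalValue) {
    VkSemaphoreSubmitInfo waitInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO};
    waitInfo.semaphore = wait;
    waitInfo.value = waitValue; // timeline value to wait for
    waitInfo.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;

    VkCommandBufferSubmitInfo cmdInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO};
    cmdInfo.commandBuffer = cmd;

    VkSemaphoreSubmitInfo signalInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO};
    signalInfo.semaphore = signal;
    signalInfo.value = signalValue; // timeline value to signal
    signalInfo.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;

    VkSubmitInfo2 submit = {VK_STRUCTURE_TYPE_SUBMIT_INFO_2};
    submit.waitSemaphoreInfoCount = 1;
    submit.pWaitSemaphoreInfos = &waitInfo;
    submit.commandBufferInfoCount = 1;
    submit.pCommandBufferInfos = &cmdInfo;
    submit.signalSemaphoreInfoCount = 1;
    submit.pSignalSemaphoreInfos = &signalInfo;

    queueSubmit2(queue, 1, &submit, VK_NULL_HANDLE);
}
```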
triangleNum : boxNum; - - const VkDeviceAddress aabbs = GetBufferDeviceAddress(geometrySrc.boxes.buffer) + geometrySrc.boxes.offset; - const VkDeviceAddress vertices = GetBufferDeviceAddress(geometrySrc.triangles.vertexBuffer) + geometrySrc.triangles.vertexOffset; - const VkDeviceAddress indices = GetBufferDeviceAddress(geometrySrc.triangles.indexBuffer) + geometrySrc.triangles.indexOffset; - const VkDeviceAddress transform = GetBufferDeviceAddress(geometrySrc.triangles.transformBuffer) + geometrySrc.triangles.transformOffset; + ranges[i].primitiveCount = geometrySrc.type == GeometryType::TRIANGLES ? triangleNum : geometrySrc.boxes.boxNum; VkAccelerationStructureGeometryKHR& geometryDst = destObjects[i]; geometryDst = {VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR}; diff --git a/Source/VK/DescriptorPoolVK.cpp b/Source/VK/DescriptorPoolVK.cpp index 2a87d5d0..1591448d 100644 --- a/Source/VK/DescriptorPoolVK.cpp +++ b/Source/VK/DescriptorPoolVK.cpp @@ -9,16 +9,16 @@ using namespace nri; DescriptorPoolVK::~DescriptorPoolVK() { - const auto& lowLevelAllocator = m_Device.GetStdAllocator().GetInterface(); - + const auto& allocator = m_Device.GetStdAllocator().GetInterface(); for (size_t i = 0; i < m_AllocatedSets.size(); i++) { m_AllocatedSets[i]->~DescriptorSetVK(); - lowLevelAllocator.Free(lowLevelAllocator.userArg, m_AllocatedSets[i]); + allocator.Free(allocator.userArg, m_AllocatedSets[i]); } - const auto& vk = m_Device.GetDispatchTable(); - if (m_Handle != VK_NULL_HANDLE && m_OwnsNativeObjects) + if (m_OwnsNativeObjects) { + const auto& vk = m_Device.GetDispatchTable(); vk.DestroyDescriptorPool(m_Device, m_Handle, m_Device.GetAllocationCallbacks()); + } } inline void AddDescriptorPoolSize(VkDescriptorPoolSize* poolSizeArray, uint32_t& poolSizeArraySize, VkDescriptorType type, uint32_t descriptorCount) { @@ -31,10 +31,6 @@ inline void AddDescriptorPoolSize(VkDescriptorPoolSize* poolSizeArray, uint32_t& } Result DescriptorPoolVK::Create(const DescriptorPoolDesc& descriptorPoolDesc) { - m_OwnsNativeObjects = true; - - const auto& vk = m_Device.GetDispatchTable(); - VkDescriptorPoolSize descriptorPoolSizeArray[16] = {}; for (uint32_t i = 0; i < GetCountOf(descriptorPoolSizeArray); i++) descriptorPoolSizeArray[i].type = (VkDescriptorType)i; @@ -54,6 +50,7 @@ Result DescriptorPoolVK::Create(const DescriptorPoolDesc& descriptorPoolDesc) { const VkDescriptorPoolCreateInfo info = {VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, nullptr, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, descriptorPoolDesc.descriptorSetMaxNum, poolSizeCount, descriptorPoolSizeArray}; + const auto& vk = m_Device.GetDispatchTable(); VkResult result = vk.CreateDescriptorPool(m_Device, &info, m_Device.GetAllocationCallbacks(), &m_Handle); RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkCreateDescriptorPool returned %d", (int32_t)result); @@ -61,6 +58,9 @@ Result DescriptorPoolVK::Create(const DescriptorPoolDesc& descriptorPoolDesc) { } Result DescriptorPoolVK::Create(const DescriptorPoolVKDesc& descriptorPoolVKDesc) { + if (!descriptorPoolVKDesc.vkDescriptorPool) + return Result::INVALID_ARGUMENT; + m_OwnsNativeObjects = false; m_Handle = (VkDescriptorPool)descriptorPoolVKDesc.vkDescriptorPool; diff --git a/Source/VK/DescriptorPoolVK.h b/Source/VK/DescriptorPoolVK.h index 24c89e2a..937a4057 100644 --- a/Source/VK/DescriptorPoolVK.h +++ b/Source/VK/DescriptorPoolVK.h @@ -40,7 +40,7 @@ struct DescriptorPoolVK { Vector m_AllocatedSets; VkDescriptorPool m_Handle = VK_NULL_HANDLE; uint32_t 
m_UsedSets = 0; - bool m_OwnsNativeObjects = false; + bool m_OwnsNativeObjects = true; }; } // namespace nri \ No newline at end of file diff --git a/Source/VK/DescriptorSetVK.cpp b/Source/VK/DescriptorSetVK.cpp index 68d2bf41..c3e30520 100644 --- a/Source/VK/DescriptorSetVK.cpp +++ b/Source/VK/DescriptorSetVK.cpp @@ -184,10 +184,10 @@ inline void DescriptorSetVK::UpdateDescriptorRanges(uint32_t rangeOffset, uint32 constexpr uint32_t writesPerIteration = 1024; uint32_t writeMaxNum = std::min(writesPerIteration, rangeNum); - VkWriteDescriptorSet* writes = STACK_ALLOC(VkWriteDescriptorSet, writeMaxNum); + VkWriteDescriptorSet* writes = StackAlloc(VkWriteDescriptorSet, writeMaxNum); constexpr size_t slabSize = 32768; - SlabAllocator slab(STACK_ALLOC(uint8_t, slabSize), slabSize); + SlabAllocator slab(StackAlloc(uint8_t, slabSize), slabSize); const auto& vk = m_Device.GetDispatchTable(); @@ -224,8 +224,8 @@ inline void DescriptorSetVK::UpdateDescriptorRanges(uint32_t rangeOffset, uint32 } inline void DescriptorSetVK::UpdateDynamicConstantBuffers(uint32_t bufferOffset, uint32_t descriptorNum, const Descriptor* const* descriptors) { - VkWriteDescriptorSet* writes = STACK_ALLOC(VkWriteDescriptorSet, descriptorNum); - VkDescriptorBufferInfo* infos = STACK_ALLOC(VkDescriptorBufferInfo, descriptorNum); + VkWriteDescriptorSet* writes = StackAlloc(VkWriteDescriptorSet, descriptorNum); + VkDescriptorBufferInfo* infos = StackAlloc(VkDescriptorBufferInfo, descriptorNum); uint32_t writeNum = 0; for (uint32_t j = 0; j < descriptorNum; j++) { @@ -246,7 +246,7 @@ inline void DescriptorSetVK::UpdateDynamicConstantBuffers(uint32_t bufferOffset, inline void DescriptorSetVK::Copy(const DescriptorSetCopyDesc& descriptorSetCopyDesc) { const uint32_t descriptorRangeNum = descriptorSetCopyDesc.rangeNum + descriptorSetCopyDesc.dynamicConstantBufferNum; - VkCopyDescriptorSet* copies = STACK_ALLOC(VkCopyDescriptorSet, descriptorRangeNum); + VkCopyDescriptorSet* copies = StackAlloc(VkCopyDescriptorSet, descriptorRangeNum); uint32_t copyNum = 0; const DescriptorSetVK& srcSetImpl = *(const DescriptorSetVK*)descriptorSetCopyDesc.srcDescriptorSet; diff --git a/Source/VK/DeviceVK.cpp b/Source/VK/DeviceVK.cpp index 24254c5c..ba069ca7 100644 --- a/Source/VK/DeviceVK.cpp +++ b/Source/VK/DeviceVK.cpp @@ -20,8 +20,6 @@ #include "SwapChainVK.h" #include "TextureVK.h" -static_assert(VK_LUID_SIZE == sizeof(uint64_t), "invalid sizeof"); - using namespace nri; constexpr VkBufferUsageFlags GetBufferUsageFlags(BufferUsageBits bufferUsageBits, uint32_t structureStride, bool isDeviceAddressSupported) { @@ -84,34 +82,31 @@ constexpr VkImageUsageFlags GetImageUsageFlags(TextureUsageBits textureUsageBits } Result CreateDeviceVK(const DeviceCreationDesc& desc, DeviceBase*& device) { - StdAllocator allocator(desc.memoryAllocatorInterface); - DeviceVK* implementation = Allocate(allocator, desc.callbackInterface, allocator); - - const Result res = implementation->Create(desc, {}, false); - - if (res == Result::SUCCESS) { - device = implementation; - return Result::SUCCESS; - } + StdAllocator allocator(desc.allocationCallbacks); + DeviceVK* impl = Allocate(allocator, desc.callbackInterface, allocator); + Result result = impl->Create(desc, {}, false); - Deallocate(allocator, implementation); + if (result != Result::SUCCESS) { + Destroy(allocator, impl); + device = nullptr; + } else + device = (DeviceBase*)impl; - return res; + return result; } Result CreateDeviceVK(const DeviceCreationVKDesc& desc, DeviceBase*& device) { - StdAllocator 
allocator(desc.memoryAllocatorInterface);
-    DeviceVK* implementation = Allocate<DeviceVK>(allocator, desc.callbackInterface, allocator);
-    const Result res = implementation->Create({}, desc, true);
-
-    if (res == Result::SUCCESS) {
-        device = implementation;
-        return Result::SUCCESS;
-    }
+    StdAllocator<uint8_t> allocator(desc.allocationCallbacks);
+    DeviceVK* impl = Allocate<DeviceVK>(allocator, desc.callbackInterface, allocator);
+    Result result = impl->Create({}, desc, true);

-    Deallocate(allocator, implementation);
+    if (result != Result::SUCCESS) {
+        Destroy(allocator, impl);
+        device = nullptr;
+    } else
+        device = (DeviceBase*)impl;

-    return res;
+    return result;
 }

 inline bool IsExtensionSupported(const char* ext, const Vector<VkExtensionProperties>& list) {
@@ -132,7 +127,7 @@
     return false;
 }

-void* VKAPI_PTR vkAllocateHostMemory(void* pUserData, size_t size, size_t alignment, VkSystemAllocationScope allocationScope) {
+static void* VKAPI_PTR vkAllocateHostMemory(void* pUserData, size_t size, size_t alignment, VkSystemAllocationScope allocationScope) {
     MaybeUnused(allocationScope);

     StdAllocator<uint8_t>& stdAllocator = *(StdAllocator<uint8_t>*)pUserData;
@@ -141,7 +136,7 @@ void* VKAPI_PTR vkAllocateHostMemory(void* pUserData, size_t size, size_t alignm
     return lowLevelAllocator.Allocate(lowLevelAllocator.userArg, size, alignment);
 }

-void* VKAPI_PTR vkReallocateHostMemory(void* pUserData, void* pOriginal, size_t size, size_t alignment, VkSystemAllocationScope allocationScope) {
+static void* VKAPI_PTR vkReallocateHostMemory(void* pUserData, void* pOriginal, size_t size, size_t alignment, VkSystemAllocationScope allocationScope) {
     MaybeUnused(allocationScope);

     StdAllocator<uint8_t>& stdAllocator = *(StdAllocator<uint8_t>*)pUserData;
@@ -150,7 +145,7 @@ void* VKAPI_PTR vkReallocateHostMemory(void* pUserData, void* pOriginal, size_t
     return lowLevelAllocator.Reallocate(lowLevelAllocator.userArg, pOriginal, size, alignment);
 }

-void VKAPI_PTR vkFreeHostMemory(void* pUserData, void* pMemory) {
+static void VKAPI_PTR vkFreeHostMemory(void* pUserData, void* pMemory) {
     StdAllocator<uint8_t>& stdAllocator = *(StdAllocator<uint8_t>*)pUserData;
     const auto& lowLevelAllocator = stdAllocator.GetInterface();

@@ -315,23 +310,12 @@ void DeviceVK::ProcessDeviceExtensions(Vector<const char*>& desiredDeviceExts, b
         desiredDeviceExts.push_back(VK_NV_LOW_LATENCY_2_EXTENSION_NAME);
 }

-template <typename Implementation, typename Interface, typename... Args>
-Result DeviceVK::CreateImplementation(Interface*& entity, const Args&...
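The removed out-of-line `CreateImplementation` template (its body continues below) and the rewritten `CreateDeviceVK` overloads share one create-or-destroy pattern. A standalone sketch of it, assuming the template now lives in a header (this diff does not show the new home); `CreateImplementationSketch` is an illustrative name:

```cpp
template <typename Implementation, typename Interface, typename... Args>
nri::Result CreateImplementationSketch(DeviceVK& device, Interface*& entity, const Args&... args) {
    Implementation* impl = Allocate<Implementation>(device.GetStdAllocator(), device);
    nri::Result result = impl->Create(args...); // every backend object exposes Create(...)

    if (result != nri::Result::SUCCESS) {
        Destroy(device.GetStdAllocator(), impl); // runs ~Implementation, then Free
        entity = nullptr;
    } else
        entity = (Interface*)impl;

    return result;
}
```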
args) { - Implementation* implementation = Allocate(GetStdAllocator(), *this); - Result result = implementation->Create(args...); - - if (result == Result::SUCCESS) { - entity = (Interface*)implementation; - return Result::SUCCESS; - } - - Deallocate(GetStdAllocator(), implementation); - entity = nullptr; - - return result; -} - DeviceVK::DeviceVK(const CallbackInterface& callbacks, const StdAllocator& stdAllocator) : DeviceBase(callbacks, stdAllocator) { + m_AllocationCallbacks.pUserData = &GetStdAllocator(); + m_AllocationCallbacks.pfnAllocation = vkAllocateHostMemory; + m_AllocationCallbacks.pfnReallocation = vkReallocateHostMemory; + m_AllocationCallbacks.pfnFree = vkFreeHostMemory; + m_Desc.graphicsAPI = GraphicsAPI::VK; m_Desc.nriVersionMajor = NRI_VERSION_MAJOR; m_Desc.nriVersionMinor = NRI_VERSION_MINOR; @@ -341,8 +325,10 @@ DeviceVK::~DeviceVK() { if (m_Device == VK_NULL_HANDLE) return; + DestroyVma(); + for (uint32_t i = 0; i < m_CommandQueues.size(); i++) - Deallocate(GetStdAllocator(), m_CommandQueues[i]); + Destroy(GetStdAllocator(), m_CommandQueues[i]); if (m_Messenger) { typedef PFN_vkDestroyDebugUtilsMessengerEXT Func; @@ -364,8 +350,9 @@ void DeviceVK::GetAdapterDesc() { VkPhysicalDeviceProperties2 props = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, &deviceIDProps}; m_VK.GetPhysicalDeviceProperties2(m_PhysicalDevice, &props); + static_assert(VK_LUID_SIZE == sizeof(uint64_t), "unexpected sizeof"); #ifdef _WIN32 - static_assert(sizeof(LUID) == VK_LUID_SIZE, "invalid sizeof"); + static_assert(VK_LUID_SIZE == sizeof(LUID), "unexpected sizeof"); ComPtr dxgiFactory; HRESULT hr = CreateDXGIFactory2(0, IID_PPV_ARGS(&dxgiFactory)); @@ -383,7 +370,7 @@ void DeviceVK::GetAdapterDesc() { if (FAILED(hr)) REPORT_WARNING(this, "IDXGIAdapter::GetDesc() failed, result = 0x%08X!", hr); else { - wcstombs(m_Desc.adapterDesc.description, desc.Description, GetCountOf(m_Desc.adapterDesc.description) - 1); + wcstombs(m_Desc.adapterDesc.name, desc.Description, GetCountOf(m_Desc.adapterDesc.name) - 1); m_Desc.adapterDesc.luid = *(uint64_t*)&desc.AdapterLuid; m_Desc.adapterDesc.videoMemorySize = desc.DedicatedVideoMemory; m_Desc.adapterDesc.systemMemorySize = desc.DedicatedSystemMemory + desc.SharedSystemMemory; @@ -391,7 +378,7 @@ void DeviceVK::GetAdapterDesc() { m_Desc.adapterDesc.vendor = GetVendorFromID(desc.VendorId); } #else - strncpy(m_Desc.adapterDesc.description, props.properties.deviceName, sizeof(m_Desc.adapterDesc.description)); + strncpy(m_Desc.adapterDesc.name, props.properties.deviceName, sizeof(m_Desc.adapterDesc.name)); m_Desc.adapterDesc.luid = *(uint64_t*)&deviceIDProps.deviceLUID[0]; m_Desc.adapterDesc.deviceId = props.properties.deviceID; m_Desc.adapterDesc.vendor = GetVendorFromID(props.properties.vendorID); @@ -413,21 +400,14 @@ Result DeviceVK::Create(const DeviceCreationDesc& deviceCreationDesc, const Devi m_OwnsNativeObjects = !isWrapper; m_SPIRVBindingOffsets = isWrapper ? 
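With the `DeviceVK` constructor now wiring `m_AllocationCallbacks` unconditionally, every Vulkan object creation can route host allocations through the user's `AllocationCallbacks`. A sketch of that bridging, with `AllocationCallbacksSketch` standing in for `nri::AllocationCallbacks` (per the Vulkan spec, all three of `pfnAllocation`/`pfnReallocation`/`pfnFree` must be valid whenever the struct is used; only the internal-allocation notification slots may stay null):

```cpp
#include <vulkan/vulkan.h>

struct AllocationCallbacksSketch {
    void* (*Allocate)(void* userArg, size_t size, size_t alignment);
    void* (*Reallocate)(void* userArg, void* memory, size_t size, size_t alignment);
    void (*Free)(void* userArg, void* memory);
    void* userArg;
};

static void* VKAPI_PTR AllocHostMemory(void* userData, size_t size, size_t alignment, VkSystemAllocationScope) {
    auto& callbacks = *(AllocationCallbacksSketch*)userData;
    return callbacks.Allocate(callbacks.userArg, size, alignment);
}

static void* VKAPI_PTR ReallocHostMemory(void* userData, void* original, size_t size, size_t alignment, VkSystemAllocationScope) {
    auto& callbacks = *(AllocationCallbacksSketch*)userData;
    return callbacks.Reallocate(callbacks.userArg, original, size, alignment);
}

static void VKAPI_PTR FreeHostMemory(void* userData, void* memory) {
    auto& callbacks = *(AllocationCallbacksSketch*)userData;
    callbacks.Free(callbacks.userArg, memory);
}

VkAllocationCallbacks MakeVkAllocationCallbacks(AllocationCallbacksSketch& callbacks) {
    VkAllocationCallbacks vkCallbacks = {};
    vkCallbacks.pUserData = &callbacks;
    vkCallbacks.pfnAllocation = AllocHostMemory;
    vkCallbacks.pfnReallocation = ReallocHostMemory;
    vkCallbacks.pfnFree = FreeHostMemory;
    return vkCallbacks;
}
```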
 deviceCreationVKDesc.spirvBindingOffsets : deviceCreationDesc.spirvBindingOffsets;
-    { // Custom allocator
-        m_AllocationCallbacks.pUserData = &GetStdAllocator();
-        m_AllocationCallbacks.pfnAllocation = vkAllocateHostMemory;
-        m_AllocationCallbacks.pfnReallocation = vkReallocateHostMemory;
-        m_AllocationCallbacks.pfnFree = vkFreeHostMemory;
-
-        if (!isWrapper)
-            m_AllocationCallbackPtr = &m_AllocationCallbacks;
-    }
+    if (!isWrapper && !deviceCreationDesc.disable3rdPartyAllocationCallbacks)
+        m_AllocationCallbackPtr = &m_AllocationCallbacks;

     { // Loader
-        const char* loaderPath = deviceCreationVKDesc.vulkanLoaderPath ? deviceCreationVKDesc.vulkanLoaderPath : VULKAN_LOADER_NAME;
+        const char* loaderPath = deviceCreationVKDesc.libraryPath ? deviceCreationVKDesc.libraryPath : VULKAN_LOADER_NAME;
         m_Loader = LoadSharedLibrary(loaderPath);
         if (!m_Loader) {
-            REPORT_ERROR(this, "Failed to load Vulkan loader: '%s'.", loaderPath);
+            REPORT_ERROR(this, "Failed to load Vulkan loader: '%s'", loaderPath);
             return Result::UNSUPPORTED;
         }
     }
@@ -460,17 +440,17 @@ Result DeviceVK::Create(const DeviceCreationDesc& deviceCreationDesc, const Devi
         return res;
     }

-    uint32_t minorVersion = 0;
+    m_MinorVersion = 0;
     { // Group
         if (isWrapper) {
             m_PhysicalDevice = (VkPhysicalDevice)deviceCreationVKDesc.vkPhysicalDevice;
-            minorVersion = deviceCreationVKDesc.minorVersion;
+            m_MinorVersion = deviceCreationVKDesc.minorVersion;
         } else {
             uint32_t deviceGroupNum = 0;
             m_VK.EnumeratePhysicalDeviceGroups(m_Instance, &deviceGroupNum, nullptr);

-            VkPhysicalDeviceGroupProperties* deviceGroups = STACK_ALLOC(VkPhysicalDeviceGroupProperties, deviceGroupNum);
+            VkPhysicalDeviceGroupProperties* deviceGroups = StackAlloc(VkPhysicalDeviceGroupProperties, deviceGroupNum);
             VkResult result = m_VK.EnumeratePhysicalDeviceGroups(m_Instance, &deviceGroupNum, deviceGroups);
             RETURN_ON_FAILURE(this, result == VK_SUCCESS, GetReturnCode(result), "vkEnumeratePhysicalDevices returned %d", (int32_t)result);
@@ -485,9 +465,9 @@ Result DeviceVK::Create(const DeviceCreationDesc& deviceCreationDesc, const Devi
             m_VK.GetPhysicalDeviceProperties2(group.physicalDevices[0], &props);

             uint32_t majorVersion = VK_VERSION_MAJOR(props.properties.apiVersion);
-            minorVersion = VK_VERSION_MINOR(props.properties.apiVersion);
+            m_MinorVersion = VK_VERSION_MINOR(props.properties.apiVersion);

-            bool isSupported = majorVersion * 10 + minorVersion >= 12;
+            bool isSupported = (majorVersion * 10 + m_MinorVersion) >= 12;
             if (deviceCreationDesc.adapterDesc) {
                 const uint64_t luid = *(uint64_t*)idProps.deviceLUID;
                 if (luid == deviceCreationDesc.adapterDesc->luid) {
@@ -541,7 +521,7 @@ Result DeviceVK::Create(const DeviceCreationDesc& deviceCreationDesc, const Devi
     APPEND_EXT(features12);

     VkPhysicalDeviceVulkan13Features features13 = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES};
-    if (minorVersion >= 3) {
+    if (m_MinorVersion >= 3) {
         APPEND_EXT(features13);
     }
@@ -555,6 +535,7 @@ Result DeviceVK::Create(const DeviceCreationDesc& deviceCreationDesc, const Devi
     VkPhysicalDeviceMaintenance5FeaturesKHR maintenance5Features = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_5_FEATURES_KHR};
     if (IsExtensionSupported(VK_KHR_MAINTENANCE_5_EXTENSION_NAME, desiredDeviceExts)) {
         APPEND_EXT(maintenance5Features);
+        m_IsMaintenance5Supported = true;
     }

     VkPhysicalDeviceFragmentShadingRateFeaturesKHR shadingRateFeatures = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR};
@@ -698,7 +679,7 @@ Result DeviceVK::Create(const DeviceCreationDesc& deviceCreationDesc, const Devi
     APPEND_EXT(props12);

     VkPhysicalDeviceVulkan13Properties props13 = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES};
-    if (minorVersion >= 3) {
+    if (m_MinorVersion >= 3) {
         APPEND_EXT(props13);
     }
@@ -773,7 +754,7 @@ Result DeviceVK::Create(const DeviceCreationDesc& deviceCreationDesc, const Devi
     m_Desc.textureArrayMaxDim = (Dim_t)limits.maxImageArrayLayers;
     m_Desc.texelBufferMaxDim = limits.maxTexelBufferElements;

-    const VkMemoryPropertyFlags neededFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+    const VkMemoryPropertyFlags neededFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
     for (uint32_t i = 0; i < m_MemoryProps.memoryTypeCount; i++) {
         const VkMemoryType& memoryType = m_MemoryProps.memoryTypes[i];
         if ((memoryType.propertyFlags & neededFlags) == neededFlags)
@@ -792,7 +773,6 @@ Result DeviceVK::Create(const DeviceCreationDesc& deviceCreationDesc, const Devi
     m_Desc.bufferMaxSize = props13.maxBufferSize;
     m_Desc.bufferTextureGranularity = (uint32_t)limits.bufferImageGranularity;
     m_Desc.pushConstantsMaxSize = limits.maxPushConstantsSize;
-    m_Desc.memoryTier = MemoryTier::TWO; // TODO: seems to be the best match

     m_Desc.boundDescriptorSetMaxNum = limits.maxBoundDescriptorSets;
     m_Desc.perStageDescriptorSamplerMaxNum = limits.maxPerStageDescriptorSamplers;
@@ -900,6 +880,7 @@ Result DeviceVK::Create(const DeviceCreationDesc& deviceCreationDesc, const Devi
     m_Desc.isMeshShaderPipelineStatsSupported = meshShaderFeatures.meshShaderQueries == VK_TRUE;
     m_Desc.isDrawMeshTasksIndirectSupported = true;
     m_Desc.isEnchancedBarrierSupported = true;
+    m_Desc.isMemoryTier2Supported = true; // TODO: seems to be the best match

     m_Desc.isShaderNativeI16Supported = features.features.shaderInt16;
     m_Desc.isShaderNativeF16Supported = features12.shaderFloat16;
@@ -977,16 +958,16 @@ void DeviceVK::GetMemoryDesc(const BufferDesc& bufferDesc, MemoryLocation memory
     const auto& vk = GetDispatchTable();
     vk.GetDeviceBufferMemoryRequirements(m_Device, &bufferMemoryRequirements, &requirements);

-    MemoryTypeUnion memoryType = {};
-    memoryType.unpacked.isDedicated = dedicatedRequirements.requiresDedicatedAllocation;
+    MemoryTypeInfo memoryTypeInfo = {};
+    memoryTypeInfo.mustBeDedicated = dedicatedRequirements.prefersDedicatedAllocation;

-    bool found = GetMemoryType(memoryLocation, requirements.memoryRequirements.memoryTypeBits, memoryType.unpacked);
-    RETURN_ON_FAILURE(this, found, ReturnVoid(), "Can't find suitable memory type");
-
-    memoryDesc.size = requirements.memoryRequirements.size;
-    memoryDesc.alignment = (uint32_t)requirements.memoryRequirements.alignment;
-    memoryDesc.type = memoryType.packed;
-    memoryDesc.mustBeDedicated = dedicatedRequirements.requiresDedicatedAllocation;
+    memoryDesc = {};
+    if (GetMemoryTypeInfo(memoryLocation, requirements.memoryRequirements.memoryTypeBits, memoryTypeInfo)) {
+        memoryDesc.size = requirements.memoryRequirements.size;
+        memoryDesc.alignment = (uint32_t)requirements.memoryRequirements.alignment;
+        memoryDesc.type = Pack(memoryTypeInfo);
+        memoryDesc.mustBeDedicated = memoryTypeInfo.mustBeDedicated;
+    }
 }

 void DeviceVK::GetMemoryDesc(const TextureDesc& textureDesc, MemoryLocation memoryLocation, MemoryDesc& memoryDesc) const {
@@ -1004,34 +985,45 @@ void DeviceVK::GetMemoryDesc(const TextureDesc& textureDesc, MemoryLocation memo
     const auto& vk = GetDispatchTable();
     vk.GetDeviceImageMemoryRequirements(m_Device, &imageMemoryRequirements, &requirements);

-    MemoryTypeUnion memoryType = {};
-    memoryType.unpacked.isDedicated = dedicatedRequirements.requiresDedicatedAllocation;
+    MemoryTypeInfo memoryTypeInfo = {};
+    memoryTypeInfo.mustBeDedicated = dedicatedRequirements.prefersDedicatedAllocation;
+
+    memoryDesc = {};
+    if (GetMemoryTypeInfo(memoryLocation, requirements.memoryRequirements.memoryTypeBits, memoryTypeInfo)) {
+        memoryDesc.size = requirements.memoryRequirements.size;
+        memoryDesc.alignment = (uint32_t)requirements.memoryRequirements.alignment;
+        memoryDesc.type = Pack(memoryTypeInfo);
+        memoryDesc.mustBeDedicated = memoryTypeInfo.mustBeDedicated;
+    }
+}
+
+void DeviceVK::GetMemoryDesc(const AccelerationStructureDesc& accelerationStructureDesc, MemoryLocation memoryLocation, MemoryDesc& memoryDesc) {
+    VkAccelerationStructureBuildSizesInfoKHR sizesInfo = {VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR};
+    GetAccelerationStructureBuildSizesInfo(accelerationStructureDesc, sizesInfo);

-    bool found = GetMemoryType(memoryLocation, requirements.memoryRequirements.memoryTypeBits, memoryType.unpacked);
-    RETURN_ON_FAILURE(this, found, ReturnVoid(), "Can't find suitable memory type");
+    BufferDesc bufferDesc = {};
+    bufferDesc.size = sizesInfo.accelerationStructureSize;
+    bufferDesc.usageMask = BufferUsageBits::RAY_TRACING_BUFFER;

-    memoryDesc.size = requirements.memoryRequirements.size;
-    memoryDesc.alignment = (uint32_t)requirements.memoryRequirements.alignment;
-    memoryDesc.type = memoryType.packed;
-    memoryDesc.mustBeDedicated = dedicatedRequirements.requiresDedicatedAllocation;
+    GetMemoryDesc(bufferDesc, memoryLocation, memoryDesc);
 }

-bool DeviceVK::GetMemoryType(MemoryLocation memoryLocation, uint32_t memoryTypeMask, MemoryTypeInfo& memoryTypeInfo) const {
+bool DeviceVK::GetMemoryTypeInfo(MemoryLocation memoryLocation, uint32_t memoryTypeMask, MemoryTypeInfo& memoryTypeInfo) const {
     VkMemoryPropertyFlags neededFlags = 0;    // must have
     VkMemoryPropertyFlags undesiredFlags = 0; // have higher priority than desired
     VkMemoryPropertyFlags desiredFlags = 0;   // nice to have

     if (memoryLocation == MemoryLocation::DEVICE) {
         neededFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
-        undesiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+        undesiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
     } else if (memoryLocation == MemoryLocation::DEVICE_UPLOAD) {
-        neededFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
-        desiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+        neededFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
         undesiredFlags = VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
+        desiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
     } else {
-        neededFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+        neededFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
         undesiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
-        desiredFlags = memoryLocation == MemoryLocation::HOST_READBACK ? VK_MEMORY_PROPERTY_HOST_CACHED_BIT : 0;
+        desiredFlags = (memoryLocation == MemoryLocation::HOST_READBACK ? VK_MEMORY_PROPERTY_HOST_CACHED_BIT : 0) | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
     }

     // Phase 1: needed, undesired and desired
@@ -1090,6 +1082,8 @@ bool DeviceVK::GetMemoryType(MemoryLocation memoryLocation, uint32_t memoryTypeM
         }
     }

+    CHECK(false, "Can't find suitable memory type");
+
     return false;
 }

@@ -1110,6 +1104,33 @@ bool DeviceVK::GetMemoryTypeByIndex(uint32_t index, MemoryTypeInfo& memoryTypeIn
     return true;
 }

+void DeviceVK::GetAccelerationStructureBuildSizesInfo(const AccelerationStructureDesc& accelerationStructureDesc, VkAccelerationStructureBuildSizesInfoKHR& sizesInfo) {
+    uint32_t geometryCount = accelerationStructureDesc.type == AccelerationStructureType::BOTTOM_LEVEL ? accelerationStructureDesc.instanceOrGeometryObjectNum : 1;
+    Scratch<uint32_t> primitiveMaxNums = AllocateScratch(*this, uint32_t, geometryCount);
+    Scratch<VkAccelerationStructureGeometryKHR> geometries = AllocateScratch(*this, VkAccelerationStructureGeometryKHR, geometryCount);
+
+    if (accelerationStructureDesc.type == AccelerationStructureType::BOTTOM_LEVEL)
+        ConvertGeometryObjectSizesVK(geometries, primitiveMaxNums, accelerationStructureDesc.geometryObjects, geometryCount);
+    else {
+        geometries[0] = {VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR};
+        geometries[0].geometryType = VK_GEOMETRY_TYPE_INSTANCES_KHR;
+        geometries[0].geometry.triangles.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR;
+        geometries[0].geometry.aabbs.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_AABBS_DATA_KHR;
+        geometries[0].geometry.instances.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR;
+
+        primitiveMaxNums[0] = accelerationStructureDesc.instanceOrGeometryObjectNum;
+    }
+
+    VkAccelerationStructureBuildGeometryInfoKHR buildInfo = {VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR};
+    buildInfo.type = GetAccelerationStructureType(accelerationStructureDesc.type);
+    buildInfo.flags = GetAccelerationStructureBuildFlags(accelerationStructureDesc.flags);
+    buildInfo.geometryCount = geometryCount;
+    buildInfo.pGeometries = geometries;
+
+    const auto& vk = GetDispatchTable();
+    vk.GetAccelerationStructureBuildSizesKHR(m_Device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &buildInfo, primitiveMaxNums, &sizesInfo);
+}
+
 const char* GetObjectTypeName(VkObjectType objectType) {
     switch (objectType) {
         case VK_OBJECT_TYPE_INSTANCE:
@@ -1524,6 +1545,7 @@ Result DeviceVK::ResolveDispatchTable(const Vector& desiredDeviceEx
     GET_DEVICE_CORE_OR_KHR_PROC(MapMemory);
     GET_DEVICE_CORE_OR_KHR_PROC(UnmapMemory);
     GET_DEVICE_CORE_OR_KHR_PROC(FreeMemory);
+    GET_DEVICE_CORE_OR_KHR_PROC(FlushMappedMemoryRanges);
     GET_DEVICE_CORE_OR_KHR_PROC(QueueWaitIdle);
     GET_DEVICE_CORE_OR_KHR_PROC(QueueSubmit2);
     GET_DEVICE_CORE_OR_KHR_PROC(GetSemaphoreCounterValue);
@@ -1629,8 +1651,8 @@ Result DeviceVK::ResolveDispatchTable(const Vector& desiredDeviceEx
     return Result::SUCCESS;
 }

-void DeviceVK::Destroy() {
-    Deallocate(GetStdAllocator(), this);
+void DeviceVK::Destruct() {
+    Destroy(GetStdAllocator(), this);
 }

 //================================================================================================================
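NOTE (illustrative aside, not part of the patch): FlushMappedMemoryRanges is added to the dispatch table because HOST_COHERENT is demoted from a hard requirement to a preference in GetMemoryTypeInfo, so writes through a mapped but non-coherent memory type must now be flushed explicitly. A minimal sketch of that obligation using plain Vulkan; WriteMapped is an illustrative helper, not NRI API:

    #include <cstring>
    #include <vulkan/vulkan.h>

    // Copy data into a mapped range, then flush it if the memory type lacks HOST_COHERENT
    void WriteMapped(VkDevice device, VkDeviceMemory memory, VkDeviceSize offset, VkDeviceSize size, const void* data, bool isHostCoherent) {
        void* dst = nullptr;
        vkMapMemory(device, memory, offset, size, 0, &dst);
        memcpy(dst, data, (size_t)size);

        if (!isHostCoherent) {
            VkMappedMemoryRange range = {VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE};
            range.memory = memory;
            range.offset = offset; // real code must align to VkPhysicalDeviceLimits::nonCoherentAtomSize
            range.size = size;
            vkFlushMappedMemoryRanges(device, 1, &range);
        }

        vkUnmapMemory(device, memory);
    }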
@@ -1697,10 +1719,11 @@ inline Result DeviceVK::CreateCommandQueue(const CommandQueueVKDesc& commandQueu
     }

     // Create
-    Result result = CreateImplementation<CommandQueueVK>(commandQueue, commandQueueVKDesc.commandQueueType, commandQueueVKDesc.queueFamilyIndex, (VkQueue)commandQueueVKDesc.vkQueue);
+    Result result =
+        CreateImplementation<CommandQueueVK>(commandQueue, commandQueueVKDesc.commandQueueType, commandQueueVKDesc.queueFamilyIndex, (VkQueue)commandQueueVKDesc.vkQueue);

     if (result == Result::SUCCESS) {
         // Replace old with new
-        Deallocate(GetStdAllocator(), m_CommandQueues[index]);
+        Destroy(GetStdAllocator(), m_CommandQueues[index]);

         m_CommandQueues[index] = (CommandQueueVK*)commandQueue;
         m_QueueFamilyIndices[index] = commandQueueVKDesc.queueFamilyIndex;
@@ -1724,275 +1747,88 @@ inline Result DeviceVK::CreateCommandQueue(const CommandQueueVKDesc& commandQueu
     return result;
 }

-inline Result DeviceVK::CreateCommandAllocator(const CommandQueue& commandQueue, CommandAllocator*& commandAllocator) {
-    return CreateImplementation<CommandAllocatorVK>(commandAllocator, commandQueue);
-}
-
-inline Result DeviceVK::CreateDescriptorPool(const DescriptorPoolDesc& descriptorPoolDesc, DescriptorPool*& descriptorPool) {
-    return CreateImplementation<DescriptorPoolVK>(descriptorPool, descriptorPoolDesc);
-}
-
-Result DeviceVK::CreateBuffer(const BufferDesc& bufferDesc, Buffer*& buffer) { // TODO: not inline
-    return CreateImplementation<BufferVK>(buffer, bufferDesc);
-}
-
-inline Result DeviceVK::CreateTexture(const TextureDesc& textureDesc, Texture*& texture) {
-    return CreateImplementation<TextureVK>(texture, textureDesc);
-}
-
-inline Result DeviceVK::CreateBufferView(const BufferViewDesc& bufferViewDesc, Descriptor*& bufferView) {
-    return CreateImplementation<DescriptorVK>(bufferView, bufferViewDesc);
-}
-
-inline Result DeviceVK::CreateTexture1DView(const Texture1DViewDesc& textureViewDesc, Descriptor*& textureView) {
-    return CreateImplementation<DescriptorVK>(textureView, textureViewDesc);
-}
-
-inline Result DeviceVK::CreateTexture2DView(const Texture2DViewDesc& textureViewDesc, Descriptor*& textureView) {
-    return CreateImplementation<DescriptorVK>(textureView, textureViewDesc);
-}
-
-inline Result DeviceVK::CreateTexture3DView(const Texture3DViewDesc& textureViewDesc, Descriptor*& textureView) {
-    return CreateImplementation<DescriptorVK>(textureView, textureViewDesc);
-}
-
-inline Result DeviceVK::CreateSampler(const SamplerDesc& samplerDesc, Descriptor*& sampler) {
-    return CreateImplementation<DescriptorVK>(sampler, samplerDesc);
-}
-
-inline Result DeviceVK::CreatePipelineLayout(const PipelineLayoutDesc& pipelineLayoutDesc, PipelineLayout*& pipelineLayout) {
-    return CreateImplementation<PipelineLayoutVK>(pipelineLayout, pipelineLayoutDesc);
-}
-
-inline Result DeviceVK::CreatePipeline(const GraphicsPipelineDesc& graphicsPipelineDesc, Pipeline*& pipeline) {
-    return CreateImplementation<PipelineVK>(pipeline, graphicsPipelineDesc);
-}
-
-inline Result DeviceVK::CreatePipeline(const ComputePipelineDesc& computePipelineDesc, Pipeline*& pipeline) {
-    return CreateImplementation<PipelineVK>(pipeline, computePipelineDesc);
-}
-
-inline Result DeviceVK::CreateQueryPool(const QueryPoolDesc& queryPoolDesc, QueryPool*& queryPool) {
-    return CreateImplementation<QueryPoolVK>(queryPool, queryPoolDesc);
-}
-
-inline Result DeviceVK::CreateFence(uint64_t initialValue, Fence*& fence) {
-    return CreateImplementation<FenceVK>(fence, initialValue);
-}
-
-inline Result DeviceVK::CreateSwapChain(const SwapChainDesc& swapChainDesc, SwapChain*& swapChain) {
-    return CreateImplementation<SwapChainVK>(swapChain, swapChainDesc);
-}
-
-inline Result DeviceVK::CreatePipeline(const RayTracingPipelineDesc& rayTracingPipelineDesc, Pipeline*& pipeline) {
-    return CreateImplementation<PipelineVK>(pipeline, rayTracingPipelineDesc);
-}
-
-inline Result DeviceVK::CreateAccelerationStructure(const AccelerationStructureDesc& accelerationStructureDesc, AccelerationStructure*& accelerationStructure) {
-    return CreateImplementation<AccelerationStructureVK>(accelerationStructure, accelerationStructureDesc);
-}
-
-inline Result DeviceVK::CreateCommandAllocator(const CommandAllocatorVKDesc& commandAllocatorVKDesc, CommandAllocator*& commandAllocator) {
-    return CreateImplementation<CommandAllocatorVK>(commandAllocator, commandAllocatorVKDesc);
-}
-
-inline Result DeviceVK::CreateCommandBuffer(const CommandBufferVKDesc& commandBufferVKDesc, CommandBuffer*& commandBuffer) {
-    return CreateImplementation<CommandBufferVK>(commandBuffer, commandBufferVKDesc);
-}
-
-inline Result DeviceVK::CreateDescriptorPool(const DescriptorPoolVKDesc& descriptorPoolVKDesc, DescriptorPool*& descriptorPool) {
-    return CreateImplementation<DescriptorPoolVK>(descriptorPool, descriptorPoolVKDesc);
-}
-
-inline Result DeviceVK::CreateBuffer(const BufferVKDesc& bufferDesc, Buffer*& buffer) {
-    return CreateImplementation<BufferVK>(buffer, bufferDesc);
-}
-
-inline Result DeviceVK::CreateTexture(const TextureVKDesc& textureVKDesc, Texture*& texture) {
-    return CreateImplementation<TextureVK>(texture, textureVKDesc);
-}
-
-inline Result DeviceVK::CreateMemory(const MemoryVKDesc& memoryVKDesc, Memory*& memory) {
-    return CreateImplementation<MemoryVK>(memory, memoryVKDesc);
-}
-
-inline Result DeviceVK::CreateGraphicsPipeline(NRIVkPipeline vkPipeline, Pipeline*& pipeline) {
-    PipelineVK* implementation = Allocate<PipelineVK>(GetStdAllocator(), *this);
-    Result result = implementation->CreateGraphics(vkPipeline);
-
-    if (result == Result::SUCCESS) {
-        pipeline = (Pipeline*)implementation;
-        return Result::SUCCESS;
-    }
-
-    Deallocate(GetStdAllocator(), implementation);
-
-    return result;
-}
-
-inline Result DeviceVK::CreateComputePipeline(NRIVkPipeline vkPipeline, Pipeline*& pipeline) {
-    PipelineVK* implementation = Allocate<PipelineVK>(GetStdAllocator(), *this);
-    Result result = implementation->CreateCompute(vkPipeline);
-
-    if (result == Result::SUCCESS) {
-        pipeline = (Pipeline*)implementation;
-        return Result::SUCCESS;
-    }
-
-    Deallocate(GetStdAllocator(), implementation);
-
-    return result;
-}
-
-inline Result DeviceVK::CreateQueryPool(const QueryPoolVKDesc& queryPoolVKDesc, QueryPool*& queryPool) {
-    return CreateImplementation<QueryPoolVK>(queryPool, queryPoolVKDesc);
-}
-
-inline Result DeviceVK::CreateAccelerationStructure(const AccelerationStructureVKDesc& accelerationStructureDesc, AccelerationStructure*& accelerationStructure) {
-    return CreateImplementation<AccelerationStructureVK>(accelerationStructure, accelerationStructureDesc);
-}
-
-inline void DeviceVK::DestroyCommandAllocator(CommandAllocator& commandAllocator) {
-    Deallocate(GetStdAllocator(), (CommandAllocatorVK*)&commandAllocator);
-}
-
-inline void DeviceVK::DestroyDescriptorPool(DescriptorPool& descriptorPool) {
-    Deallocate(GetStdAllocator(), (DescriptorPoolVK*)&descriptorPool);
-}
-
-void DeviceVK::DestroyBuffer(Buffer& buffer) { // TODO: not inline
-    Deallocate(GetStdAllocator(), (BufferVK*)&buffer);
-}
-
-inline void DeviceVK::DestroyTexture(Texture& texture) {
-    Deallocate(GetStdAllocator(), (TextureVK*)&texture);
-}
-
-inline void DeviceVK::DestroyDescriptor(Descriptor& descriptor) {
-    Deallocate(GetStdAllocator(), (DescriptorVK*)&descriptor);
-}
-
-inline void DeviceVK::DestroyPipelineLayout(PipelineLayout& pipelineLayout) {
-    Deallocate(GetStdAllocator(), (PipelineLayoutVK*)&pipelineLayout);
-}
-
-inline void DeviceVK::DestroyPipeline(Pipeline& pipeline) {
-    Deallocate(GetStdAllocator(), (PipelineVK*)&pipeline);
-}
-
-inline void DeviceVK::DestroyQueryPool(QueryPool& queryPool) {
-    Deallocate(GetStdAllocator(), (QueryPoolVK*)&queryPool);
-}
-
-inline void DeviceVK::DestroyFence(Fence& fence) {
-    Deallocate(GetStdAllocator(), (FenceVK*)&fence);
-}
-
-inline void DeviceVK::DestroySwapChain(SwapChain& swapChain) {
-    Deallocate(GetStdAllocator(), (SwapChainVK*)&swapChain);
-}
-
-inline void DeviceVK::DestroyAccelerationStructure(AccelerationStructure& accelerationStructure) {
-    Deallocate(GetStdAllocator(), (AccelerationStructureVK*)&accelerationStructure);
-}
-
-inline Result DeviceVK::AllocateMemory(const AllocateMemoryDesc& allocateMemoryDesc, Memory*& memory) {
-    return CreateImplementation<MemoryVK>(memory, allocateMemoryDesc);
-}
-
 inline Result DeviceVK::BindBufferMemory(const BufferMemoryBindingDesc* memoryBindingDescs, uint32_t memoryBindingDescNum) {
-    if (memoryBindingDescNum == 0)
+    if (!memoryBindingDescNum)
         return Result::SUCCESS;

-    VkBindBufferMemoryInfo* infos = STACK_ALLOC(VkBindBufferMemoryInfo, memoryBindingDescNum);
-    uint32_t boundMemoryNum = 0;
+    Scratch<VkBindBufferMemoryInfo> infos = AllocateScratch(*this, VkBindBufferMemoryInfo, memoryBindingDescNum);

     for (uint32_t i = 0; i < memoryBindingDescNum; i++) {
-        const BufferMemoryBindingDesc& bindingDesc = memoryBindingDescs[i];
+        const BufferMemoryBindingDesc& memoryBindingDesc = memoryBindingDescs[i];

-        MemoryVK& memoryImpl = *(MemoryVK*)bindingDesc.memory;
-        BufferVK& bufferImpl = *(BufferVK*)bindingDesc.buffer;
+        BufferVK& bufferImpl = *(BufferVK*)memoryBindingDesc.buffer;
+        MemoryVK& memoryImpl = *(MemoryVK*)memoryBindingDesc.memory;

-        const MemoryTypeUnion memoryType = {memoryImpl.GetType()};
-
-        if (memoryImpl.OwnsNativeObjects() && memoryType.unpacked.isDedicated)
+        MemoryTypeInfo memoryTypeInfo = Unpack(memoryImpl.GetType());
+        if (memoryTypeInfo.mustBeDedicated)
             memoryImpl.CreateDedicated(bufferImpl);

-        if (bufferImpl.OwnsNativeObjects()) {
-            VkBindBufferMemoryInfo& info = infos[boundMemoryNum++];
-            info = {VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO};
-            info.buffer = bufferImpl.GetHandle();
-            info.memory = memoryImpl.GetHandle();
-            info.memoryOffset = bindingDesc.offset;
-        }
-
-        if (IsHostVisibleMemory(memoryType.unpacked.location))
-            bufferImpl.SetHostMemory(memoryImpl, bindingDesc.offset);
+        VkBindBufferMemoryInfo& info = infos[i];
+        info = {VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO};
+        info.buffer = bufferImpl.GetHandle();
+        info.memory = memoryImpl.GetHandle();
+        info.memoryOffset = memoryBindingDesc.offset;
     }

-    if (boundMemoryNum > 0) {
-        VkResult result = m_VK.BindBufferMemory2(m_Device, boundMemoryNum, infos);
-        RETURN_ON_FAILURE(this, result == VK_SUCCESS, GetReturnCode(result), "vkBindBufferMemory2 returned %d", (int32_t)result);
-    }
+    VkResult result = m_VK.BindBufferMemory2(m_Device, memoryBindingDescNum, infos);
+    RETURN_ON_FAILURE(this, result == VK_SUCCESS, GetReturnCode(result), "vkBindBufferMemory2 returned %d", (int32_t)result);

     for (uint32_t i = 0; i < memoryBindingDescNum; i++) {
-        BufferVK& bufferImpl = *(BufferVK*)memoryBindingDescs[i].buffer;
-        bufferImpl.ReadDeviceAddress();
+        const BufferMemoryBindingDesc& memoryBindingDesc = memoryBindingDescs[i];
+
+        BufferVK& bufferImpl = *(BufferVK*)memoryBindingDesc.buffer;
+        MemoryVK& memoryImpl = *(MemoryVK*)memoryBindingDesc.memory;
+
+        bufferImpl.FinishMemoryBinding(memoryImpl, memoryBindingDesc.offset);
     }

     return Result::SUCCESS;
 }

 inline Result DeviceVK::BindTextureMemory(const TextureMemoryBindingDesc* memoryBindingDescs, uint32_t memoryBindingDescNum) {
-    if (memoryBindingDescNum == 0)
+    if (!memoryBindingDescNum)
         return Result::SUCCESS;

-    VkBindImageMemoryInfo* infos = STACK_ALLOC(VkBindImageMemoryInfo, memoryBindingDescNum);
-    uint32_t boundMemoryNum = 0;
+    Scratch<VkBindImageMemoryInfo> infos = AllocateScratch(*this, VkBindImageMemoryInfo, memoryBindingDescNum);

     for (uint32_t i = 0; i < memoryBindingDescNum; i++) {
-        const TextureMemoryBindingDesc& bindingDesc = memoryBindingDescs[i];
+        const TextureMemoryBindingDesc& memoryBindingDesc = memoryBindingDescs[i];

-        MemoryVK& memoryImpl = *(MemoryVK*)bindingDesc.memory;
-        TextureVK& textureImpl = *(TextureVK*)bindingDesc.texture;
+        MemoryVK& memoryImpl = *(MemoryVK*)memoryBindingDesc.memory;
+        TextureVK& textureImpl = *(TextureVK*)memoryBindingDesc.texture;

-        const MemoryTypeUnion memoryType = {memoryImpl.GetType()};
-
-        if (memoryImpl.OwnsNativeObjects() && memoryType.unpacked.isDedicated)
+        MemoryTypeInfo memoryTypeInfo = Unpack(memoryImpl.GetType());
+        if (memoryTypeInfo.mustBeDedicated)
             memoryImpl.CreateDedicated(textureImpl);

-        if (textureImpl.OwnsNativeObjects()) {
-            VkBindImageMemoryInfo& info = infos[boundMemoryNum++];
-            info = {VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO};
-            info.image = textureImpl.GetHandle();
-            info.memory = memoryImpl.GetHandle();
-            info.memoryOffset = bindingDesc.offset;
-        }
+        VkBindImageMemoryInfo& info = infos[i];
+        info = {VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO};
+        info.image = textureImpl.GetHandle();
+        info.memory = memoryImpl.GetHandle();
+        info.memoryOffset = memoryBindingDesc.offset;
     }

-    if (boundMemoryNum > 0) {
-        VkResult result = m_VK.BindImageMemory2(m_Device, memoryBindingDescNum, infos);
-        RETURN_ON_FAILURE(this, result == VK_SUCCESS, GetReturnCode(result), "vkBindImageMemory2 returned %d", (int32_t)result);
-    }
+    VkResult result = m_VK.BindImageMemory2(m_Device, memoryBindingDescNum, infos);
+    RETURN_ON_FAILURE(this, result == VK_SUCCESS, GetReturnCode(result), "vkBindImageMemory2 returned %d", (int32_t)result);

     return Result::SUCCESS;
 }

 inline Result DeviceVK::BindAccelerationStructureMemory(const AccelerationStructureMemoryBindingDesc* memoryBindingDescs, uint32_t memoryBindingDescNum) {
-    if (memoryBindingDescNum == 0)
+    if (!memoryBindingDescNum)
         return Result::SUCCESS;

-    BufferMemoryBindingDesc* infos = ALLOCATE_SCRATCH(*this, BufferMemoryBindingDesc, memoryBindingDescNum);
+    Scratch<BufferMemoryBindingDesc> infos = AllocateScratch(*this, BufferMemoryBindingDesc, memoryBindingDescNum);

     for (uint32_t i = 0; i < memoryBindingDescNum; i++) {
-        const AccelerationStructureMemoryBindingDesc& bindingDesc = memoryBindingDescs[i];
-        AccelerationStructureVK& accelerationStructure = *(AccelerationStructureVK*)bindingDesc.accelerationStructure;
+        const AccelerationStructureMemoryBindingDesc& memoryBindingDesc = memoryBindingDescs[i];
+        AccelerationStructureVK& accelerationStructure = *(AccelerationStructureVK*)memoryBindingDesc.accelerationStructure;

         BufferMemoryBindingDesc& bufferMemoryBinding = infos[i];
         bufferMemoryBinding = {};
         bufferMemoryBinding.buffer = (Buffer*)accelerationStructure.GetBuffer();
-        bufferMemoryBinding.memory = bindingDesc.memory;
-        bufferMemoryBinding.offset = bindingDesc.offset;
+        bufferMemoryBinding.memory = memoryBindingDesc.memory;
+        bufferMemoryBinding.offset = memoryBindingDesc.offset;
     }

     Result result = BindBufferMemory(infos, memoryBindingDescNum);
@@ -2002,15 +1838,9 @@ inline Result DeviceVK::BindAccelerationStructureMemory(const AccelerationStruct
         result = accelerationStructure.FinishCreation();
     }

-    FREE_SCRATCH(*this, infos, memoryBindingDescNum);
-
     return result;
 }

-inline void DeviceVK::FreeMemory(Memory& memory) {
-    Deallocate(GetStdAllocator(), (MemoryVK*)&memory);
-}
-
 inline FormatSupportBits DeviceVK::GetFormatSupport(Format format) const {
     FormatSupportBits mask = FormatSupportBits::UNSUPPORTED;
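NOTE (illustrative aside, not part of the patch): the renamed GetMemoryTypeInfo keeps the three flag tiers ("must have" / "undesired" / "nice to have") and now asserts centrally instead of leaving failure handling to every caller. The tiers drive a progressively relaxing scan over VkPhysicalDeviceMemoryProperties; a standalone sketch of the idea follows. FindMemoryTypeIndex is an illustrative helper, and the exact NRI pass order may differ:

    #include <vulkan/vulkan.h>

    static bool FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& memoryProps, uint32_t memoryTypeMask,
        VkMemoryPropertyFlags neededFlags, VkMemoryPropertyFlags undesiredFlags, VkMemoryPropertyFlags desiredFlags, uint32_t& index) {
        // Pass 0: all "needed" and "desired" bits present, no "undesired" bits
        // Pass 1: give up on "desired"
        // Pass 2: additionally tolerate "undesired"
        for (uint32_t pass = 0; pass < 3; pass++) {
            for (uint32_t i = 0; i < memoryProps.memoryTypeCount; i++) {
                if (!(memoryTypeMask & (1u << i)))
                    continue; // the resource can't live in this memory type at all

                VkMemoryPropertyFlags flags = memoryProps.memoryTypes[i].propertyFlags;
                if ((flags & neededFlags) != neededFlags)
                    continue; // "must have" bits are non-negotiable

                bool hasDesired = (flags & desiredFlags) == desiredFlags;
                bool hasUndesired = (flags & undesiredFlags) != 0;
                if ((pass == 0 && hasDesired && !hasUndesired) || (pass == 1 && !hasUndesired) || pass == 2) {
                    index = i;
                    return true; // first acceptable type in the least-relaxed pass wins
                }
            }
        }
        return false; // mirrors the new CHECK(false, "Can't find suitable memory type")
    }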
diff --git a/Source/VK/DeviceVK.h b/Source/VK/DeviceVK.h
index 027e4ca3..97c5ff3e 100644
--- a/Source/VK/DeviceVK.h
+++ b/Source/VK/DeviceVK.h
@@ -35,6 +35,28 @@ struct DeviceVK final : public DeviceBase {
         return m_CoreInterface;
     }

+    inline bool IsHostCoherentMemory(MemoryTypeIndex memoryTypeIndex) const {
+        return (m_MemoryProps.memoryTypes[memoryTypeIndex].propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0;
+    }
+
+    inline VmaAllocator_T* GetVma() const {
+        return m_Vma;
+    }
+
+    template <typename Implementation, typename Interface, typename... Args>
+    inline Result CreateImplementation(Interface*& entity, const Args&... args) {
+        Implementation* impl = Allocate<Implementation>(GetStdAllocator(), *this);
+        Result result = impl->Create(args...);
+
+        if (result != Result::SUCCESS) {
+            Destroy(GetStdAllocator(), impl);
+            entity = nullptr;
+        } else
+            entity = (Interface*)impl;
+
+        return result;
+    }
+
     DeviceVK(const CallbackInterface& callbacks, const StdAllocator<uint8_t>& stdAllocator);
     ~DeviceVK();

@@ -43,62 +65,26 @@ struct DeviceVK final : public DeviceBase {
     void FillCreateInfo(const TextureDesc& bufferDesc, VkImageCreateInfo& info) const;
     void GetMemoryDesc(const BufferDesc& bufferDesc, MemoryLocation memoryLocation, MemoryDesc& memoryDesc) const;
     void GetMemoryDesc(const TextureDesc& textureDesc, MemoryLocation memoryLocation, MemoryDesc& memoryDesc) const;
-    bool GetMemoryType(MemoryLocation memoryLocation, uint32_t memoryTypeMask, MemoryTypeInfo& memoryTypeInfo) const;
+    void GetMemoryDesc(const AccelerationStructureDesc& accelerationStructureDesc, MemoryLocation memoryLocation, MemoryDesc& memoryDesc);
+    bool GetMemoryTypeInfo(MemoryLocation memoryLocation, uint32_t memoryTypeMask, MemoryTypeInfo& memoryTypeInfo) const;
     bool GetMemoryTypeByIndex(uint32_t index, MemoryTypeInfo& memoryTypeInfo) const;
+    void GetAccelerationStructureBuildSizesInfo(const AccelerationStructureDesc& accelerationStructureDesc, VkAccelerationStructureBuildSizesInfoKHR& sizesInfo);
     void SetDebugNameToTrivialObject(VkObjectType objectType, uint64_t handle, const char* name);
+    Result CreateVma();
+    void DestroyVma();

 //================================================================================================================
 // NRI
 //================================================================================================================
     void SetDebugName(const char* name);
-    Result GetCommandQueue(CommandQueueType commandQueueType, CommandQueue*& commandQueue);
     Result CreateCommandQueue(const CommandQueueVKDesc& commandQueueDesc, CommandQueue*& commandQueue);
-    Result CreateCommandAllocator(const CommandQueue& commandQueue, CommandAllocator*& commandAllocator);
-    Result CreateDescriptorPool(const DescriptorPoolDesc& descriptorPoolDesc, DescriptorPool*& descriptorPool);
-    Result CreateBuffer(const BufferDesc& bufferDesc, Buffer*& buffer);
-    Result CreateTexture(const TextureDesc& textureDesc, Texture*& texture);
-    Result CreateBufferView(const BufferViewDesc& bufferViewDesc, Descriptor*& bufferView);
-    Result CreateTexture1DView(const Texture1DViewDesc& textureViewDesc, Descriptor*& textureView);
-    Result CreateTexture2DView(const Texture2DViewDesc& textureViewDesc, Descriptor*& textureView);
-    Result CreateTexture3DView(const Texture3DViewDesc& textureViewDesc, Descriptor*& textureView);
-    Result CreateSampler(const SamplerDesc& samplerDesc, Descriptor*& sampler);
-    Result CreatePipelineLayout(const PipelineLayoutDesc& pipelineLayoutDesc, PipelineLayout*& pipelineLayout);
-    Result CreatePipeline(const GraphicsPipelineDesc& graphicsPipelineDesc, Pipeline*& pipeline);
-    Result CreatePipeline(const ComputePipelineDesc& computePipelineDesc, Pipeline*& pipeline);
-    Result CreateQueryPool(const QueryPoolDesc& queryPoolDesc, QueryPool*& queryPool);
-    Result CreateFence(uint64_t initialValue, Fence*& fence);
-    Result CreateSwapChain(const SwapChainDesc& swapChainDesc, SwapChain*& swapChain);
-    Result CreatePipeline(const RayTracingPipelineDesc& rayTracingPipelineDesc, Pipeline*& pipeline);
-    Result CreateAccelerationStructure(const AccelerationStructureDesc& accelerationStructureDesc, AccelerationStructure*& accelerationStructure);
-    Result CreateCommandAllocator(const CommandAllocatorVKDesc& commandAllocatorDesc, CommandAllocator*& commandAllocator);
-    Result CreateCommandBuffer(const CommandBufferVKDesc& commandBufferDesc, CommandBuffer*& commandBuffer);
-    Result CreateDescriptorPool(const DescriptorPoolVKDesc& descriptorPoolVKDesc, DescriptorPool*& descriptorPool);
-    Result CreateBuffer(const BufferVKDesc& bufferDesc, Buffer*& buffer);
-    Result CreateTexture(const TextureVKDesc& textureVKDesc, Texture*& texture);
-    Result CreateMemory(const MemoryVKDesc& memoryVKDesc, Memory*& memory);
-    Result CreateGraphicsPipeline(NRIVkPipeline vkPipeline, Pipeline*& pipeline);
-    Result CreateComputePipeline(NRIVkPipeline vkPipeline, Pipeline*& pipeline);
-    Result CreateQueryPool(const QueryPoolVKDesc& queryPoolVKDesc, QueryPool*& queryPool);
-    Result CreateAccelerationStructure(const AccelerationStructureVKDesc& accelerationStructureDesc, AccelerationStructure*& accelerationStructure);
-    void DestroyCommandAllocator(CommandAllocator& commandAllocator);
-    void DestroyDescriptorPool(DescriptorPool& descriptorPool);
-    void DestroyBuffer(Buffer& buffer);
-    void DestroyTexture(Texture& texture);
-    void DestroyDescriptor(Descriptor& descriptor);
-    void DestroyPipelineLayout(PipelineLayout& pipelineLayout);
-    void DestroyPipeline(Pipeline& pipeline);
-    void DestroyQueryPool(QueryPool& queryPool);
-    void DestroyFence(Fence& fence);
-    void DestroySwapChain(SwapChain& swapChain);
-    void DestroyAccelerationStructure(AccelerationStructure& accelerationStructure);
-    Result AllocateMemory(const AllocateMemoryDesc& allocateMemoryDesc, Memory*& memory);
+    Result GetCommandQueue(CommandQueueType commandQueueType, CommandQueue*& commandQueue);
     Result BindBufferMemory(const BufferMemoryBindingDesc* memoryBindingDescs, uint32_t memoryBindingDescNum);
     Result BindTextureMemory(const TextureMemoryBindingDesc* memoryBindingDescs, uint32_t memoryBindingDescNum);
+    Result QueryVideoMemoryInfo(MemoryLocation memoryLocation, VideoMemoryInfo& videoMemoryInfo) const;
     Result BindAccelerationStructureMemory(const AccelerationStructureMemoryBindingDesc* memoryBindingDescs, uint32_t memoryBindingDescNum);
-    void FreeMemory(Memory& memory);
     FormatSupportBits GetFormatSupport(Format format) const;
-    Result QueryVideoMemoryInfo(MemoryLocation memoryLocation, VideoMemoryInfo& videoMemoryInfo) const;

 //================================================================================================================
 // DeviceBase
@@ -108,15 +94,16 @@ struct DeviceVK final : public DeviceBase {
         return m_Desc;
     }

-    void Destroy();
+    void Destruct();
     Result FillFunctionTable(CoreInterface& table) const;
+    Result FillFunctionTable(HelperInterface& table) const;
+    Result FillFunctionTable(LowLatencyInterface& table) const;
+    Result FillFunctionTable(MeshShaderInterface& table) const;
+    Result FillFunctionTable(RayTracingInterface& table) const;
+    Result FillFunctionTable(StreamerInterface& table) const;
     Result FillFunctionTable(SwapChainInterface& table) const;
-    Result FillFunctionTable(WrapperVKInterface& wrapperVKInterface) const;
-    Result FillFunctionTable(RayTracingInterface& rayTracingInterface) const;
-    Result FillFunctionTable(MeshShaderInterface& meshShaderInterface) const;
-    Result FillFunctionTable(HelperInterface& helperInterface) const;
-    Result FillFunctionTable(LowLatencyInterface& lowLatencyInterface) const;
-    Result FillFunctionTable(StreamerInterface& streamerInterface) const;
+    Result FillFunctionTable(ResourceAllocatorInterface& table) const;
+    Result FillFunctionTable(WrapperVKInterface& table) const;

 private:
     void FilterInstanceLayers(Vector<const char*>& layers);
@@ -130,9 +117,6 @@ struct DeviceVK final : public DeviceBase {
     Result ResolveInstanceDispatchTable(const Vector<const char*>& desiredInstanceExts);
     Result ResolveDispatchTable(const Vector<const char*>& desiredDeviceExts);

-    template <typename Implementation, typename Interface, typename... Args>
-    Result CreateImplementation(Interface*& entity, const Args&... args);
-
 public:
     bool m_IsDescriptorIndexingSupported = false;
     bool m_IsDeviceAddressSupported = false;
@@ -142,6 +126,7 @@ struct DeviceVK final : public DeviceBase {
     bool m_IsLowLatencySupported = false;
     bool m_IsMemoryPrioritySupported = false;
    bool m_IsMemoryBudgetSupported = false;
+    bool m_IsMaintenance5Supported = false;

 private:
     VkPhysicalDevice m_PhysicalDevice = nullptr;
@@ -159,8 +144,10 @@ struct DeviceVK final : public DeviceBase {
     VkInstance m_Instance = VK_NULL_HANDLE;
     VkAllocationCallbacks* m_AllocationCallbackPtr = nullptr;
     VkDebugUtilsMessengerEXT m_Messenger = VK_NULL_HANDLE;
+    VmaAllocator_T* m_Vma = nullptr;
     uint32_t m_NumActiveFamilyIndices = 0;
-    bool m_OwnsNativeObjects = false;
+    uint32_t m_MinorVersion = 0;
+    bool m_OwnsNativeObjects = true;

     Lock m_Lock;
 };
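NOTE (illustrative aside, not part of the patch): the header now exposes one CreateImplementation template instead of dozens of per-resource Create*/Destroy* declarations, so the allocate, Create(), destroy-on-failure sequence lives in a single place and guarantees a null out-param on failure. A hypothetical call site, mirroring the new DeviceVK.hpp shims below:

    // Illustrative usage only; BufferVK and Destroy are the NRI backend type/helper
    Buffer* buffer = nullptr;
    Result result = deviceVK.CreateImplementation<BufferVK>(buffer, bufferDesc);

    if (result == Result::SUCCESS) {
        // ... use the buffer ...
        Destroy((BufferVK*)buffer); // teardown goes through the same allocator that created it
    }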
TextureVK*)textureViewDesc.texture)->GetDevice(); - return device.CreateTexture1DView(textureViewDesc, textureView); + return device.CreateImplementation(textureView, textureViewDesc); } static Result NRI_CALL CreateTexture2DView(const Texture2DViewDesc& textureViewDesc, Descriptor*& textureView) { DeviceVK& device = ((const TextureVK*)textureViewDesc.texture)->GetDevice(); - return device.CreateTexture2DView(textureViewDesc, textureView); + return device.CreateImplementation(textureView, textureViewDesc); } static Result NRI_CALL CreateTexture3DView(const Texture3DViewDesc& textureViewDesc, Descriptor*& textureView) { DeviceVK& device = ((const TextureVK*)textureViewDesc.texture)->GetDevice(); - return device.CreateTexture3DView(textureViewDesc, textureView); + return device.CreateImplementation(textureView, textureViewDesc); } static Result NRI_CALL CreateSampler(Device& device, const SamplerDesc& samplerDesc, Descriptor*& sampler) { - return ((DeviceVK&)device).CreateSampler(samplerDesc, sampler); + return ((DeviceVK&)device).CreateImplementation(sampler, samplerDesc); } static Result NRI_CALL CreatePipelineLayout(Device& device, const PipelineLayoutDesc& pipelineLayoutDesc, PipelineLayout*& pipelineLayout) { - return ((DeviceVK&)device).CreatePipelineLayout(pipelineLayoutDesc, pipelineLayout); + return ((DeviceVK&)device).CreateImplementation(pipelineLayout, pipelineLayoutDesc); } static Result NRI_CALL CreateGraphicsPipeline(Device& device, const GraphicsPipelineDesc& graphicsPipelineDesc, Pipeline*& pipeline) { - return ((DeviceVK&)device).CreatePipeline(graphicsPipelineDesc, pipeline); + return ((DeviceVK&)device).CreateImplementation(pipeline, graphicsPipelineDesc); } static Result NRI_CALL CreateComputePipeline(Device& device, const ComputePipelineDesc& computePipelineDesc, Pipeline*& pipeline) { - return ((DeviceVK&)device).CreatePipeline(computePipelineDesc, pipeline); + return ((DeviceVK&)device).CreateImplementation(pipeline, computePipelineDesc); } static Result NRI_CALL CreateQueryPool(Device& device, const QueryPoolDesc& queryPoolDesc, QueryPool*& queryPool) { - return ((DeviceVK&)device).CreateQueryPool(queryPoolDesc, queryPool); + return ((DeviceVK&)device).CreateImplementation(queryPool, queryPoolDesc); } static Result NRI_CALL CreateFence(Device& device, uint64_t initialValue, Fence*& fence) { - return ((DeviceVK&)device).CreateFence(initialValue, fence); + return ((DeviceVK&)device).CreateImplementation(fence, initialValue); } -static void NRI_CALL DestroyCommandAllocator(CommandAllocator& commandAllocator) { - if (!(&commandAllocator)) - return; +static void NRI_CALL DestroyCommandBuffer(CommandBuffer& commandBuffer) { + Destroy((CommandBufferVK*)&commandBuffer); +} - ((CommandAllocatorVK&)commandAllocator).GetDevice().DestroyCommandAllocator(commandAllocator); +static void NRI_CALL DestroyCommandAllocator(CommandAllocator& commandAllocator) { + Destroy((CommandAllocatorVK*)&commandAllocator); } static void NRI_CALL DestroyDescriptorPool(DescriptorPool& descriptorPool) { - if (!(&descriptorPool)) - return; - - ((DescriptorPoolVK&)descriptorPool).GetDevice().DestroyDescriptorPool(descriptorPool); + Destroy((DescriptorPoolVK*)&descriptorPool); } static void NRI_CALL DestroyBuffer(Buffer& buffer) { - if (!(&buffer)) - return; - - ((BufferVK&)buffer).GetDevice().DestroyBuffer(buffer); + Destroy((BufferVK*)&buffer); } static void NRI_CALL DestroyTexture(Texture& texture) { - if (!(&texture)) - return; - - ((TextureVK&)texture).GetDevice().DestroyTexture(texture); + 
Destroy((TextureVK*)&texture); } static void NRI_CALL DestroyDescriptor(Descriptor& descriptor) { - if (!(&descriptor)) - return; - - ((DescriptorVK&)descriptor).GetDevice().DestroyDescriptor(descriptor); + Destroy((DescriptorVK*)&descriptor); } static void NRI_CALL DestroyPipelineLayout(PipelineLayout& pipelineLayout) { - if (!(&pipelineLayout)) - return; - - ((PipelineLayoutVK&)pipelineLayout).GetDevice().DestroyPipelineLayout(pipelineLayout); + Destroy((PipelineLayoutVK*)&pipelineLayout); } static void NRI_CALL DestroyPipeline(Pipeline& pipeline) { - if (!(&pipeline)) - return; - - ((PipelineVK&)pipeline).GetDevice().DestroyPipeline(pipeline); + Destroy((PipelineVK*)&pipeline); } static void NRI_CALL DestroyQueryPool(QueryPool& queryPool) { - if (!(&queryPool)) - return; - - ((QueryPoolVK&)queryPool).GetDevice().DestroyQueryPool(queryPool); + Destroy((QueryPoolVK*)&queryPool); } static void NRI_CALL DestroyFence(Fence& fence) { - if (!(&fence)) - return; - - ((FenceVK&)fence).GetDevice().DestroyFence(fence); + Destroy((FenceVK*)&fence); } static Result NRI_CALL AllocateMemory(Device& device, const AllocateMemoryDesc& allocateMemoryDesc, Memory*& memory) { - return ((DeviceVK&)device).AllocateMemory(allocateMemoryDesc, memory); + return ((DeviceVK&)device).CreateImplementation(memory, allocateMemoryDesc); } static Result NRI_CALL BindBufferMemory(Device& device, const BufferMemoryBindingDesc* memoryBindingDescs, uint32_t memoryBindingDescNum) { @@ -169,10 +146,7 @@ static Result NRI_CALL BindTextureMemory(Device& device, const TextureMemoryBind } static void NRI_CALL FreeMemory(Memory& memory) { - if (!(&memory)) - return; - - ((MemoryVK&)memory).GetDevice().FreeMemory(memory); + Destroy((MemoryVK*)&memory); } static void NRI_CALL SetDeviceDebugName(Device& device, const char* name) { @@ -198,182 +172,97 @@ static void* NRI_CALL GetDeviceNativeObject(const Device& device) { return (VkDevice)((DeviceVK&)device); } -Result DeviceVK::FillFunctionTable(CoreInterface& coreInterface) const { - coreInterface = {}; - coreInterface.GetDeviceDesc = ::GetDeviceDesc; - coreInterface.GetBufferDesc = ::GetBufferDesc; - coreInterface.GetTextureDesc = ::GetTextureDesc; - coreInterface.GetFormatSupport = ::GetFormatSupport; - coreInterface.GetBufferMemoryDesc = ::GetBufferMemoryDesc; - coreInterface.GetTextureMemoryDesc = ::GetTextureMemoryDesc; - coreInterface.GetCommandQueue = ::GetCommandQueue; - coreInterface.CreateCommandAllocator = ::CreateCommandAllocator; - coreInterface.CreateDescriptorPool = ::CreateDescriptorPool; - coreInterface.CreateBuffer = ::CreateBuffer; - coreInterface.CreateTexture = ::CreateTexture; - coreInterface.CreateBufferView = ::CreateBufferView; - coreInterface.CreateTexture1DView = ::CreateTexture1DView; - coreInterface.CreateTexture2DView = ::CreateTexture2DView; - coreInterface.CreateTexture3DView = ::CreateTexture3DView; - coreInterface.CreateSampler = ::CreateSampler; - coreInterface.CreatePipelineLayout = ::CreatePipelineLayout; - coreInterface.CreateGraphicsPipeline = ::CreateGraphicsPipeline; - coreInterface.CreateComputePipeline = ::CreateComputePipeline; - coreInterface.CreateQueryPool = ::CreateQueryPool; - coreInterface.CreateFence = ::CreateFence; - coreInterface.DestroyCommandAllocator = ::DestroyCommandAllocator; - coreInterface.DestroyDescriptorPool = ::DestroyDescriptorPool; - coreInterface.DestroyBuffer = ::DestroyBuffer; - coreInterface.DestroyTexture = ::DestroyTexture; - coreInterface.DestroyDescriptor = ::DestroyDescriptor; - 
coreInterface.DestroyPipelineLayout = ::DestroyPipelineLayout; - coreInterface.DestroyPipeline = ::DestroyPipeline; - coreInterface.DestroyQueryPool = ::DestroyQueryPool; - coreInterface.DestroyFence = ::DestroyFence; - coreInterface.AllocateMemory = ::AllocateMemory; - coreInterface.BindBufferMemory = ::BindBufferMemory; - coreInterface.BindTextureMemory = ::BindTextureMemory; - coreInterface.FreeMemory = ::FreeMemory; - coreInterface.SetDeviceDebugName = ::SetDeviceDebugName; - coreInterface.SetPipelineDebugName = ::SetPipelineDebugName; - coreInterface.SetPipelineLayoutDebugName = ::SetPipelineLayoutDebugName; - coreInterface.SetMemoryDebugName = ::SetMemoryDebugName; - coreInterface.GetDeviceNativeObject = ::GetDeviceNativeObject; - - Core_Buffer_PartiallyFillFunctionTableVK(coreInterface); - Core_CommandAllocator_PartiallyFillFunctionTableVK(coreInterface); - Core_CommandBuffer_PartiallyFillFunctionTableVK(coreInterface); - Core_CommandQueue_PartiallyFillFunctionTableVK(coreInterface); - Core_Descriptor_PartiallyFillFunctionTableVK(coreInterface); - Core_DescriptorPool_PartiallyFillFunctionTableVK(coreInterface); - Core_DescriptorSet_PartiallyFillFunctionTableVK(coreInterface); - Core_Fence_PartiallyFillFunctionTableVK(coreInterface); - Core_QueryPool_PartiallyFillFunctionTableVK(coreInterface); - Core_Texture_PartiallyFillFunctionTableVK(coreInterface); - - return ValidateFunctionTable(coreInterface); -} - -#pragma endregion - -#pragma region[ SwapChain ] +Result DeviceVK::FillFunctionTable(CoreInterface& table) const { + table = {}; + Core_Device_PartiallyFillFunctionTableVK(table); + Core_Buffer_PartiallyFillFunctionTableVK(table); + Core_CommandAllocator_PartiallyFillFunctionTableVK(table); + Core_CommandBuffer_PartiallyFillFunctionTableVK(table); + Core_CommandQueue_PartiallyFillFunctionTableVK(table); + Core_Descriptor_PartiallyFillFunctionTableVK(table); + Core_DescriptorPool_PartiallyFillFunctionTableVK(table); + Core_DescriptorSet_PartiallyFillFunctionTableVK(table); + Core_Fence_PartiallyFillFunctionTableVK(table); + Core_QueryPool_PartiallyFillFunctionTableVK(table); + Core_Texture_PartiallyFillFunctionTableVK(table); -static Result NRI_CALL CreateSwapChain(Device& device, const SwapChainDesc& swapChainDesc, SwapChain*& swapChain) { - return ((DeviceVK&)device).CreateSwapChain(swapChainDesc, swapChain); -} - -static void NRI_CALL DestroySwapChain(SwapChain& swapChain) { - if (!(&swapChain)) - return; - - return ((SwapChainVK&)swapChain).GetDevice().DestroySwapChain(swapChain); -} - -Result DeviceVK::FillFunctionTable(SwapChainInterface& swapChainInterface) const { - swapChainInterface = {}; - swapChainInterface.CreateSwapChain = ::CreateSwapChain; - swapChainInterface.DestroySwapChain = ::DestroySwapChain; - - SwapChain_PartiallyFillFunctionTableVK(swapChainInterface); - - return ValidateFunctionTable(swapChainInterface); + return ValidateFunctionTable(table); } #pragma endregion -#pragma region[ WrapperVK ] +#pragma region[ Helper ] -static Result NRI_CALL CreateCommandQueue(Device& device, const CommandQueueVKDesc& commandQueueVKDesc, CommandQueue*& commandQueue) { - return ((DeviceVK&)device).CreateCommandQueue(commandQueueVKDesc, commandQueue); -} +static uint32_t NRI_CALL CalculateAllocationNumber(const Device& device, const ResourceGroupDesc& resourceGroupDesc) { + DeviceVK& deviceVK = (DeviceVK&)device; + HelperDeviceMemoryAllocator allocator(deviceVK.GetCoreInterface(), (Device&)device); -static Result NRI_CALL CreateCommandAllocator(Device& device, const 
CommandAllocatorVKDesc& commandAllocatorVKDesc, CommandAllocator*& commandAllocator) { - return ((DeviceVK&)device).CreateCommandAllocator(commandAllocatorVKDesc, commandAllocator); + return allocator.CalculateAllocationNumber(resourceGroupDesc); } -static Result NRI_CALL CreateCommandBuffer(Device& device, const CommandBufferVKDesc& commandBufferVKDesc, CommandBuffer*& commandBuffer) { - return ((DeviceVK&)device).CreateCommandBuffer(commandBufferVKDesc, commandBuffer); -} +static Result NRI_CALL AllocateAndBindMemory(Device& device, const ResourceGroupDesc& resourceGroupDesc, Memory** allocations) { + DeviceVK& deviceVK = (DeviceVK&)device; + HelperDeviceMemoryAllocator allocator(deviceVK.GetCoreInterface(), device); -static Result NRI_CALL CreateDescriptorPool(Device& device, const DescriptorPoolVKDesc& descriptorPoolVKDesc, DescriptorPool*& descriptorPool) { - return ((DeviceVK&)device).CreateDescriptorPool(descriptorPoolVKDesc, descriptorPool); + return allocator.AllocateAndBindMemory(resourceGroupDesc, allocations); } -static Result NRI_CALL CreateBuffer(Device& device, const BufferVKDesc& bufferVKDesc, Buffer*& buffer) { - return ((DeviceVK&)device).CreateBuffer(bufferVKDesc, buffer); +static Result NRI_CALL QueryVideoMemoryInfo(const Device& device, MemoryLocation memoryLocation, VideoMemoryInfo& videoMemoryInfo) { + return ((DeviceVK&)device).QueryVideoMemoryInfo(memoryLocation, videoMemoryInfo); } -static Result NRI_CALL CreateTexture(Device& device, const TextureVKDesc& textureVKDesc, Texture*& texture) { - return ((DeviceVK&)device).CreateTexture(textureVKDesc, texture); -} +Result DeviceVK::FillFunctionTable(HelperInterface& helperInterface) const { + helperInterface = {}; + Helper_CommandQueue_PartiallyFillFunctionTableVK(helperInterface); + Helper_Device_PartiallyFillFunctionTableVK(helperInterface); -static Result NRI_CALL CreateMemory(Device& device, const MemoryVKDesc& memoryVKDesc, Memory*& memory) { - return ((DeviceVK&)device).CreateMemory(memoryVKDesc, memory); + return ValidateFunctionTable(helperInterface); } -static Result NRI_CALL CreateGraphicsPipeline(Device& device, NRIVkPipeline vkPipeline, Pipeline*& pipeline) { - return ((DeviceVK&)device).CreateGraphicsPipeline(vkPipeline, pipeline); -} +#pragma endregion -static Result NRI_CALL CreateComputePipeline(Device& device, NRIVkPipeline vkPipeline, Pipeline*& pipeline) { - return ((DeviceVK&)device).CreateComputePipeline(vkPipeline, pipeline); -} +#pragma region[ LowLatency ] -static Result NRI_CALL CreateQueryPool(Device& device, const QueryPoolVKDesc& queryPoolVKDesc, QueryPool*& queryPool) { - return ((DeviceVK&)device).CreateQueryPool(queryPoolVKDesc, queryPool); -} +Result DeviceVK::FillFunctionTable(LowLatencyInterface& table) const { + table = {}; + if (!m_Desc.isLowLatencySupported) + return Result::UNSUPPORTED; -static Result NRI_CALL CreateAccelerationStructure(Device& device, const AccelerationStructureVKDesc& accelerationStructureVKDesc, AccelerationStructure*& accelerationStructure) { - return ((DeviceVK&)device).CreateAccelerationStructure(accelerationStructureVKDesc, accelerationStructure); -} + LowLatency_CommandQueue_PartiallyFillFunctionTableVK(table); + LowLatency_SwapChain_SwapChain_PartiallyFillFunctionTableVK(table); -static NRIVkPhysicalDevice NRI_CALL GetVkPhysicalDevice(const Device& device) { - return (VkPhysicalDevice)((DeviceVK&)device); + return ValidateFunctionTable(table); } -static NRIVkInstance NRI_CALL GetVkInstance(const Device& device) { - return (VkInstance)((DeviceVK&)device); 
-} +#pragma endregion -static void* NRI_CALL GetVkGetInstanceProcAddr(const Device& device) { - return (void*)(((DeviceVK&)device).GetDispatchTable().GetInstanceProcAddr); -} +#pragma region[ MeshShader ] -static void* NRI_CALL GetVkGetDeviceProcAddr(const Device& device) { - return (void*)(((DeviceVK&)device).GetDispatchTable().GetDeviceProcAddr); -} +Result DeviceVK::FillFunctionTable(MeshShaderInterface& table) const { + table = {}; + if (!m_Desc.isMeshShaderSupported) + return Result::UNSUPPORTED; -Result DeviceVK::FillFunctionTable(WrapperVKInterface& wrapperVKInterface) const { - wrapperVKInterface = {}; - wrapperVKInterface.CreateCommandQueueVK = ::CreateCommandQueue; - wrapperVKInterface.CreateCommandAllocatorVK = ::CreateCommandAllocator; - wrapperVKInterface.CreateCommandBufferVK = ::CreateCommandBuffer; - wrapperVKInterface.CreateDescriptorPoolVK = ::CreateDescriptorPool; - wrapperVKInterface.CreateBufferVK = ::CreateBuffer; - wrapperVKInterface.CreateTextureVK = ::CreateTexture; - wrapperVKInterface.CreateMemoryVK = ::CreateMemory; - wrapperVKInterface.CreateGraphicsPipelineVK = ::CreateGraphicsPipeline; - wrapperVKInterface.CreateComputePipelineVK = ::CreateComputePipeline; - wrapperVKInterface.CreateQueryPoolVK = ::CreateQueryPool; - wrapperVKInterface.CreateAccelerationStructureVK = ::CreateAccelerationStructure; - wrapperVKInterface.GetVkPhysicalDevice = ::GetVkPhysicalDevice; - wrapperVKInterface.GetVkInstance = ::GetVkInstance; - wrapperVKInterface.GetVkGetDeviceProcAddr = ::GetVkGetDeviceProcAddr; - wrapperVKInterface.GetVkGetInstanceProcAddr = ::GetVkGetInstanceProcAddr; + MeshShader_CommandBuffer_PartiallyFillFunctionTableVK(table); - return ValidateFunctionTable(wrapperVKInterface); + return ValidateFunctionTable(table); } #pragma endregion #pragma region[ RayTracing ] +static void NRI_CALL GetAccelerationStructureMemoryDesc( + const Device& device, const AccelerationStructureDesc& accelerationStructureDesc, MemoryLocation memoryLocation, MemoryDesc& memoryDesc) { + ((DeviceVK&)device).GetMemoryDesc(accelerationStructureDesc, memoryLocation, memoryDesc); +} + static Result NRI_CALL CreateRayTracingPipeline(Device& device, const RayTracingPipelineDesc& pipelineDesc, Pipeline*& pipeline) { - return ((DeviceVK&)device).CreatePipeline(pipelineDesc, pipeline); + return ((DeviceVK&)device).CreateImplementation(pipeline, pipelineDesc); } static Result NRI_CALL CreateAccelerationStructure(Device& device, const AccelerationStructureDesc& accelerationStructureDesc, AccelerationStructure*& accelerationStructure) { - return ((DeviceVK&)device).CreateAccelerationStructure(accelerationStructureDesc, accelerationStructure); + return ((DeviceVK&)device).CreateImplementation(accelerationStructure, accelerationStructureDesc); } static Result NRI_CALL BindAccelerationStructureMemory(Device& device, const AccelerationStructureMemoryBindingDesc* memoryBindingDescs, uint32_t memoryBindingDescNum) { @@ -381,159 +270,201 @@ static Result NRI_CALL BindAccelerationStructureMemory(Device& device, const Acc } static void NRI_CALL DestroyAccelerationStructure(AccelerationStructure& accelerationStructure) { - if (!(&accelerationStructure)) - return; - - return ((AccelerationStructureVK&)accelerationStructure).GetDevice().DestroyAccelerationStructure(accelerationStructure); + Destroy((AccelerationStructureVK*)&accelerationStructure); } -void FillFunctionTablePipelineVK(RayTracingInterface& rayTracingInterface); +void FillFunctionTablePipelineVK(RayTracingInterface& table); -Result 
DeviceVK::FillFunctionTable(RayTracingInterface& rayTracingInterface) const { +Result DeviceVK::FillFunctionTable(RayTracingInterface& table) const { + table = {}; if (!m_Desc.isRayTracingSupported) return Result::UNSUPPORTED; - rayTracingInterface = {}; - rayTracingInterface.CreateRayTracingPipeline = ::CreateRayTracingPipeline; - rayTracingInterface.CreateAccelerationStructure = ::CreateAccelerationStructure; - rayTracingInterface.BindAccelerationStructureMemory = ::BindAccelerationStructureMemory; - rayTracingInterface.DestroyAccelerationStructure = ::DestroyAccelerationStructure; - - RayTracing_CommandBuffer_PartiallyFillFunctionTableVK(rayTracingInterface); - RayTracing_AccelerationStructure_PartiallyFillFunctionTableVK(rayTracingInterface); - FillFunctionTablePipelineVK(rayTracingInterface); + FillFunctionTablePipelineVK(table); + RayTracing_CommandBuffer_PartiallyFillFunctionTableVK(table); + RayTracing_AccelerationStructure_PartiallyFillFunctionTableVK(table); + RayTracing_Device_PartiallyFillFunctionTableVK(table); - return ValidateFunctionTable(rayTracingInterface); + return ValidateFunctionTable(table); } #pragma endregion -#pragma region[ MeshShader ] +#pragma region[ ResourceAllocator ] -Result DeviceVK::FillFunctionTable(MeshShaderInterface& meshShaderInterface) const { - if (!m_Desc.isMeshShaderSupported) - return Result::UNSUPPORTED; +static Result AllocateBuffer(Device& device, const AllocateBufferDesc& bufferDesc, Buffer*& buffer) { + return ((DeviceVK&)device).CreateImplementation(buffer, bufferDesc); +} - meshShaderInterface = {}; +static Result AllocateTexture(Device& device, const AllocateTextureDesc& textureDesc, Texture*& texture) { + return ((DeviceVK&)device).CreateImplementation(texture, textureDesc); +} + +static Result AllocateAccelerationStructure(Device& device, const AllocateAccelerationStructureDesc& accelerationStructureDesc, AccelerationStructure*& accelerationStructure) { + return ((DeviceVK&)device).CreateImplementation(accelerationStructure, accelerationStructureDesc); +} - MeshShader_CommandBuffer_PartiallyFillFunctionTableVK(meshShaderInterface); +Result DeviceVK::FillFunctionTable(ResourceAllocatorInterface& table) const { + table = {}; + ResourceAllocator_Device_PartiallyFillFunctionTableVK(table); - return ValidateFunctionTable(meshShaderInterface); + return ValidateFunctionTable(table); } #pragma endregion -#pragma region[ Helper ] +#pragma region[ Streamer ] -static uint32_t NRI_CALL CalculateAllocationNumber(const Device& device, const ResourceGroupDesc& resourceGroupDesc) { +static Result CreateStreamer(Device& device, const StreamerDesc& streamerDesc, Streamer*& streamer) { DeviceVK& deviceVK = (DeviceVK&)device; - HelperDeviceMemoryAllocator allocator(deviceVK.GetCoreInterface(), (Device&)device); + StreamerImpl* impl = Allocate(deviceVK.GetStdAllocator(), device, deviceVK.GetCoreInterface()); + Result result = impl->Create(streamerDesc); - return allocator.CalculateAllocationNumber(resourceGroupDesc); + if (result != Result::SUCCESS) { + Destroy(deviceVK.GetStdAllocator(), impl); + streamer = nullptr; + } else + streamer = (Streamer*)impl; + + return result; } -static Result NRI_CALL AllocateAndBindMemory(Device& device, const ResourceGroupDesc& resourceGroupDesc, Memory** allocations) { - DeviceVK& deviceVK = (DeviceVK&)device; - HelperDeviceMemoryAllocator allocator(deviceVK.GetCoreInterface(), device); +static void DestroyStreamer(Streamer& streamer) { + Destroy(((DeviceBase&)((StreamerImpl&)streamer).GetDevice()).GetStdAllocator(), 
(StreamerImpl*)&streamer); +} - return allocator.AllocateAndBindMemory(resourceGroupDesc, allocations); +static Buffer* GetStreamerConstantBuffer(Streamer& streamer) { + return ((StreamerImpl&)streamer).GetConstantBuffer(); } -static Result NRI_CALL QueryVideoMemoryInfo(const Device& device, MemoryLocation memoryLocation, VideoMemoryInfo& videoMemoryInfo) { - return ((DeviceVK&)device).QueryVideoMemoryInfo(memoryLocation, videoMemoryInfo); +static uint32_t UpdateStreamerConstantBuffer(Streamer& streamer, const void* data, uint32_t dataSize) { + return ((StreamerImpl&)streamer).UpdateStreamerConstantBuffer(data, dataSize); } -Result DeviceVK::FillFunctionTable(HelperInterface& helperInterface) const { - helperInterface = {}; - helperInterface.CalculateAllocationNumber = ::CalculateAllocationNumber; - helperInterface.AllocateAndBindMemory = ::AllocateAndBindMemory; - helperInterface.QueryVideoMemoryInfo = ::QueryVideoMemoryInfo; +static uint64_t AddStreamerBufferUpdateRequest(Streamer& streamer, const BufferUpdateRequestDesc& bufferUpdateRequestDesc) { + return ((StreamerImpl&)streamer).AddStreamerBufferUpdateRequest(bufferUpdateRequestDesc); +} - Helper_CommandQueue_PartiallyFillFunctionTableVK(helperInterface); +static uint64_t AddStreamerTextureUpdateRequest(Streamer& streamer, const TextureUpdateRequestDesc& textureUpdateRequestDesc) { + return ((StreamerImpl&)streamer).AddStreamerTextureUpdateRequest(textureUpdateRequestDesc); +} - return ValidateFunctionTable(helperInterface); +static Result CopyStreamerUpdateRequests(Streamer& streamer) { + return ((StreamerImpl&)streamer).CopyStreamerUpdateRequests(); +} + +static Buffer* GetStreamerDynamicBuffer(Streamer& streamer) { + return ((StreamerImpl&)streamer).GetDynamicBuffer(); +} + +static void CmdUploadStreamerUpdateRequests(CommandBuffer& commandBuffer, Streamer& streamer) { + ((StreamerImpl&)streamer).CmdUploadStreamerUpdateRequests(commandBuffer); +} + +Result DeviceVK::FillFunctionTable(StreamerInterface& table) const { + table = {}; + Streamer_Device_PartiallyFillFunctionTableVK(table); + + return ValidateFunctionTable(table); } #pragma endregion -#pragma region[ LowLatency ] +#pragma region[ SwapChain ] -Result DeviceVK::FillFunctionTable(LowLatencyInterface& lowLatencyInterface) const { - lowLatencyInterface = {}; - if (!m_Desc.isLowLatencySupported) - return Result::UNSUPPORTED; +static Result NRI_CALL CreateSwapChain(Device& device, const SwapChainDesc& swapChainDesc, SwapChain*& swapChain) { + return ((DeviceVK&)device).CreateImplementation(swapChain, swapChainDesc); +} - LowLatency_CommandQueue_PartiallyFillFunctionTableVK(lowLatencyInterface); - LowLatency_SwapChain_PartiallyFillFunctionTableVK(lowLatencyInterface); +static void NRI_CALL DestroySwapChain(SwapChain& swapChain) { + Destroy((SwapChainVK*)&swapChain); +} + +Result DeviceVK::FillFunctionTable(SwapChainInterface& table) const { + table = {}; + SwapChain_Device_PartiallyFillFunctionTableVK(table); + SwapChain_SwapChain_PartiallyFillFunctionTableVK(table); - return ValidateFunctionTable(lowLatencyInterface); + return ValidateFunctionTable(table); } #pragma endregion -#pragma region[ Streamer ] +#pragma region[ WrapperVK ] -static Result CreateStreamer(Device& device, const StreamerDesc& streamerDesc, Streamer*& streamer) { - DeviceVK& deviceVK = (DeviceVK&)device; +static Result NRI_CALL CreateCommandQueueVK(Device& device, const CommandQueueVKDesc& commandQueueVKDesc, CommandQueue*& commandQueue) { + return ((DeviceVK&)device).CreateCommandQueue(commandQueueVKDesc, 
-static Result CreateStreamer(Device& device, const StreamerDesc& streamerDesc, Streamer*& streamer) { - DeviceVK& deviceVK = (DeviceVK&)device; +static Result NRI_CALL CreateCommandQueueVK(Device& device, const CommandQueueVKDesc& commandQueueVKDesc, CommandQueue*& commandQueue) { + return ((DeviceVK&)device).CreateCommandQueue(commandQueueVKDesc, commandQueue); +} - StreamerImpl* implementation = Allocate<StreamerImpl>(deviceVK.GetStdAllocator(), device, deviceVK.GetCoreInterface()); - Result res = implementation->Create(streamerDesc); +static Result NRI_CALL CreateCommandAllocatorVK(Device& device, const CommandAllocatorVKDesc& commandAllocatorDesc, CommandAllocator*& commandAllocator) { + return ((DeviceVK&)device).CreateImplementation<CommandAllocatorVK>(commandAllocator, commandAllocatorDesc); +} - if (res == Result::SUCCESS) { - streamer = (Streamer*)implementation; - return Result::SUCCESS; - } +static Result NRI_CALL CreateCommandBufferVK(Device& device, const CommandBufferVKDesc& commandBufferDesc, CommandBuffer*& commandBuffer) { + return ((DeviceVK&)device).CreateImplementation<CommandBufferVK>(commandBuffer, commandBufferDesc); +} - Deallocate(deviceVK.GetStdAllocator(), implementation); +static Result NRI_CALL CreateDescriptorPoolVK(Device& device, const DescriptorPoolVKDesc& descriptorPoolDesc, DescriptorPool*& descriptorPool) { + return ((DeviceVK&)device).CreateImplementation<DescriptorPoolVK>(descriptorPool, descriptorPoolDesc); +} - return res; +static Result NRI_CALL CreateBufferVK(Device& device, const BufferVKDesc& bufferDesc, Buffer*& buffer) { + return ((DeviceVK&)device).CreateImplementation<BufferVK>(buffer, bufferDesc); } -static void DestroyStreamer(Streamer& streamer) { - Deallocate(((DeviceBase&)((StreamerImpl&)streamer).GetDevice()).GetStdAllocator(), (StreamerImpl*)&streamer); +static Result NRI_CALL CreateTextureVK(Device& device, const TextureVKDesc& textureDesc, Texture*& texture) { + return ((DeviceVK&)device).CreateImplementation<TextureVK>(texture, textureDesc); } -static Buffer* GetStreamerConstantBuffer(Streamer& streamer) { - return ((StreamerImpl&)streamer).GetConstantBuffer(); +static Result NRI_CALL CreateMemoryVK(Device& device, const MemoryVKDesc& memoryDesc, Memory*& memory) { + return ((DeviceVK&)device).CreateImplementation<MemoryVK>(memory, memoryDesc); } -static uint32_t UpdateStreamerConstantBuffer(Streamer& streamer, const void* data, uint32_t dataSize) { - return ((StreamerImpl&)streamer).UpdateStreamerConstantBuffer(data, dataSize); +static Result NRI_CALL CreateGraphicsPipelineVK(Device& device, VKNonDispatchableHandle vkPipeline, Pipeline*& pipeline) { + return ((DeviceVK&)device).CreateImplementation<PipelineVK>(pipeline, VK_PIPELINE_BIND_POINT_GRAPHICS, vkPipeline); } -static uint64_t AddStreamerBufferUpdateRequest(Streamer& streamer, const BufferUpdateRequestDesc& bufferUpdateRequestDesc) { - return ((StreamerImpl&)streamer).AddStreamerBufferUpdateRequest(bufferUpdateRequestDesc); +static Result NRI_CALL CreateComputePipelineVK(Device& device, VKNonDispatchableHandle vkPipeline, Pipeline*& pipeline) { + return ((DeviceVK&)device).CreateImplementation<PipelineVK>(pipeline, VK_PIPELINE_BIND_POINT_COMPUTE, vkPipeline); } -static uint64_t AddStreamerTextureUpdateRequest(Streamer& streamer, const TextureUpdateRequestDesc& textureUpdateRequestDesc) { - return ((StreamerImpl&)streamer).AddStreamerTextureUpdateRequest(textureUpdateRequestDesc); +static Result NRI_CALL CreateQueryPoolVK(Device& device, const QueryPoolVKDesc& queryPoolDesc, QueryPool*& queryPool) { + return ((DeviceVK&)device).CreateImplementation<QueryPoolVK>(queryPool, queryPoolDesc); } -static Result CopyStreamerUpdateRequests(Streamer& streamer) { - return ((StreamerImpl&)streamer).CopyStreamerUpdateRequests(); +static Result NRI_CALL CreateAccelerationStructureVK(Device& device, const AccelerationStructureVKDesc& accelerationStructureDesc, AccelerationStructure*& accelerationStructure) { + return
((DeviceVK&)device).CreateImplementation<AccelerationStructureVK>(accelerationStructure, accelerationStructureDesc); } -static Buffer* GetStreamerDynamicBuffer(Streamer& streamer) { - return ((StreamerImpl&)streamer).GetDynamicBuffer(); +static VKHandle NRI_CALL GetPhysicalDeviceVK(const Device& device) { + return (VkPhysicalDevice)((DeviceVK&)device); } -static void CmdUploadStreamerUpdateRequests(CommandBuffer& commandBuffer, Streamer& streamer) { - ((StreamerImpl&)streamer).CmdUploadStreamerUpdateRequests(commandBuffer); +static VKHandle NRI_CALL GetInstanceVK(const Device& device) { + return (VkInstance)((DeviceVK&)device); } -Result DeviceVK::FillFunctionTable(StreamerInterface& streamerInterface) const { - streamerInterface = {}; - streamerInterface.CreateStreamer = ::CreateStreamer; - streamerInterface.DestroyStreamer = ::DestroyStreamer; - streamerInterface.GetStreamerConstantBuffer = ::GetStreamerConstantBuffer; - streamerInterface.UpdateStreamerConstantBuffer = ::UpdateStreamerConstantBuffer; - streamerInterface.AddStreamerBufferUpdateRequest = ::AddStreamerBufferUpdateRequest; - streamerInterface.AddStreamerTextureUpdateRequest = ::AddStreamerTextureUpdateRequest; - streamerInterface.CopyStreamerUpdateRequests = ::CopyStreamerUpdateRequests; - streamerInterface.GetStreamerDynamicBuffer = ::GetStreamerDynamicBuffer; - streamerInterface.CmdUploadStreamerUpdateRequests = ::CmdUploadStreamerUpdateRequests; +static void* NRI_CALL GetInstanceProcAddrVK(const Device& device) { + return ((DeviceVK&)device).GetDispatchTable().GetInstanceProcAddr; +} - return ValidateFunctionTable(streamerInterface); +static void* NRI_CALL GetDeviceProcAddrVK(const Device& device) { + return ((DeviceVK&)device).GetDispatchTable().GetDeviceProcAddr; +} + +Result DeviceVK::FillFunctionTable(WrapperVKInterface& table) const { + table = {}; + WrapperVK_Device_PartiallyFillFunctionTableVK(table); + + return ValidateFunctionTable(table); } #pragma endregion + +Define_Core_Device_PartiallyFillFunctionTable(VK); +Define_Helper_Device_PartiallyFillFunctionTable(VK); +Define_RayTracing_Device_PartiallyFillFunctionTable(VK); +Define_Streamer_Device_PartiallyFillFunctionTable(VK); +Define_SwapChain_Device_PartiallyFillFunctionTable(VK); +Define_ResourceAllocator_Device_PartiallyFillFunctionTable(VK); +Define_WrapperVK_Device_PartiallyFillFunctionTable(VK); diff --git a/Source/VK/DispatchTable.h b/Source/VK/DispatchTable.h index 9f482349..ae114616 100644 --- a/Source/VK/DispatchTable.h +++ b/Source/VK/DispatchTable.h @@ -90,6 +90,7 @@ struct DispatchTable { VULKAN_FUNCTION(MapMemory); VULKAN_FUNCTION(UnmapMemory); VULKAN_FUNCTION(FreeMemory); + VULKAN_FUNCTION(FlushMappedMemoryRanges); VULKAN_FUNCTION(QueueWaitIdle); VULKAN_FUNCTION(QueueSubmit2); VULKAN_FUNCTION(GetSemaphoreCounterValue);
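// [Editorial sketch, not part of the diff.] FlushMappedMemoryRanges joins the
// dispatch table because writes made through a persistently mapped pointer into a
// heap without VK_MEMORY_PROPERTY_HOST_COHERENT_BIT must be flushed explicitly
// before the GPU can read them (see m_NonCoherentDeviceMemory in BufferVK below).
// Assumed variables: vk (the dispatch table), device, nonCoherentMemory:
VkMappedMemoryRange range = {VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE};
range.memory = nonCoherentMemory;
range.offset = 0;               // must be aligned to VkPhysicalDeviceLimits::nonCoherentAtomSize
range.size = VK_WHOLE_SIZE;     // or an atom-size-aligned span covering the dirty bytes
VkResult result = vk.FlushMappedMemoryRanges(device, 1, &range);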
diff --git a/Source/VK/MemoryVK.cpp b/Source/VK/MemoryVK.cpp index 9a2491e8..c041231b 100644 --- a/Source/VK/MemoryVK.cpp +++ b/Source/VK/MemoryVK.cpp @@ -9,26 +9,28 @@ using namespace nri; MemoryVK::~MemoryVK() { - const auto& vk = m_Device.GetDispatchTable(); - - if (m_OwnsNativeObjects) + if (m_OwnsNativeObjects) { + const auto& vk = m_Device.GetDispatchTable(); vk.FreeMemory(m_Device, m_Handle, m_Device.GetAllocationCallbacks()); + } } Result MemoryVK::Create(const MemoryVKDesc& memoryDesc) { - m_OwnsNativeObjects = false; + if (!memoryDesc.vkDeviceMemory) + return Result::INVALID_ARGUMENT; - MemoryTypeUnion memoryType = {}; + MemoryTypeInfo memoryTypeInfo = {}; - bool found = m_Device.GetMemoryTypeByIndex(memoryDesc.memoryTypeIndex, memoryType.unpacked); + bool found = m_Device.GetMemoryTypeByIndex(memoryDesc.memoryTypeIndex, memoryTypeInfo); RETURN_ON_FAILURE(&m_Device, found, Result::INVALID_ARGUMENT, "Can't find memory by index"); + m_OwnsNativeObjects = false; m_Handle = (VkDeviceMemory)memoryDesc.vkDeviceMemory; m_MappedMemory = (uint8_t*)memoryDesc.vkMappedMemory; - m_Type = memoryType.packed; + m_Type = Pack(memoryTypeInfo); const auto& vk = m_Device.GetDispatchTable(); - if (!m_MappedMemory && IsHostVisibleMemory(memoryType.unpacked.location)) { + if (!m_MappedMemory && IsHostVisibleMemory(memoryTypeInfo.location)) { VkResult result = vk.MapMemory(m_Device, m_Handle, 0, memoryDesc.size, 0, (void**)&m_MappedMemory); RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkMapMemory returned %d", (int32_t)result); } @@ -37,12 +39,11 @@ Result MemoryVK::Create(const MemoryVKDesc& memoryDesc) { } Result MemoryVK::Create(const AllocateMemoryDesc& allocateMemoryDesc) { - m_OwnsNativeObjects = true; m_Type = allocateMemoryDesc.type; m_Priority = m_Device.m_IsMemoryPrioritySupported ? (allocateMemoryDesc.priority * 0.5f + 0.5f) : 0.5f; - MemoryTypeUnion memoryType = {allocateMemoryDesc.type}; - if (memoryType.unpacked.isDedicated) + MemoryTypeInfo memoryTypeInfo = Unpack(allocateMemoryDesc.type); + if (memoryTypeInfo.mustBeDedicated) return Result::SUCCESS; // dedicated allocation occurs on memory binding VkMemoryPriorityAllocateInfoEXT priorityInfo = {VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT}; @@ -55,13 +56,13 @@ Result MemoryVK::Create(const AllocateMemoryDesc& allocateMemoryDesc) { VkMemoryAllocateInfo memoryInfo = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO}; memoryInfo.pNext = &flagsInfo; memoryInfo.allocationSize = allocateMemoryDesc.size; - memoryInfo.memoryTypeIndex = memoryType.unpacked.index; + memoryInfo.memoryTypeIndex = memoryTypeInfo.index; const auto& vk = m_Device.GetDispatchTable(); VkResult result = vk.AllocateMemory(m_Device, &memoryInfo, m_Device.GetAllocationCallbacks(), &m_Handle); RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkAllocateMemory returned %d", (int32_t)result); - if (IsHostVisibleMemory(memoryType.unpacked.location)) { + if (IsHostVisibleMemory(memoryTypeInfo.location)) { result = vk.MapMemory(m_Device, m_Handle, 0, allocateMemoryDesc.size, 0, (void**)&m_MappedMemory); RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkMapMemory returned %d", (int32_t)result); } @@ -70,13 +71,11 @@ Result MemoryVK::Create(const AllocateMemoryDesc& allocateMemoryDesc) { } Result MemoryVK::CreateDedicated(const BufferVK& buffer) { - m_OwnsNativeObjects = true; - - MemoryTypeUnion memoryType = {m_Type}; - CHECK(m_Type != std::numeric_limits<MemoryType>::max() && memoryType.unpacked.isDedicated, "Shouldn't be there"); + MemoryTypeInfo memoryTypeInfo = Unpack(m_Type); + CHECK(m_Type != std::numeric_limits<MemoryType>::max() && memoryTypeInfo.mustBeDedicated, "Shouldn't be there"); MemoryDesc memoryDesc = {}; - m_Device.GetMemoryDesc(buffer.GetDesc(), memoryType.unpacked.location, memoryDesc); + m_Device.GetMemoryDesc(buffer.GetDesc(), memoryTypeInfo.location, memoryDesc); VkMemoryPriorityAllocateInfoEXT priorityInfo = {VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT}; priorityInfo.priority = m_Priority; @@ -92,13 +91,13 @@ Result MemoryVK::CreateDedicated(const BufferVK& buffer) { VkMemoryAllocateInfo memoryInfo = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO}; memoryInfo.pNext = &dedicatedAllocateInfo; memoryInfo.allocationSize = memoryDesc.size;
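// [Editorial sketch, not part of the diff.] A "dedicated" allocation like
// CreateDedicated() above ties one VkDeviceMemory to exactly one resource by
// chaining VkMemoryDedicatedAllocateInfo, which some drivers reward with faster
// paths. Assumed variables: device, vkBuffer, memoryDesc/typeIndex from GetMemoryDesc():
VkMemoryDedicatedAllocateInfo dedicatedInfo = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO};
dedicatedInfo.buffer = vkBuffer; // or .image for a texture, as in the texture overload below

VkMemoryAllocateInfo allocateInfo = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO};
allocateInfo.pNext = &dedicatedInfo;
allocateInfo.allocationSize = memoryDesc.size;
allocateInfo.memoryTypeIndex = typeIndex;

VkDeviceMemory memory = VK_NULL_HANDLE;
VkResult result = vkAllocateMemory(device, &allocateInfo, nullptr, &memory);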
- memoryInfo.memoryTypeIndex = memoryType.unpacked.index; + memoryInfo.memoryTypeIndex = memoryTypeInfo.index; const auto& vk = m_Device.GetDispatchTable(); VkResult result = vk.AllocateMemory(m_Device, &memoryInfo, m_Device.GetAllocationCallbacks(), &m_Handle); RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkAllocateMemory returned %d", (int32_t)result); - if (IsHostVisibleMemory(memoryType.unpacked.location)) { + if (IsHostVisibleMemory(memoryTypeInfo.location)) { result = vk.MapMemory(m_Device, m_Handle, 0, memoryDesc.size, 0, (void**)&m_MappedMemory); RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkMapMemory returned %d", (int32_t)result); } @@ -107,13 +106,11 @@ Result MemoryVK::CreateDedicated(const TextureVK& texture) { - m_OwnsNativeObjects = true; - - const MemoryTypeUnion memoryType = {m_Type}; - CHECK(m_Type != std::numeric_limits<MemoryType>::max() && memoryType.unpacked.isDedicated, "Shouldn't be there"); + MemoryTypeInfo memoryTypeInfo = Unpack(m_Type); + CHECK(m_Type != std::numeric_limits<MemoryType>::max() && memoryTypeInfo.mustBeDedicated, "Shouldn't be there"); MemoryDesc memoryDesc = {}; - m_Device.GetMemoryDesc(texture.GetDesc(), memoryType.unpacked.location, memoryDesc); + m_Device.GetMemoryDesc(texture.GetDesc(), memoryTypeInfo.location, memoryDesc); VkMemoryPriorityAllocateInfoEXT priorityInfo = {VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT}; priorityInfo.priority = m_Priority; @@ -129,13 +126,13 @@ Result MemoryVK::CreateDedicated(const TextureVK& texture) { VkMemoryAllocateInfo memoryInfo = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO}; memoryInfo.pNext = &dedicatedAllocateInfo; memoryInfo.allocationSize = memoryDesc.size; - memoryInfo.memoryTypeIndex = memoryType.unpacked.index; + memoryInfo.memoryTypeIndex = memoryTypeInfo.index; const auto& vk = m_Device.GetDispatchTable(); VkResult result = vk.AllocateMemory(m_Device, &memoryInfo, m_Device.GetAllocationCallbacks(), &m_Handle); RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkAllocateMemory returned %d", (int32_t)result); - if (IsHostVisibleMemory(memoryType.unpacked.location)) { + if (IsHostVisibleMemory(memoryTypeInfo.location)) { result = vk.MapMemory(m_Device, m_Handle, 0, memoryDesc.size, 0, (void**)&m_MappedMemory); RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkMapMemory returned %d", (int32_t)result); } diff --git a/Source/VK/MemoryVK.h b/Source/VK/MemoryVK.h index 75664ee0..70c6cc9f 100644 --- a/Source/VK/MemoryVK.h +++ b/Source/VK/MemoryVK.h @@ -30,10 +30,6 @@ struct MemoryVK { return m_MappedMemory; } - inline bool OwnsNativeObjects() const { - return m_OwnsNativeObjects; - } - ~MemoryVK(); Result Create(const MemoryVKDesc& memoryDesc); @@ -53,7 +49,7 @@ struct MemoryVK { uint8_t* m_MappedMemory = nullptr; MemoryType m_Type = std::numeric_limits<MemoryType>::max(); float m_Priority = 0.0f; - bool m_OwnsNativeObjects = false; + bool m_OwnsNativeObjects = true; }; } // namespace nri diff --git a/Source/VK/PipelineLayoutVK.cpp b/Source/VK/PipelineLayoutVK.cpp index 163f0782..b9946bfd 100644 --- a/Source/VK/PipelineLayoutVK.cpp +++ b/Source/VK/PipelineLayoutVK.cpp @@ -111,7 +111,7 @@ Result PipelineLayoutVK::Create(const PipelineLayoutDesc& pipelineLayoutDesc) { setNum++; // Allocate temp memory for ALL "register spaces" making the entire range consecutive (thanks VK API!)
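// [Editorial sketch, not part of the diff.] vkCreatePipelineLayout treats
// pSetLayouts as an array indexed by set number, so sparse "register space"
// indices leave holes that must still hold a valid (empty) layout. Assumed
// variables: descriptorSetLayouts (the scratch array below), emptyLayout, and
// realSetLayouts holding the layouts actually created for each descriptor set:
for (uint32_t i = 0; i < setNum; i++)
    descriptorSetLayouts[i] = emptyLayout; // default every slot to "no bindings"

for (uint32_t i = 0; i < pipelineLayoutDesc.descriptorSetNum; i++) {
    uint32_t space = pipelineLayoutDesc.descriptorSets[i].registerSpace;
    descriptorSetLayouts[space] = realSetLayouts[i]; // overwrite the occupied slots
}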
- VkDescriptorSetLayout* descriptorSetLayouts = ALLOCATE_SCRATCH(m_Device, VkDescriptorSetLayout, setNum); + Scratch<VkDescriptorSetLayout> descriptorSetLayouts = AllocateScratch(m_Device, VkDescriptorSetLayout, setNum); // Create "empty" set layout (needed only if "register space" indices are not consecutive) if (setNum != pipelineLayoutDesc.descriptorSetNum) { @@ -129,7 +129,7 @@ Result PipelineLayoutVK::Create(const PipelineLayoutDesc& pipelineLayoutDesc) { } // Push constants - VkPushConstantRange* pushConstantRanges = ALLOCATE_SCRATCH(m_Device, VkPushConstantRange, pipelineLayoutDesc.pushConstantNum); + Scratch<VkPushConstantRange> pushConstantRanges = AllocateScratch(m_Device, VkPushConstantRange, pipelineLayoutDesc.pushConstantNum); FillPushConstantRanges(pipelineLayoutDesc, pushConstantRanges); // Create pipeline layout @@ -143,9 +143,6 @@ Result PipelineLayoutVK::Create(const PipelineLayoutDesc& pipelineLayoutDesc) { VkResult result = vk.CreatePipelineLayout(m_Device, &pipelineLayoutCreateInfo, m_Device.GetAllocationCallbacks(), &m_Handle); // Cleanup - FREE_SCRATCH(m_Device, pushConstantRanges, pipelineLayoutDesc.pushConstantNum); - FREE_SCRATCH(m_Device, descriptorSetLayouts, setNum); - RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, Result::FAILURE, "vkCreatePipelineLayout returned %d", (int32_t)result); FillRuntimeBindingInfo(pipelineLayoutDesc, bindingOffsets); @@ -180,15 +177,17 @@ VkDescriptorSetLayout PipelineLayoutVK::CreateSetLayout(const DescriptorSetDesc& bindingMaxNum += range.isArray ? 1 : range.descriptorNum; } - VkDescriptorSetLayoutBinding* bindings = ALLOCATE_SCRATCH(m_Device, VkDescriptorSetLayoutBinding, bindingMaxNum); - VkDescriptorBindingFlags* bindingFlags = ALLOCATE_SCRATCH(m_Device, VkDescriptorBindingFlags, bindingMaxNum); + Scratch<VkDescriptorSetLayoutBinding> bindings = AllocateScratch(m_Device, VkDescriptorSetLayoutBinding, bindingMaxNum); + Scratch<VkDescriptorBindingFlags> bindingFlags = AllocateScratch(m_Device, VkDescriptorBindingFlags, bindingMaxNum); VkDescriptorSetLayoutBinding* bindingsBegin = bindings; VkDescriptorBindingFlags* bindingFlagsBegin = bindingFlags; - FillDescriptorBindings(descriptorSetDesc, bindingOffsets, bindings, bindingFlags); - FillDynamicConstantBufferBindings(descriptorSetDesc, bindingOffsets, bindings, bindingFlags); + VkDescriptorSetLayoutBinding* bindingsEnd = bindings; + VkDescriptorBindingFlags* bindingFlagsEnd = bindingFlags; + FillDescriptorBindings(descriptorSetDesc, bindingOffsets, bindingsEnd, bindingFlagsEnd); + FillDynamicConstantBufferBindings(descriptorSetDesc, bindingOffsets, bindingsEnd, bindingFlagsEnd); - const uint32_t bindingNum = uint32_t(bindings - bindingsBegin); + const uint32_t bindingNum = uint32_t(bindingsEnd - bindingsBegin); VkDescriptorSetLayoutBindingFlagsCreateInfo bindingFlagsInfo = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO}; bindingFlagsInfo.bindingCount = bindingNum; @@ -204,9 +203,6 @@ VkDescriptorSetLayout PipelineLayoutVK::CreateSetLayout(const DescriptorSetDesc& VkResult result = vk.CreateDescriptorSetLayout(m_Device, &info, m_Device.GetAllocationCallbacks(), &handle); // Cleanup - FREE_SCRATCH(m_Device, bindingsBegin, bindingMaxNum); - FREE_SCRATCH(m_Device, bindingFlagsBegin, bindingMaxNum); - RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, 0, "vkCreateDescriptorSetLayout returned %d", (int32_t)result); return handle; diff --git a/Source/VK/PipelineVK.cpp b/Source/VK/PipelineVK.cpp index f151eb21..0a58722a 100644 --- a/Source/VK/PipelineVK.cpp +++ b/Source/VK/PipelineVK.cpp @@ -13,21 +13,18 @@ static inline bool
IsConstantColorReferenced(BlendFactor factor) { } PipelineVK::~PipelineVK() { - if (!m_OwnsNativeObjects) - return; - - const auto& vk = m_Device.GetDispatchTable(); - if (m_Handle != VK_NULL_HANDLE) + if (m_OwnsNativeObjects) { + const auto& vk = m_Device.GetDispatchTable(); vk.DestroyPipeline(m_Device, m_Handle, m_Device.GetAllocationCallbacks()); + } } Result PipelineVK::Create(const GraphicsPipelineDesc& graphicsPipelineDesc) { - m_OwnsNativeObjects = true; m_BindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; // Shaders - VkPipelineShaderStageCreateInfo* stages = STACK_ALLOC(VkPipelineShaderStageCreateInfo, graphicsPipelineDesc.shaderNum); - VkShaderModule* modules = STACK_ALLOC(VkShaderModule, graphicsPipelineDesc.shaderNum); + VkPipelineShaderStageCreateInfo* stages = StackAlloc(VkPipelineShaderStageCreateInfo, graphicsPipelineDesc.shaderNum); + VkShaderModule* modules = StackAlloc(VkShaderModule, graphicsPipelineDesc.shaderNum); VkShaderModule* modulesBegin = modules; for (uint32_t i = 0; i < graphicsPipelineDesc.shaderNum; i++) { @@ -43,8 +40,8 @@ Result PipelineVK::Create(const GraphicsPipelineDesc& graphicsPipelineDesc) { const VertexInputDesc* vi = graphicsPipelineDesc.vertexInput; VkPipelineVertexInputStateCreateInfo vertexInputState = {VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO}; - vertexInputState.pVertexAttributeDescriptions = STACK_ALLOC(VkVertexInputAttributeDescription, vi ? vi->attributeNum : 0); - vertexInputState.pVertexBindingDescriptions = STACK_ALLOC(VkVertexInputBindingDescription, vi ? vi->streamNum : 0); + vertexInputState.pVertexAttributeDescriptions = StackAlloc(VkVertexInputAttributeDescription, vi ? vi->attributeNum : 0); + vertexInputState.pVertexBindingDescriptions = StackAlloc(VkVertexInputBindingDescription, vi ? 
vi->streamNum : 0); if (vi) { vertexInputState.vertexAttributeDescriptionCount = vi->attributeNum; vertexInputState.vertexBindingDescriptionCount = vi->streamNum; @@ -163,7 +160,7 @@ Result PipelineVK::Create(const GraphicsPipelineDesc& graphicsPipelineDesc) { colorBlendState.logicOpEnable = om.colorLogicFunc != LogicFunc::NONE; colorBlendState.logicOp = GetLogicOp(om.colorLogicFunc); colorBlendState.attachmentCount = om.colorNum; - colorBlendState.pAttachments = STACK_ALLOC(VkPipelineColorBlendAttachmentState, om.colorNum); + colorBlendState.pAttachments = StackAlloc(VkPipelineColorBlendAttachmentState, om.colorNum); bool isConstantColorReferenced = false; VkPipelineColorBlendAttachmentState* attachments = const_cast<VkPipelineColorBlendAttachmentState*>(colorBlendState.pAttachments); @@ -187,7 +184,7 @@ Result PipelineVK::Create(const GraphicsPipelineDesc& graphicsPipelineDesc) { } // Formats - VkFormat* colorFormats = STACK_ALLOC(VkFormat, om.colorNum); + VkFormat* colorFormats = StackAlloc(VkFormat, om.colorNum); for (uint32_t i = 0; i < om.colorNum; i++) colorFormats[i] = GetVkFormat(om.color[i].format); @@ -252,7 +249,6 @@ Result PipelineVK::Create(const GraphicsPipelineDesc& graphicsPipelineDesc) { } Result PipelineVK::Create(const ComputePipelineDesc& computePipelineDesc) { - m_OwnsNativeObjects = true; m_BindPoint = VK_PIPELINE_BIND_POINT_COMPUTE; const PipelineLayoutVK& pipelineLayoutVK = *(const PipelineLayoutVK*)computePipelineDesc.pipelineLayout; @@ -292,14 +288,13 @@ Result PipelineVK::Create(const ComputePipelineDesc& computePipelineDesc) { } Result PipelineVK::Create(const RayTracingPipelineDesc& rayTracingPipelineDesc) { - m_OwnsNativeObjects = true; m_BindPoint = VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR; const PipelineLayoutVK& pipelineLayoutVK = *(const PipelineLayoutVK*)rayTracingPipelineDesc.pipelineLayout; const uint32_t stageNum = rayTracingPipelineDesc.shaderLibrary->shaderNum; - VkPipelineShaderStageCreateInfo* stages = STACK_ALLOC(VkPipelineShaderStageCreateInfo, stageNum); - VkShaderModule* modules = STACK_ALLOC(VkShaderModule, stageNum); + VkPipelineShaderStageCreateInfo* stages = StackAlloc(VkPipelineShaderStageCreateInfo, stageNum); + VkShaderModule* modules = StackAlloc(VkShaderModule, stageNum); VkShaderModule* modulesBegin = modules; for (uint32_t i = 0; i < stageNum; i++) { @@ -311,7 +306,7 @@ Result PipelineVK::Create(const RayTracingPipelineDesc& rayTracingPipelineDesc) stages[i].pName = shaderDesc.entryPointName ?
shaderDesc.entryPointName : "main"; } - VkRayTracingShaderGroupCreateInfoKHR* groupArray = STACK_ALLOC(VkRayTracingShaderGroupCreateInfoKHR, rayTracingPipelineDesc.shaderGroupDescNum); + VkRayTracingShaderGroupCreateInfoKHR* groupArray = StackAlloc(VkRayTracingShaderGroupCreateInfoKHR, rayTracingPipelineDesc.shaderGroupDescNum); for (uint32_t i = 0; i < rayTracingPipelineDesc.shaderGroupDescNum; i++) { VkRayTracingShaderGroupCreateInfoKHR& dstGroup = groupArray[i]; const ShaderGroupDesc& srcGroup = rayTracingPipelineDesc.shaderGroupDescs[i]; @@ -380,24 +375,13 @@ Result PipelineVK::Create(const RayTracingPipelineDesc& rayTracingPipelineDesc) return Result::SUCCESS; } -Result PipelineVK::CreateGraphics(NRIVkPipeline vkPipeline) { - if (!vkPipeline) - return Result::INVALID_ARGUMENT; - - m_OwnsNativeObjects = false; - m_Handle = (VkPipeline)vkPipeline; - m_BindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; - - return Result::SUCCESS; -} - -Result PipelineVK::CreateCompute(NRIVkPipeline vkPipeline) { +Result PipelineVK::Create(VkPipelineBindPoint bindPoint, VKNonDispatchableHandle vkPipeline) { if (!vkPipeline) return Result::INVALID_ARGUMENT; m_OwnsNativeObjects = false; m_Handle = (VkPipeline)vkPipeline; - m_BindPoint = VK_PIPELINE_BIND_POINT_COMPUTE; + m_BindPoint = bindPoint; return Result::SUCCESS; } diff --git a/Source/VK/PipelineVK.h b/Source/VK/PipelineVK.h index 49b3794b..7521c224 100644 --- a/Source/VK/PipelineVK.h +++ b/Source/VK/PipelineVK.h @@ -28,8 +28,7 @@ struct PipelineVK { Result Create(const GraphicsPipelineDesc& graphicsPipelineDesc); Result Create(const ComputePipelineDesc& computePipelineDesc); Result Create(const RayTracingPipelineDesc& rayTracingPipelineDesc); - Result CreateGraphics(NRIVkPipeline vkPipeline); - Result CreateCompute(NRIVkPipeline vkPipeline); + Result Create(VkPipelineBindPoint bindPoint, VKNonDispatchableHandle vkPipeline); //================================================================================================================ // NRI @@ -45,7 +44,7 @@ struct PipelineVK { DeviceVK& m_Device; VkPipeline m_Handle = VK_NULL_HANDLE; VkPipelineBindPoint m_BindPoint = (VkPipelineBindPoint)0; - bool m_OwnsNativeObjects = false; + bool m_OwnsNativeObjects = true; }; } // namespace nri \ No newline at end of file diff --git a/Source/VK/PipelineVK.hpp b/Source/VK/PipelineVK.hpp index ed579110..fdc0a510 100644 --- a/Source/VK/PipelineVK.hpp +++ b/Source/VK/PipelineVK.hpp @@ -6,8 +6,8 @@ static Result NRI_CALL WriteShaderGroupIdentifiers(const Pipeline& pipeline, uin return ((const PipelineVK&)pipeline).WriteShaderGroupIdentifiers(baseShaderGroupIndex, shaderGroupNum, buffer); } -void FillFunctionTablePipelineVK(RayTracingInterface& rayTracingInterface) { - rayTracingInterface.WriteShaderGroupIdentifiers = ::WriteShaderGroupIdentifiers; +void FillFunctionTablePipelineVK(RayTracingInterface& table) { + table.WriteShaderGroupIdentifiers = ::WriteShaderGroupIdentifiers; } #pragma endregion diff --git a/Source/VK/QueryPoolVK.cpp b/Source/VK/QueryPoolVK.cpp index 6a7fc75e..56ccc836 100644 --- a/Source/VK/QueryPoolVK.cpp +++ b/Source/VK/QueryPoolVK.cpp @@ -7,15 +7,13 @@ using namespace nri; QueryPoolVK::~QueryPoolVK() { - const auto& vk = m_Device.GetDispatchTable(); - - if (m_OwnsNativeObjects) + if (m_OwnsNativeObjects) { + const auto& vk = m_Device.GetDispatchTable(); vk.DestroyQueryPool(m_Device, m_Handle, m_Device.GetAllocationCallbacks()); + } } Result QueryPoolVK::Create(const QueryPoolDesc& queryPoolDesc) { - m_OwnsNativeObjects = true; - if 
(queryPoolDesc.queryType == QueryType::TIMESTAMP || queryPoolDesc.queryType == QueryType::TIMESTAMP_COPY_QUEUE) m_Type = VK_QUERY_TYPE_TIMESTAMP; else if (queryPoolDesc.queryType == QueryType::OCCLUSION) @@ -49,6 +47,9 @@ Result QueryPoolVK::Create(const QueryPoolDesc& queryPoolDesc) { } Result QueryPoolVK::Create(const QueryPoolVKDesc& queryPoolDesc) { + if (!queryPoolDesc.vkQueryPool) + return Result::INVALID_ARGUMENT; + m_OwnsNativeObjects = false; m_Type = (VkQueryType)queryPoolDesc.vkQueryType; m_Handle = (VkQueryPool)queryPoolDesc.vkQueryPool; diff --git a/Source/VK/QueryPoolVK.h b/Source/VK/QueryPoolVK.h index ef917cb2..c071762a 100644 --- a/Source/VK/QueryPoolVK.h +++ b/Source/VK/QueryPoolVK.h @@ -42,7 +42,7 @@ struct QueryPoolVK { VkQueryPool m_Handle = VK_NULL_HANDLE; VkQueryType m_Type = (VkQueryType)0; uint32_t m_QuerySize = 0; - bool m_OwnsNativeObjects = false; + bool m_OwnsNativeObjects = true; }; } // namespace nri \ No newline at end of file diff --git a/Source/VK/ResourceAllocatorVK.cpp b/Source/VK/ResourceAllocatorVK.cpp new file mode 100644 index 00000000..9855b2f8 --- /dev/null +++ b/Source/VK/ResourceAllocatorVK.cpp @@ -0,0 +1,169 @@ +// © 2021 NVIDIA Corporation + +#define VMA_STATIC_VULKAN_FUNCTIONS 0 +#define VMA_DYNAMIC_VULKAN_FUNCTIONS 1 +#define VMA_IMPLEMENTATION +#include "memalloc/vk_mem_alloc.h" + +#include "SharedVK.h" + +#include "AccelerationStructureVK.h" +#include "BufferVK.h" +#include "TextureVK.h" + +using namespace nri; + +Result DeviceVK::CreateVma() { + if (m_Vma) + return Result::SUCCESS; + + VmaVulkanFunctions vulkanFunctions = {}; + vulkanFunctions.vkGetInstanceProcAddr = m_VK.GetInstanceProcAddr; + vulkanFunctions.vkGetDeviceProcAddr = m_VK.GetDeviceProcAddr; + + VmaAllocatorCreateInfo allocatorCreateInfo = {}; + allocatorCreateInfo.vulkanApiVersion = m_MinorVersion >= 3 ? VK_API_VERSION_1_3 : VK_API_VERSION_1_2; + allocatorCreateInfo.physicalDevice = m_PhysicalDevice; + allocatorCreateInfo.device = m_Device; + allocatorCreateInfo.instance = m_Instance; + allocatorCreateInfo.pVulkanFunctions = &vulkanFunctions; + allocatorCreateInfo.pAllocationCallbacks = m_AllocationCallbackPtr; + + if (m_IsMemoryBudgetSupported) + allocatorCreateInfo.flags |= VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT; + if (m_IsDeviceAddressSupported) + allocatorCreateInfo.flags |= VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT; + if (m_IsMemoryPrioritySupported) + allocatorCreateInfo.flags |= VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT; + if (m_MinorVersion >= 3) + allocatorCreateInfo.flags |= VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE4_BIT; + if (m_IsMaintenance5Supported) + allocatorCreateInfo.flags |= VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT; + + VkResult result = vmaCreateAllocator(&allocatorCreateInfo, &m_Vma); + RETURN_ON_FAILURE(this, result == VK_SUCCESS, GetReturnCode(result), "vmaCreateAllocator returned %d", (int32_t)result); + + return Result::SUCCESS; +} + +Result BufferVK::Create(const AllocateBufferDesc& bufferDesc) { + Result nriResult = m_Device.CreateVma(); + if (nriResult != Result::SUCCESS) + return nriResult; + + // Fill info + VkBufferCreateInfo bufferCreateInfo = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO}; + m_Device.FillCreateInfo(bufferDesc.desc, bufferCreateInfo); + + // Create + VmaAllocationCreateInfo allocationCreateInfo = {}; + allocationCreateInfo.flags = VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT; + allocationCreateInfo.priority = bufferDesc.memoryPriority * 0.5f + 0.5f; + allocationCreateInfo.usage = IsHostMemory(bufferDesc.memoryLocation) ? 
VMA_MEMORY_USAGE_AUTO_PREFER_HOST : VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; + + if (IsHostVisibleMemory(bufferDesc.memoryLocation)) { + allocationCreateInfo.flags |= VMA_ALLOCATION_CREATE_MAPPED_BIT; + + if (bufferDesc.memoryLocation == MemoryLocation::HOST_READBACK) + allocationCreateInfo.flags |= VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; + else + allocationCreateInfo.flags |= VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; + } + + VmaAllocationInfo allocationInfo = {}; + VkResult result = vmaCreateBuffer(m_Device.GetVma(), &bufferCreateInfo, &allocationCreateInfo, &m_Handle, &m_VmaAllocation, &allocationInfo); + RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vmaCreateBuffer returned %d", (int32_t)result); + + // Mapped memory + if (IsHostVisibleMemory(bufferDesc.memoryLocation)) { + m_MappedMemory = (uint8_t*)allocationInfo.pMappedData - allocationInfo.offset; + m_MappedMemoryOffset = allocationInfo.offset; + + uint32_t memoryTypeIndex = 0; + result = vmaFindMemoryTypeIndexForBufferInfo(m_Device.GetVma(), &bufferCreateInfo, &allocationCreateInfo, &memoryTypeIndex); + RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vmaFindMemoryTypeIndexForBufferInfo returned %d", (int32_t)result); + + if (!m_Device.IsHostCoherentMemory((MemoryTypeIndex)memoryTypeIndex)) + m_NonCoherentDeviceMemory = allocationInfo.deviceMemory; + } + + // Device address + if (m_Device.m_IsDeviceAddressSupported) { + VkBufferDeviceAddressInfo bufferDeviceAddressInfo = {VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO}; + bufferDeviceAddressInfo.buffer = m_Handle; + + const auto& vk = m_Device.GetDispatchTable(); + m_DeviceAddress = vk.GetBufferDeviceAddress(m_Device, &bufferDeviceAddressInfo); + } + + m_Desc = bufferDesc.desc; + + return Result::SUCCESS; +} + +Result TextureVK::Create(const AllocateTextureDesc& textureDesc) { + Result nriResult = m_Device.CreateVma(); + if (nriResult != Result::SUCCESS) + return nriResult; + + // Fill info + VkImageCreateInfo imageCreateInfo = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO}; + m_Device.FillCreateInfo(textureDesc.desc, imageCreateInfo); + + // Create + VmaAllocationCreateInfo allocationCreateInfo = {}; + allocationCreateInfo.flags = VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT; + allocationCreateInfo.priority = textureDesc.memoryPriority * 0.5f + 0.5f; + allocationCreateInfo.usage = IsHostMemory(textureDesc.memoryLocation) ? 
VMA_MEMORY_USAGE_AUTO_PREFER_HOST : VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; + + VkResult result = vmaCreateImage(m_Device.GetVma(), &imageCreateInfo, &allocationCreateInfo, &m_Handle, &m_VmaAllocation, nullptr); + RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vmaCreateBuffer returned %d", (int32_t)result); + + m_Desc = textureDesc.desc; + + return Result::SUCCESS; +} + +Result AccelerationStructureVK::Create(const AllocateAccelerationStructureDesc& accelerationStructureDesc) { + Result nriResult = m_Device.CreateVma(); + if (nriResult != Result::SUCCESS) + return nriResult; + + VkAccelerationStructureBuildSizesInfoKHR sizesInfo = {VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR}; + m_Device.GetAccelerationStructureBuildSizesInfo(accelerationStructureDesc.desc, sizesInfo); + + AllocateBufferDesc bufferDesc = {}; + bufferDesc.memoryLocation = accelerationStructureDesc.memoryLocation; + bufferDesc.memoryPriority = accelerationStructureDesc.memoryPriority; + bufferDesc.desc.size = sizesInfo.accelerationStructureSize; + bufferDesc.desc.usageMask = BufferUsageBits::RAY_TRACING_BUFFER; + + Buffer* buffer = nullptr; + Result result = m_Device.CreateImplementation(buffer, bufferDesc); + if (result == Result::SUCCESS) { + m_Buffer = (BufferVK*)buffer; + m_BuildScratchSize = sizesInfo.buildScratchSize; + m_UpdateScratchSize = sizesInfo.updateScratchSize; + m_Type = GetAccelerationStructureType(accelerationStructureDesc.desc.type); + m_AccelerationStructureSize = sizesInfo.accelerationStructureSize; + + FinishCreation(); + } + + return result; +} + +void DeviceVK::DestroyVma() { + if (m_Vma) + vmaDestroyAllocator(m_Vma); +} + +void BufferVK::DestroyVma() { + CHECK(m_VmaAllocation, "Not a VMA allocation"); + vmaDestroyBuffer(m_Device.GetVma(), m_Handle, m_VmaAllocation); +} + +void TextureVK::DestroyVma() { + CHECK(m_VmaAllocation, "Not a VMA allocation"); + vmaDestroyImage(m_Device.GetVma(), m_Handle, m_VmaAllocation); +} diff --git a/Source/VK/SharedVK.h b/Source/VK/SharedVK.h index 9ace9390..966dbe2f 100644 --- a/Source/VK/SharedVK.h +++ b/Source/VK/SharedVK.h @@ -12,28 +12,33 @@ #include "DispatchTable.h" typedef uint16_t MemoryTypeIndex; + struct MemoryTypeInfo { MemoryTypeIndex index; nri::MemoryLocation location; - bool isDedicated; + bool mustBeDedicated; }; -static_assert(sizeof(MemoryTypeInfo) <= sizeof(nri::MemoryType), "Unexpected structure size"); -union MemoryTypeUnion { - nri::MemoryType packed; - MemoryTypeInfo unpacked; -}; +inline nri::MemoryType Pack(const MemoryTypeInfo& memoryTypeInfo) { + return *(nri::MemoryType*)&memoryTypeInfo; +} + +inline MemoryTypeInfo Unpack(const nri::MemoryType& memoryType) { + return *(MemoryTypeInfo*)&memoryType; +} + +static_assert(sizeof(MemoryTypeInfo) == sizeof(nri::MemoryType), "Must be equal"); template constexpr HandleType GetHandle(NRIType* object) { return (object != nullptr) ? 
(*(ImplType*)object).GetHandle() : HandleType(VK_NULL_HANDLE); } -constexpr bool IsHostVisibleMemory(nri::MemoryLocation location) { +inline bool IsHostVisibleMemory(nri::MemoryLocation location) { return location > nri::MemoryLocation::DEVICE; } -constexpr bool IsHostMemory(nri::MemoryLocation location) { +inline bool IsHostMemory(nri::MemoryLocation location) { return location > nri::MemoryLocation::DEVICE_UPLOAD; } @@ -47,9 +52,13 @@ constexpr uint32_t INVALID_FAMILY_INDEX = uint32_t(-1); # define IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL VK_IMAGE_LAYOUT_GENERAL #endif +// Requires {} #define APPEND_EXT(desc) \ *tail = &desc; \ tail = &desc.pNext +struct VmaAllocator_T; +struct VmaAllocation_T; + #include "ConversionVK.h" #include "DeviceVK.h" diff --git a/Source/VK/SwapChainVK.cpp b/Source/VK/SwapChainVK.cpp index 6ebb8024..c5942ba6 100644 --- a/Source/VK/SwapChainVK.cpp +++ b/Source/VK/SwapChainVK.cpp @@ -31,14 +31,10 @@ SwapChainVK::SwapChainVK(DeviceVK& device) : m_Textures(device.GetStdAllocator() } SwapChainVK::~SwapChainVK() { - Destroy(); -} - -void SwapChainVK::Destroy() { for (size_t i = 0; i < m_Textures.size(); i++) - Deallocate(m_Device.GetStdAllocator(), m_Textures[i]); + Destroy(m_Device.GetStdAllocator(), m_Textures[i]); - Deallocate(m_Device.GetStdAllocator(), m_LatencyFence); + Destroy(m_Device.GetStdAllocator(), m_LatencyFence); const auto& vk = m_Device.GetDispatchTable(); if (m_Handle) @@ -135,7 +131,7 @@ Result SwapChainVK::Create(const SwapChainDesc& swapChainDesc) { VkLatencySurfaceCapabilitiesNV latencySurfaceCapabilities = {VK_STRUCTURE_TYPE_LATENCY_SURFACE_CAPABILITIES_NV}; latencySurfaceCapabilities.presentModeCount = 8; - latencySurfaceCapabilities.pPresentModes = STACK_ALLOC(VkPresentModeKHR, latencySurfaceCapabilities.presentModeCount); + latencySurfaceCapabilities.pPresentModes = StackAlloc(VkPresentModeKHR, latencySurfaceCapabilities.presentModeCount); if (m_Device.m_IsLowLatencySupported) sc.pNext = &latencySurfaceCapabilities; @@ -164,7 +160,7 @@ Result SwapChainVK::Create(const SwapChainDesc& swapChainDesc) { VkResult result = vk.GetPhysicalDeviceSurfaceFormatsKHR(m_Device, m_Surface, &formatNum, nullptr); RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkGetPhysicalDeviceSurfaceFormatsKHR returned %d", (int32_t)result); - VkSurfaceFormatKHR* surfaceFormats = STACK_ALLOC(VkSurfaceFormatKHR, formatNum); + VkSurfaceFormatKHR* surfaceFormats = StackAlloc(VkSurfaceFormatKHR, formatNum); result = vk.GetPhysicalDeviceSurfaceFormatsKHR(m_Device, m_Surface, &formatNum, surfaceFormats); RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkGetPhysicalDeviceSurfaceFormatsKHR returned %d", (int32_t)result); @@ -189,7 +185,7 @@ Result SwapChainVK::Create(const SwapChainDesc& swapChainDesc) { VkPresentModeKHR presentMode = VK_PRESENT_MODE_IMMEDIATE_KHR; { uint32_t presentModeNum = 8; - VkPresentModeKHR* presentModes = STACK_ALLOC(VkPresentModeKHR, presentModeNum); + VkPresentModeKHR* presentModes = StackAlloc(VkPresentModeKHR, presentModeNum); VkResult result = vk.GetPhysicalDeviceSurfacePresentModesKHR(m_Device, m_Surface, &presentModeNum, presentModes); RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkGetPhysicalDeviceSurfacePresentModesKHR returned %d", (int32_t)result); @@ -213,10 +209,10 @@ Result SwapChainVK::Create(const SwapChainDesc& swapChainDesc) { } if (i != presentModeNum) break; - REPORT_WARNING(&m_Device, "VkPresentModeKHR = %u is not supported...", modes[j]); + 
REPORT_WARNING(&m_Device, "VkPresentModeKHR = %u is not supported", modes[j]); } if (j == 2) - REPORT_WARNING(&m_Device, "No a suitable present mode found, switching to VK_PRESENT_MODE_IMMEDIATE_KHR..."); + REPORT_WARNING(&m_Device, "No suitable present mode found, switching to VK_PRESENT_MODE_IMMEDIATE_KHR"); } { // Swap chain @@ -281,15 +277,14 @@ Result SwapChainVK::Create(const SwapChainDesc& swapChainDesc) { uint32_t imageNum = 0; vk.GetSwapchainImagesKHR(m_Device, m_Handle, &imageNum, nullptr); - VkImage* imageHandles = STACK_ALLOC(VkImage, imageNum); + VkImage* imageHandles = StackAlloc(VkImage, imageNum); vk.GetSwapchainImagesKHR(m_Device, m_Handle, &imageNum, imageHandles); m_Textures.resize(imageNum); for (uint32_t i = 0; i < imageNum; i++) { TextureVKDesc desc = {}; - desc.vkImage = (NRIVkImage)imageHandles[i]; + desc.vkImage = (VKNonDispatchableHandle)imageHandles[i]; desc.vkFormat = surfaceFormat.format; - desc.vkImageAspectFlags = VK_IMAGE_ASPECT_COLOR_BIT; desc.vkImageType = VK_IMAGE_TYPE_2D; desc.width = swapChainDesc.width; desc.height = swapChainDesc.height; diff --git a/Source/VK/SwapChainVK.h b/Source/VK/SwapChainVK.h index 1b22c66b..f6bfc1c3 100644 --- a/Source/VK/SwapChainVK.h +++ b/Source/VK/SwapChainVK.h @@ -45,9 +45,6 @@ struct SwapChainVK : public DisplayDescHelper { Result LatencySleep(); Result GetLatencyReport(LatencyReport& latencyReport); -private: - void Destroy(); - private: Vector<TextureVK*> m_Textures; FenceVK* m_LatencyFence = nullptr; diff --git a/Source/VK/SwapChainVK.hpp b/Source/VK/SwapChainVK.hpp index 4ad3dd3a..c1962404 100644 --- a/Source/VK/SwapChainVK.hpp +++ b/Source/VK/SwapChainVK.hpp @@ -44,5 +44,5 @@ static Result GetLatencyReport(const SwapChain& swapChain, LatencyReport& latenc #pragma endregion -Define_SwapChain_PartiallyFillFunctionTable(VK); -Define_LowLatency_SwapChain_PartiallyFillFunctionTable(VK); +Define_SwapChain_SwapChain_PartiallyFillFunctionTable(VK); +Define_LowLatency_SwapChain_SwapChain_PartiallyFillFunctionTable(VK); diff --git a/Source/VK/TextureVK.cpp b/Source/VK/TextureVK.cpp index 950578d9..c3b184fe 100644 --- a/Source/VK/TextureVK.cpp +++ b/Source/VK/TextureVK.cpp @@ -8,17 +8,17 @@ using namespace nri; TextureVK::~TextureVK() { - const auto& vk = m_Device.GetDispatchTable(); - - if (m_OwnsNativeObjects) - vk.DestroyImage(m_Device, m_Handle, m_Device.GetAllocationCallbacks()); + if (m_OwnsNativeObjects) { + const auto& vk = m_Device.GetDispatchTable(); + + if (m_VmaAllocation) + DestroyVma(); + else + vk.DestroyImage(m_Device, m_Handle, m_Device.GetAllocationCallbacks()); + } } Result TextureVK::Create(const TextureDesc& textureDesc) { - m_OwnsNativeObjects = true; - m_ImageAspectFlags = ::GetImageAspectFlags(textureDesc.format); - m_Desc = textureDesc; - VkImageCreateInfo info = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO}; m_Device.FillCreateInfo(textureDesc, info); @@ -26,6 +26,8 @@ Result TextureVK::Create(const TextureDesc& textureDesc) { VkResult result = vk.CreateImage(m_Device, &info, m_Device.GetAllocationCallbacks(), &m_Handle); RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkCreateImage returned %d", (int32_t)result); + m_Desc = textureDesc; + return Result::SUCCESS; } @@ -33,8 +35,6 @@ Result TextureVK::Create(const TextureVKDesc& textureDesc) { if (!textureDesc.vkImage) return Result::INVALID_ARGUMENT; - m_OwnsNativeObjects = false; - m_ImageAspectFlags = (VkImageAspectFlags)textureDesc.vkImageAspectFlags; m_Desc.type = GetTextureType((VkImageType)textureDesc.vkImageType); m_Desc.usageMask =
(TextureUsageBits)(-1); // TODO: it's not right... m_Desc.format = VKFormatToNRIFormat((VkFormat)textureDesc.vkFormat); @@ -45,11 +45,16 @@ Result TextureVK::Create(const TextureVKDesc& textureDesc) { m_Desc.arraySize = textureDesc.arraySize; m_Desc.sampleNum = textureDesc.sampleNum; + m_OwnsNativeObjects = false; m_Handle = (VkImage)textureDesc.vkImage; return Result::SUCCESS; } +VkImageAspectFlags TextureVK::GetImageAspectFlags() const { + return ::GetImageAspectFlags(m_Desc.format); +} + //================================================================================================================ // NRI //================================================================================================================ diff --git a/Source/VK/TextureVK.h b/Source/VK/TextureVK.h index 5ad296a6..5fa03ed1 100644 --- a/Source/VK/TextureVK.h +++ b/Source/VK/TextureVK.h @@ -18,10 +18,6 @@ struct TextureVK { return m_Device; } - inline VkImageAspectFlags GetImageAspectFlags() const { - return m_ImageAspectFlags; - } - inline VkExtent3D GetExtent() const { return {m_Desc.width, m_Desc.height, m_Desc.depth}; } @@ -34,14 +30,13 @@ struct TextureVK { return GetDimension(GraphicsAPI::VK, m_Desc, dimensionIndex, mip); } - inline bool OwnsNativeObjects() const { - return m_OwnsNativeObjects; - } - ~TextureVK(); Result Create(const TextureDesc& textureDesc); Result Create(const TextureVKDesc& textureDesc); + Result Create(const AllocateTextureDesc& textureDesc); + VkImageAspectFlags GetImageAspectFlags() const; + void DestroyVma(); //================================================================================================================ // NRI @@ -53,8 +48,8 @@ struct TextureVK { DeviceVK& m_Device; VkImage m_Handle = VK_NULL_HANDLE; TextureDesc m_Desc = {}; - VkImageAspectFlags m_ImageAspectFlags = (VkImageAspectFlags)0; - bool m_OwnsNativeObjects = false; + VmaAllocation_T* m_VmaAllocation = nullptr; + bool m_OwnsNativeObjects = true; }; } // namespace nri \ No newline at end of file diff --git a/Source/Validation/AccelerationStructureVal.cpp b/Source/Validation/AccelerationStructureVal.cpp index ea161e36..e81f655d 100644 --- a/Source/Validation/AccelerationStructureVal.cpp +++ b/Source/Validation/AccelerationStructureVal.cpp @@ -11,17 +11,12 @@ using namespace nri; AccelerationStructureVal::~AccelerationStructureVal() { - if (m_Memory != nullptr) + if (m_Memory) m_Memory->UnbindAccelerationStructure(*this); GetRayTracingInterface().DestroyAccelerationStructure(*GetImpl()); } -void AccelerationStructureVal::GetMemoryDesc(MemoryDesc& memoryDesc) const { - GetRayTracingInterface().GetAccelerationStructureMemoryDesc(*GetImpl(), memoryDesc); - m_Device.RegisterMemoryType(memoryDesc.type, MemoryLocation::DEVICE); -} - uint64_t AccelerationStructureVal::GetUpdateScratchBufferSize() const { return GetRayTracingInterface().GetAccelerationStructureUpdateScratchBufferSize(*GetImpl()); } @@ -46,10 +41,8 @@ Result AccelerationStructureVal::CreateDescriptor(Descriptor*& descriptor) { Descriptor* descriptorImpl = nullptr; const Result result = GetRayTracingInterface().CreateAccelerationStructureDescriptor(*GetImpl(), descriptorImpl); - if (result == Result::SUCCESS) { - RETURN_ON_FAILURE(&m_Device, descriptorImpl != nullptr, Result::FAILURE, "CreateAccelerationStructureDescriptor: 'impl' is NULL"); + if (result == Result::SUCCESS) descriptor = (Descriptor*)Allocate<DescriptorVal>(m_Device.GetStdAllocator(), m_Device, descriptorImpl, ResourceType::ACCELERATION_STRUCTURE); - } return result; }
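// [Editorial sketch, not part of the diff; "Thing" and "ThingVal" are invented.]
// The validation objects above all follow one shape: hold the real implementation,
// validate arguments, forward to the wrapped interface, then wrap a successful
// result in a new *Val object. Note the diff deletes the post-hoc NULL checks of
// 'impl', trusting the backend's SUCCESS result instead:
static Result CreateThingVal(DeviceVal& device, Thing*& thing) {
    Thing* impl = nullptr;
    Result result = device.GetCoreInterface().CreateThing(device.GetImpl(), impl);

    if (result == Result::SUCCESS) // SUCCESS implies a valid 'impl'
        thing = (Thing*)Allocate<ThingVal>(device.GetStdAllocator(), device, impl);

    return result;
}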
diff --git a/Source/Validation/AccelerationStructureVal.h b/Source/Validation/AccelerationStructureVal.h index 82f9da0b..2b613a18 100644 --- a/Source/Validation/AccelerationStructureVal.h +++ b/Source/Validation/AccelerationStructureVal.h @@ -7,8 +7,8 @@ namespace nri { struct MemoryVal; struct AccelerationStructureVal final : public DeviceObjectVal<AccelerationStructure> { - AccelerationStructureVal(DeviceVal& device, AccelerationStructure* accelerationStructure, bool isBoundToMemory) : - DeviceObjectVal(device, accelerationStructure), m_IsBoundToMemory(isBoundToMemory) { + AccelerationStructureVal(DeviceVal& device, AccelerationStructure* accelerationStructure, bool isBoundToMemory, const MemoryDesc& memoryDesc) : + DeviceObjectVal(device, accelerationStructure), m_IsBoundToMemory(isBoundToMemory), m_MemoryDesc(memoryDesc) { } ~AccelerationStructureVal(); @@ -22,10 +22,13 @@ struct AccelerationStructureVal final : public DeviceObjectVal<AccelerationStru diff --git a/Source/Validation/BufferVal.h b/Source/Validation/BufferVal.h - BufferVal(DeviceVal& device, Buffer* buffer) : DeviceObjectVal(device, buffer) { + BufferVal(DeviceVal& device, Buffer* buffer, bool isBoundToMemory) : DeviceObjectVal(device, buffer), m_IsBoundToMemory(isBoundToMemory) { } ~BufferVal(); @@ -24,12 +24,8 @@ struct BufferVal final : public DeviceObjectVal<Buffer> { return m_IsBoundToMemory; } - inline void SetBoundToMemory() { - m_IsBoundToMemory = true; - } - - inline void SetBoundToMemory(MemoryVal& memory) { - m_Memory = &memory; + inline void SetBoundToMemory(MemoryVal* memory = nullptr) { + m_Memory = memory; m_IsBoundToMemory = true; } diff --git a/Source/Validation/CommandAllocatorVal.cpp b/Source/Validation/CommandAllocatorVal.cpp index f38c40c3..b3ef3e53 100644 --- a/Source/Validation/CommandAllocatorVal.cpp +++ b/Source/Validation/CommandAllocatorVal.cpp @@ -17,10 +17,8 @@ Result CommandAllocatorVal::CreateCommandBuffer(CommandBuffer*& commandBuffer) { CommandBuffer* commandBufferImpl; const Result result = GetCoreInterface().CreateCommandBuffer(*GetImpl(), commandBufferImpl); - if (result == Result::SUCCESS) { - RETURN_ON_FAILURE(&m_Device, commandBufferImpl != nullptr, Result::FAILURE, "CreateCommandBuffer: 'impl' is NULL!"); + if (result == Result::SUCCESS) commandBuffer = (CommandBuffer*)Allocate<CommandBufferVal>(m_Device.GetStdAllocator(), m_Device, commandBufferImpl, false); - } return result; } diff --git a/Source/Validation/CommandBufferVal.cpp b/Source/Validation/CommandBufferVal.cpp index b6379400..4d3fac9b 100644 --- a/Source/Validation/CommandBufferVal.cpp +++ b/Source/Validation/CommandBufferVal.cpp @@ -163,7 +163,7 @@ void CommandBufferVal::BeginRendering(const AttachmentsDesc& attachmentsDesc) { RETURN_ON_FAILURE(&m_Device, m_IsRecordingStarted, ReturnVoid(), "CmdBeginRendering: the command buffer must be in the recording state"); RETURN_ON_FAILURE(&m_Device, !m_IsRenderPass, ReturnVoid(), "CmdBeginRendering: 'CmdBeginRendering' has been already called"); - Descriptor** colors = STACK_ALLOC(Descriptor*, attachmentsDesc.colorNum); + Descriptor** colors = StackAlloc(Descriptor*, attachmentsDesc.colorNum); for (uint32_t i = 0; i < attachmentsDesc.colorNum; i++) colors[i] = NRI_GET_IMPL(Descriptor, attachmentsDesc.colors[i]); @@ -189,7 +189,7 @@ void CommandBufferVal::EndRendering() { void CommandBufferVal::SetVertexBuffers(uint32_t baseSlot, uint32_t bufferNum, const Buffer* const* buffers, const uint64_t* offsets) { RETURN_ON_FAILURE(&m_Device, m_IsRecordingStarted, ReturnVoid(), "CmdSetVertexBuffers: the command buffer must be in the recording state"); - Buffer** buffersImpl =
StackAlloc(Buffer*, bufferNum); for (uint32_t i = 0; i < bufferNum; i++) buffersImpl[i] = NRI_GET_IMPL(Buffer, buffers[i]); @@ -361,12 +361,12 @@ void CommandBufferVal::Barrier(const BarrierGroupDesc& barrierGroupDesc) { return; } - BufferBarrierDesc* buffers = STACK_ALLOC(BufferBarrierDesc, barrierGroupDesc.bufferNum); + BufferBarrierDesc* buffers = StackAlloc(BufferBarrierDesc, barrierGroupDesc.bufferNum); memcpy(buffers, barrierGroupDesc.buffers, sizeof(BufferBarrierDesc) * barrierGroupDesc.bufferNum); for (uint32_t i = 0; i < barrierGroupDesc.bufferNum; i++) buffers[i].buffer = NRI_GET_IMPL(Buffer, barrierGroupDesc.buffers[i].buffer); - TextureBarrierDesc* textures = STACK_ALLOC(TextureBarrierDesc, barrierGroupDesc.textureNum); + TextureBarrierDesc* textures = StackAlloc(TextureBarrierDesc, barrierGroupDesc.textureNum); memcpy(textures, barrierGroupDesc.textures, sizeof(TextureBarrierDesc) * barrierGroupDesc.textureNum); for (uint32_t i = 0; i < barrierGroupDesc.textureNum; i++) textures[i].texture = NRI_GET_IMPL(Texture, barrierGroupDesc.textures[i].texture); @@ -465,11 +465,6 @@ void CommandBufferVal::EndAnnotation() { m_AnnotationStack--; } -void CommandBufferVal::Destroy() { - GetCoreInterface().DestroyCommandBuffer(*GetImpl()); - Deallocate(m_Device.GetStdAllocator(), this); -} - void CommandBufferVal::BuildTopLevelAccelerationStructure( uint32_t instanceNum, const Buffer& buffer, uint64_t bufferOffset, AccelerationStructureBuildBits flags, AccelerationStructure& dst, Buffer& scratch, uint64_t scratchOffset) { RETURN_ON_FAILURE(&m_Device, m_IsRecordingStarted, ReturnVoid(), "CmdBuildTopLevelAccelerationStructure: the command buffer must be in the recording state"); @@ -504,10 +499,10 @@ void CommandBufferVal::BuildBottomLevelAccelerationStructure( AccelerationStructure& dstImpl = *NRI_GET_IMPL(AccelerationStructure, &dst); Buffer& scratchImpl = *NRI_GET_IMPL(Buffer, &scratch); - Vector<GeometryObject> objectImplArray(geometryObjectNum, m_Device.GetStdAllocator()); - ConvertGeometryObjectsVal(objectImplArray.data(), geometryObjects, geometryObjectNum); + Scratch<GeometryObject> objectImplArray = AllocateScratch(m_Device, GeometryObject, geometryObjectNum); + ConvertGeometryObjectsVal(objectImplArray, geometryObjects, geometryObjectNum); - GetRayTracingInterface().CmdBuildBottomLevelAccelerationStructure(*GetImpl(), geometryObjectNum, objectImplArray.data(), flags, dstImpl, scratchImpl, scratchOffset); + GetRayTracingInterface().CmdBuildBottomLevelAccelerationStructure(*GetImpl(), geometryObjectNum, objectImplArray, flags, dstImpl, scratchImpl, scratchOffset); } void CommandBufferVal::UpdateTopLevelAccelerationStructure(uint32_t instanceNum, const Buffer& buffer, uint64_t bufferOffset, AccelerationStructureBuildBits flags, @@ -547,10 +542,10 @@ void CommandBufferVal::UpdateBottomLevelAccelerationStructure(uint32_t geometryO AccelerationStructure& srcImpl = *NRI_GET_IMPL(AccelerationStructure, &src); Buffer& scratchImpl = *NRI_GET_IMPL(Buffer, &scratch); - Vector<GeometryObject> objectImplArray(geometryObjectNum, m_Device.GetStdAllocator()); - ConvertGeometryObjectsVal(objectImplArray.data(), geometryObjects, geometryObjectNum); + Scratch<GeometryObject> objectImplArray = AllocateScratch(m_Device, GeometryObject, geometryObjectNum); + ConvertGeometryObjectsVal(objectImplArray, geometryObjects, geometryObjectNum); - GetRayTracingInterface().CmdUpdateBottomLevelAccelerationStructure(*GetImpl(), geometryObjectNum, objectImplArray.data(), flags, dstImpl, srcImpl, scratchImpl, scratchOffset); + GetRayTracingInterface().CmdUpdateBottomLevelAccelerationStructure(*GetImpl(), geometryObjectNum, objectImplArray, flags, dstImpl, srcImpl, scratchImpl, scratchOffset); }
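// [Editorial sketch, not part of the diff; the real Scratch<T>/AllocateScratch live
// in NRI's shared headers.] The ALLOCATE_SCRATCH/FREE_SCRATCH pairs replaced above
// needed an explicit free on every exit path; a Scratch handle releases its memory
// when it leaves scope, which is why the cleanup blocks could be deleted. A minimal
// stand-in with the same usage pattern (the real one uses the device's allocator):
#include <cstdlib>

template <typename T>
class Scratch {
public:
    explicit Scratch(T* memory) : m_Memory(memory) {}
    ~Scratch() { free(m_Memory); }            // runs on every return path
    operator T*() const { return m_Memory; }  // decays to T*, as passed to the calls above
private:
    T* m_Memory;
};

#define AllocateScratch(device, T, elementNum) Scratch<T>((T*)malloc(sizeof(T) * (elementNum)))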
void CommandBufferVal::CopyAccelerationStructure(AccelerationStructure& dst, AccelerationStructure& src, CopyMode copyMode) { @@ -570,7 +565,7 @@ void CommandBufferVal::WriteAccelerationStructureSize( RETURN_ON_FAILURE(&m_Device, !m_IsRenderPass, ReturnVoid(), "CmdWriteAccelerationStructureSize: must be called outside of 'CmdBeginRendering/CmdEndRendering'"); RETURN_ON_FAILURE(&m_Device, accelerationStructures != nullptr, ReturnVoid(), "CmdWriteAccelerationStructureSize: 'accelerationStructures' is NULL"); - AccelerationStructure** accelerationStructureArray = STACK_ALLOC(AccelerationStructure*, accelerationStructureNum); + AccelerationStructure** accelerationStructureArray = StackAlloc(AccelerationStructure*, accelerationStructureNum); for (uint32_t i = 0; i < accelerationStructureNum; i++) { RETURN_ON_FAILURE(&m_Device, accelerationStructures[i] != nullptr, ReturnVoid(), "CmdWriteAccelerationStructureSize: 'accelerationStructures[%u]' is NULL", i); diff --git a/Source/Validation/CommandBufferVal.h b/Source/Validation/CommandBufferVal.h index 541020eb..2cdcdabc 100644 --- a/Source/Validation/CommandBufferVal.h +++ b/Source/Validation/CommandBufferVal.h @@ -58,7 +58,6 @@ struct CommandBufferVal : public DeviceObjectVal<CommandBuffer> { void ResetQueries(const QueryPool& queryPool, uint32_t offset, uint32_t num); void BeginAnnotation(const char* name); void EndAnnotation(); - void Destroy(); void BuildTopLevelAccelerationStructure(uint32_t instanceNum, const Buffer& buffer, uint64_t bufferOffset, AccelerationStructureBuildBits flags, AccelerationStructure& dst, Buffer& scratch, uint64_t scratchOffset); diff --git a/Source/Validation/CommandBufferVal.hpp b/Source/Validation/CommandBufferVal.hpp index 0ba2c73e..403c8ff9 100644 --- a/Source/Validation/CommandBufferVal.hpp +++ b/Source/Validation/CommandBufferVal.hpp @@ -160,13 +160,6 @@ static void NRI_CALL CmdResetQueries(CommandBuffer& commandBuffer, const QueryPo ((CommandBufferVal&)commandBuffer).ResetQueries(queryPool, offset, num); } -static void NRI_CALL DestroyCommandBuffer(CommandBuffer& commandBuffer) { - if (!(&commandBuffer)) - return; - - ((CommandBufferVal&)commandBuffer).Destroy(); -} - static void* NRI_CALL GetCommandBufferNativeObject(const CommandBuffer& commandBuffer) { if (!(&commandBuffer)) return nullptr; diff --git a/Source/Validation/CommandQueueVal.cpp b/Source/Validation/CommandQueueVal.cpp index f2c04a81..b567c917 100644 --- a/Source/Validation/CommandQueueVal.cpp +++ b/Source/Validation/CommandQueueVal.cpp @@ -72,19 +72,19 @@ void CommandQueueVal::Submit(const QueueSubmitDesc& queueSubmitDesc, const SwapC auto queueSubmitDescImpl = queueSubmitDesc; - FenceSubmitDesc* waitFences = STACK_ALLOC(FenceSubmitDesc, queueSubmitDesc.waitFenceNum); + FenceSubmitDesc* waitFences = StackAlloc(FenceSubmitDesc, queueSubmitDesc.waitFenceNum); for (uint32_t i = 0; i < queueSubmitDesc.waitFenceNum; i++) { waitFences[i] = queueSubmitDesc.waitFences[i]; waitFences[i].fence = NRI_GET_IMPL(Fence, waitFences[i].fence); } queueSubmitDescImpl.waitFences = waitFences; - CommandBuffer** commandBuffers = STACK_ALLOC(CommandBuffer*, queueSubmitDesc.commandBufferNum); + CommandBuffer** commandBuffers = StackAlloc(CommandBuffer*, queueSubmitDesc.commandBufferNum); for (uint32_t i = 0; i < queueSubmitDesc.commandBufferNum; i++) commandBuffers[i] = NRI_GET_IMPL(CommandBuffer,
queueSubmitDesc.commandBuffers[i]); queueSubmitDescImpl.commandBuffers = commandBuffers; - FenceSubmitDesc* signalFences = STACK_ALLOC(FenceSubmitDesc, queueSubmitDesc.signalFenceNum); + FenceSubmitDesc* signalFences = StackAlloc(FenceSubmitDesc, queueSubmitDesc.signalFenceNum); for (uint32_t i = 0; i < queueSubmitDesc.signalFenceNum; i++) { signalFences[i] = queueSubmitDesc.signalFences[i]; signalFences[i].fence = NRI_GET_IMPL(Fence, signalFences[i].fence); @@ -103,7 +103,7 @@ Result CommandQueueVal::UploadData( RETURN_ON_FAILURE(&m_Device, textureUploadDescNum == 0 || textureUploadDescs != nullptr, Result::INVALID_ARGUMENT, "UploadData: 'textureUploadDescs' is NULL"); RETURN_ON_FAILURE(&m_Device, bufferUploadDescNum == 0 || bufferUploadDescs != nullptr, Result::INVALID_ARGUMENT, "UploadData: 'bufferUploadDescs' is NULL"); - TextureUploadDesc* textureUploadDescsImpl = STACK_ALLOC(TextureUploadDesc, textureUploadDescNum); + TextureUploadDesc* textureUploadDescsImpl = StackAlloc(TextureUploadDesc, textureUploadDescNum); for (uint32_t i = 0; i < textureUploadDescNum; i++) { if (!ValidateTextureUploadDesc(m_Device, i, textureUploadDescs[i])) @@ -115,7 +115,7 @@ Result CommandQueueVal::UploadData( textureUploadDescsImpl[i].texture = textureVal->GetImpl(); } - BufferUploadDesc* bufferUploadDescsImpl = STACK_ALLOC(BufferUploadDesc, bufferUploadDescNum); + BufferUploadDesc* bufferUploadDescsImpl = StackAlloc(BufferUploadDesc, bufferUploadDescNum); for (uint32_t i = 0; i < bufferUploadDescNum; i++) { if (!ValidateBufferUploadDesc(m_Device, i, bufferUploadDescs[i])) diff --git a/Source/Validation/DescriptorSetVal.cpp b/Source/Validation/DescriptorSetVal.cpp index cb868da5..34169f05 100644 --- a/Source/Validation/DescriptorSetVal.cpp +++ b/Source/Validation/DescriptorSetVal.cpp @@ -26,7 +26,7 @@ void DescriptorSetVal::UpdateDescriptorRanges(uint32_t rangeOffset, uint32_t ran "UpdateDescriptorRanges: 'rangeOffset' + 'rangeNum' is greater than the number of ranges. 
diff --git a/Source/Validation/DescriptorSetVal.cpp b/Source/Validation/DescriptorSetVal.cpp
index cb868da5..34169f05 100644
--- a/Source/Validation/DescriptorSetVal.cpp
+++ b/Source/Validation/DescriptorSetVal.cpp
@@ -26,7 +26,7 @@ void DescriptorSetVal::UpdateDescriptorRanges(uint32_t rangeOffset, uint32_t ran
         "UpdateDescriptorRanges: 'rangeOffset' + 'rangeNum' is greater than the number of ranges. (rangeOffset=%u, rangeNum=%u, rangeNum=%u)", rangeOffset, rangeNum, GetDesc().rangeNum);
 
-    DescriptorRangeUpdateDesc* rangeUpdateDescsImpl = STACK_ALLOC(DescriptorRangeUpdateDesc, rangeNum);
+    DescriptorRangeUpdateDesc* rangeUpdateDescsImpl = StackAlloc(DescriptorRangeUpdateDesc, rangeNum);
     for (uint32_t i = 0; i < rangeNum; i++) {
         const DescriptorRangeUpdateDesc& updateDesc = rangeUpdateDescs[i];
         const DescriptorRangeDesc& rangeDesc = GetDesc().ranges[rangeOffset + i];
@@ -46,7 +46,7 @@ void DescriptorSetVal::UpdateDescriptorRanges(uint32_t rangeOffset, uint32_t ran
         DescriptorRangeUpdateDesc& dstDesc = rangeUpdateDescsImpl[i];
         dstDesc = updateDesc;
-        dstDesc.descriptors = STACK_ALLOC(Descriptor*, updateDesc.descriptorNum);
+        dstDesc.descriptors = StackAlloc(Descriptor*, updateDesc.descriptorNum);
 
         Descriptor** descriptors = (Descriptor**)dstDesc.descriptors;
         for (uint32_t j = 0; j < updateDesc.descriptorNum; j++) {
@@ -72,7 +72,7 @@ void DescriptorSetVal::UpdateDynamicConstantBuffers(uint32_t baseBuffer, uint32_
     RETURN_ON_FAILURE(&m_Device, descriptors != nullptr, ReturnVoid(), "UpdateDynamicConstantBuffers: 'descriptors' is NULL");
 
-    Descriptor** descriptorsImpl = STACK_ALLOC(Descriptor*, bufferNum);
+    Descriptor** descriptorsImpl = StackAlloc(Descriptor*, bufferNum);
     for (uint32_t i = 0; i < bufferNum; i++) {
         RETURN_ON_FAILURE(&m_Device, descriptors[i] != nullptr, ReturnVoid(), "UpdateDynamicConstantBuffers: 'descriptors[%u]' is NULL", i);
diff --git a/Source/Validation/DeviceVal.cpp b/Source/Validation/DeviceVal.cpp
index fb4dd7de..c3852d89 100644
--- a/Source/Validation/DeviceVal.cpp
+++ b/Source/Validation/DeviceVal.cpp
@@ -45,8 +45,8 @@ DeviceVal::DeviceVal(const CallbackInterface& callbacks, const StdAllocator
-            Deallocate(GetStdAllocator(), m_CommandQueues[i]);
-    ((DeviceBase*)&m_Device)->Destroy();
+            Destroy(GetStdAllocator(), m_CommandQueues[i]);
+    ((DeviceBase*)&m_Device)->Destruct();
 }
 
 bool DeviceVal::Create() {
@@ -67,13 +67,18 @@
         return false;
     }
 
+    if (deviceBase.FillFunctionTable(m_ResourceAllocatorAPI) != Result::SUCCESS) {
+        REPORT_ERROR(this, "Failed to get 'ResourceAllocatorInterface' interface");
+        return false;
+    }
+
+    m_IsLowLatencySupported = deviceBase.FillFunctionTable(m_LowLatencyAPI) == Result::SUCCESS;
+    m_IsMeshShaderSupported = deviceBase.FillFunctionTable(m_MeshShaderAPI) == Result::SUCCESS;
+    m_IsRayTracingSupported = deviceBase.FillFunctionTable(m_RayTracingAPI) == Result::SUCCESS;
+    m_IsSwapChainSupported = deviceBase.FillFunctionTable(m_SwapChainAPI) == Result::SUCCESS;
     m_IsWrapperD3D11Supported = deviceBase.FillFunctionTable(m_WrapperD3D11API) == Result::SUCCESS;
     m_IsWrapperD3D12Supported = deviceBase.FillFunctionTable(m_WrapperD3D12API) == Result::SUCCESS;
     m_IsWrapperVKSupported = deviceBase.FillFunctionTable(m_WrapperVKAPI) == Result::SUCCESS;
-    m_IsSwapChainSupported = deviceBase.FillFunctionTable(m_SwapChainAPI) == Result::SUCCESS;
-    m_IsRayTracingSupported = deviceBase.FillFunctionTable(m_RayTracingAPI) == Result::SUCCESS;
-    m_IsMeshShaderSupported = deviceBase.FillFunctionTable(m_MeshShaderAPI) == Result::SUCCESS;
-    m_IsLowLatencySupported = deviceBase.FillFunctionTable(m_LowLatencyAPI) == Result::SUCCESS;
 
     return true;
 }
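The reordered probing in DeviceVal::Create() encodes a simple policy: interfaces required for validation (here ResourceAllocatorInterface) abort device creation, while optional extensions merely record a support flag. A self-contained toy model of that pattern, with illustrative names only:

```cpp
#include <cstdint>

// Toy model of mandatory-vs-optional interface probing (illustrative names).
enum class Result : uint32_t { SUCCESS, UNSUPPORTED };

struct CoreInterface { /* function pointers in the real API */ };
struct RayTracingInterface { /* ... */ };

struct Device {
    bool isRayTracingSupported = false;

    Result FillFunctionTable(CoreInterface&) { return Result::SUCCESS; }           // always implemented
    Result FillFunctionTable(RayTracingInterface&) { return Result::UNSUPPORTED; } // backend-dependent

    bool Create() {
        CoreInterface core = {};
        if (FillFunctionTable(core) != Result::SUCCESS)
            return false; // mandatory: device creation fails

        RayTracingInterface rayTracing = {};
        isRayTracingSupported = FillFunctionTable(rayTracing) == Result::SUCCESS; // optional: remember availability
        return true;
    }
};
```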
@@ -83,16 +88,6 @@ void DeviceVal::RegisterMemoryType(MemoryType memoryType, MemoryLocation memoryL
     m_MemoryTypeMap[memoryType] = memoryLocation;
 }
 
-void DeviceVal::GetMemoryDesc(const BufferDesc& bufferDesc, MemoryLocation memoryLocation, MemoryDesc& memoryDesc) {
-    GetCoreInterface().GetBufferMemoryDesc(GetImpl(), bufferDesc, memoryLocation, memoryDesc);
-    RegisterMemoryType(memoryDesc.type, memoryLocation);
-}
-
-void DeviceVal::GetMemoryDesc(const TextureDesc& textureDesc, MemoryLocation memoryLocation, MemoryDesc& memoryDesc) {
-    GetCoreInterface().GetTextureMemoryDesc(GetImpl(), textureDesc, memoryLocation, memoryDesc);
-    RegisterMemoryType(memoryDesc.type, memoryLocation);
-}
-
 Result DeviceVal::CreateSwapChain(const SwapChainDesc& swapChainDesc, SwapChain*& swapChain) {
     RETURN_ON_FAILURE(this, swapChainDesc.commandQueue != nullptr, Result::INVALID_ARGUMENT, "CreateSwapChain: 'swapChainDesc.commandQueue' is NULL");
     RETURN_ON_FAILURE(this, swapChainDesc.width != 0, Result::INVALID_ARGUMENT, "CreateSwapChain: 'swapChainDesc.width' is 0");
@@ -104,19 +99,17 @@
     swapChainDescImpl.commandQueue = NRI_GET_IMPL(CommandQueue, swapChainDesc.commandQueue);
 
     SwapChain* swapChainImpl;
-    const Result result = m_SwapChainAPI.CreateSwapChain(m_Device, swapChainDescImpl, swapChainImpl);
+    Result result = m_SwapChainAPI.CreateSwapChain(m_Device, swapChainDescImpl, swapChainImpl);
 
-    if (result == Result::SUCCESS) {
-        RETURN_ON_FAILURE(this, swapChainImpl != nullptr, Result::FAILURE, "CreateSwapChain: 'impl' is NULL");
+    if (result == Result::SUCCESS)
         swapChain = (SwapChain*)Allocate(GetStdAllocator(), *this, swapChainImpl, swapChainDesc);
-    }
 
     return result;
 }
 
 void DeviceVal::DestroySwapChain(SwapChain& swapChain) {
     m_SwapChainAPI.DestroySwapChain(*NRI_GET_IMPL(SwapChain, &swapChain));
-    Deallocate(GetStdAllocator(), (SwapChainVal*)&swapChain);
+    Destroy(GetStdAllocator(), (SwapChainVal*)&swapChain);
 }
 
 void DeviceVal::SetDebugName(const char* name) {
@@ -132,7 +125,7 @@ Result DeviceVal::GetCommandQueue(CommandQueueType commandQueueType, CommandQueu
     RETURN_ON_FAILURE(this, commandQueueType < CommandQueueType::MAX_NUM, Result::INVALID_ARGUMENT, "GetCommandQueue: 'commandQueueType' is invalid");
 
     CommandQueue* commandQueueImpl;
-    const Result result = m_CoreAPI.GetCommandQueue(m_Device, commandQueueType, commandQueueImpl);
+    Result result = m_CoreAPI.GetCommandQueue(m_Device, commandQueueType, commandQueueImpl);
 
     if (result == Result::SUCCESS) {
         const uint32_t index = (uint32_t)commandQueueType;
@@ -149,24 +142,20 @@ Result DeviceVal::CreateCommandAllocator(const CommandQueue& commandQueue, Comma
     auto commandQueueImpl = NRI_GET_IMPL(CommandQueue, &commandQueue);
 
     CommandAllocator* commandAllocatorImpl = nullptr;
-    const Result result = m_CoreAPI.CreateCommandAllocator(*commandQueueImpl, commandAllocatorImpl);
+    Result result = m_CoreAPI.CreateCommandAllocator(*commandQueueImpl, commandAllocatorImpl);
 
-    if (result == Result::SUCCESS) {
-        RETURN_ON_FAILURE(this, commandAllocatorImpl != nullptr, Result::FAILURE, "CreateCommandAllocator: 'impl' is NULL");
+    if (result == Result::SUCCESS)
         commandAllocator = (CommandAllocator*)Allocate(GetStdAllocator(), *this, commandAllocatorImpl);
-    }
 
     return result;
 }
 
 Result DeviceVal::CreateDescriptorPool(const DescriptorPoolDesc& descriptorPoolDesc, DescriptorPool*& descriptorPool) {
     DescriptorPool* descriptorPoolImpl = nullptr;
-    const Result result = m_CoreAPI.CreateDescriptorPool(m_Device, descriptorPoolDesc, descriptorPoolImpl);
+    Result result = m_CoreAPI.CreateDescriptorPool(m_Device, descriptorPoolDesc, descriptorPoolImpl);
 
-    if (result == Result::SUCCESS) {
-        RETURN_ON_FAILURE(this, descriptorPoolImpl != nullptr, Result::FAILURE, "CreateDescriptorPool: 'impl' is NULL");
+    if (result == Result::SUCCESS)
         descriptorPool = (DescriptorPool*)Allocate(GetStdAllocator(), *this, descriptorPoolImpl, descriptorPoolDesc);
-    }
 
     return result;
 }
@@ -175,12 +164,22 @@ Result DeviceVal::CreateBuffer(const BufferDesc& bufferDesc, Buffer*& buffer) {
     RETURN_ON_FAILURE(this, bufferDesc.size != 0, Result::INVALID_ARGUMENT, "CreateBuffer: 'bufferDesc.size' is 0");
 
     Buffer* bufferImpl = nullptr;
-    const Result result = m_CoreAPI.CreateBuffer(m_Device, bufferDesc, bufferImpl);
+    Result result = m_CoreAPI.CreateBuffer(m_Device, bufferDesc, bufferImpl);
 
-    if (result == Result::SUCCESS) {
-        RETURN_ON_FAILURE(this, bufferImpl != nullptr, Result::FAILURE, "CreateBuffer: 'impl' is NULL");
-        buffer = (Buffer*)Allocate(GetStdAllocator(), *this, bufferImpl);
-    }
+    if (result == Result::SUCCESS)
+        buffer = (Buffer*)Allocate(GetStdAllocator(), *this, bufferImpl, false);
+
+    return result;
+}
+
+Result DeviceVal::CreateBuffer(const AllocateBufferDesc& bufferDesc, Buffer*& buffer) {
+    RETURN_ON_FAILURE(this, bufferDesc.desc.size != 0, Result::INVALID_ARGUMENT, "AllocateBuffer: 'bufferDesc.size' is 0");
+
+    Buffer* bufferImpl = nullptr;
+    Result result = m_ResourceAllocatorAPI.AllocateBuffer(m_Device, bufferDesc, bufferImpl);
+
+    if (result == Result::SUCCESS)
+        buffer = (Buffer*)Allocate(GetStdAllocator(), *this, bufferImpl, true);
 
     return result;
 }
@@ -219,12 +218,34 @@ Result DeviceVal::CreateTexture(const TextureDesc& textureDesc, Texture*& textur
     RETURN_ON_FAILURE(this, textureDesc.sampleNum != 0, Result::INVALID_ARGUMENT, "CreateTexture: 'textureDesc.sampleNum' is 0");
 
     Texture* textureImpl = nullptr;
-    const Result result = m_CoreAPI.CreateTexture(m_Device, textureDesc, textureImpl);
+    Result result = m_CoreAPI.CreateTexture(m_Device, textureDesc, textureImpl);
 
-    if (result == Result::SUCCESS) {
-        RETURN_ON_FAILURE(this, textureImpl != nullptr, Result::FAILURE, "CreateTexture: 'impl' is NULL");
-        texture = (Texture*)Allocate(GetStdAllocator(), *this, textureImpl);
-    }
+    if (result == Result::SUCCESS)
+        texture = (Texture*)Allocate(GetStdAllocator(), *this, textureImpl, false);
+
+    return result;
+}
+
+Result DeviceVal::CreateTexture(const AllocateTextureDesc& textureDesc, Texture*& texture) {
+    Mip_t maxMipNum = GetMaxMipNum(textureDesc.desc.width, textureDesc.desc.height, textureDesc.desc.depth);
+
+    RETURN_ON_FAILURE(
+        this, textureDesc.desc.format > Format::UNKNOWN && textureDesc.desc.format < Format::MAX_NUM, Result::INVALID_ARGUMENT, "CreateTexture: 'textureDesc.format' is invalid");
+    RETURN_ON_FAILURE(this, textureDesc.desc.width != 0, Result::INVALID_ARGUMENT, "CreateTexture: 'textureDesc.width' is 0");
+    RETURN_ON_FAILURE(this, textureDesc.desc.height != 0, Result::INVALID_ARGUMENT, "CreateTexture: 'textureDesc.height' is 0");
+    RETURN_ON_FAILURE(this, textureDesc.desc.depth != 0, Result::INVALID_ARGUMENT, "CreateTexture: 'textureDesc.depth' is 0");
+    RETURN_ON_FAILURE(this, textureDesc.desc.mipNum != 0, Result::INVALID_ARGUMENT, "CreateTexture: 'textureDesc.mipNum' is 0");
+    RETURN_ON_FAILURE(
+        this, textureDesc.desc.mipNum <= maxMipNum, Result::INVALID_ARGUMENT, "CreateTexture: 'textureDesc.mipNum = %u' can't be > %u", textureDesc.desc.mipNum, maxMipNum);
+    RETURN_ON_FAILURE(this, textureDesc.desc.arraySize != 0, Result::INVALID_ARGUMENT, "CreateTexture: 'textureDesc.arraySize' is 0");
+    RETURN_ON_FAILURE(this, textureDesc.desc.sampleNum != 0, Result::INVALID_ARGUMENT, "CreateTexture: 'textureDesc.sampleNum' is 0");
+
+    Texture* textureImpl = nullptr;
+    Result result = m_ResourceAllocatorAPI.AllocateTexture(m_Device, textureDesc, textureImpl);
+
+    if (result == Result::SUCCESS)
+        texture = (Texture*)Allocate(GetStdAllocator(), *this, textureImpl, true);
 
     return result;
 }
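With the AllocateBufferDesc/AllocateTextureDesc overloads added above, the validation layer now tracks two creation paths that end in the same wrapper; the extra bool passed to Allocate (false for plain CreateBuffer, true for the allocator-backed path) distinguishes them. A hedged sketch of how an application might use the new path; field names other than 'desc' are assumptions:

```cpp
// Hypothetical usage of the allocator-backed path (needs the NRI headers; the
// 'memoryLocation' member is an assumption, only '.desc' is visible in this diff).
nri::AllocateBufferDesc allocateBufferDesc = {};
allocateBufferDesc.desc.size = 64 * 1024;                        // must be non-zero, see the check above
allocateBufferDesc.memoryLocation = nri::MemoryLocation::DEVICE; // assumed field

nri::Buffer* buffer = nullptr;
nri::Result result = resourceAllocatorInterface.AllocateBuffer(device, allocateBufferDesc, buffer);
// Unlike CreateBuffer, no separate AllocateMemory/BindBufferMemory step follows.
```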
@@ -247,12 +268,10 @@ Result DeviceVal::CreateDescriptor(const BufferViewDesc& bufferViewDesc, Descrip
     bufferViewDescImpl.buffer = NRI_GET_IMPL(Buffer, bufferViewDesc.buffer);
 
     Descriptor* descriptorImpl = nullptr;
-    const Result result = m_CoreAPI.CreateBufferView(bufferViewDescImpl, descriptorImpl);
+    Result result = m_CoreAPI.CreateBufferView(bufferViewDescImpl, descriptorImpl);
 
-    if (result == Result::SUCCESS) {
-        RETURN_ON_FAILURE(this, descriptorImpl != nullptr, Result::FAILURE, "CreateDescriptor: 'impl' is NULL");
+    if (result == Result::SUCCESS)
         bufferView = (Descriptor*)Allocate(GetStdAllocator(), *this, descriptorImpl, bufferViewDesc);
-    }
 
     return result;
 }
@@ -285,12 +304,10 @@ Result DeviceVal::CreateDescriptor(const Texture1DViewDesc& textureViewDesc, Des
     textureViewDescImpl.texture = NRI_GET_IMPL(Texture, textureViewDesc.texture);
 
     Descriptor* descriptorImpl = nullptr;
-    const Result result = m_CoreAPI.CreateTexture1DView(textureViewDescImpl, descriptorImpl);
+    Result result = m_CoreAPI.CreateTexture1DView(textureViewDescImpl, descriptorImpl);
 
-    if (result == Result::SUCCESS) {
-        RETURN_ON_FAILURE(this, descriptorImpl != nullptr, Result::FAILURE, "CreateDescriptor: 'impl' is NULL");
+    if (result == Result::SUCCESS)
         textureView = (Descriptor*)Allocate(GetStdAllocator(), *this, descriptorImpl, textureViewDesc);
-    }
 
     return result;
 }
@@ -328,12 +345,10 @@ Result DeviceVal::CreateDescriptor(const Texture2DViewDesc& textureViewDesc, Des
     textureViewDescImpl.texture = NRI_GET_IMPL(Texture, textureViewDesc.texture);
 
     Descriptor* descriptorImpl = nullptr;
-    const Result result = m_CoreAPI.CreateTexture2DView(textureViewDescImpl, descriptorImpl);
+    Result result = m_CoreAPI.CreateTexture2DView(textureViewDescImpl, descriptorImpl);
 
-    if (result == Result::SUCCESS) {
-        RETURN_ON_FAILURE(this, descriptorImpl != nullptr, Result::FAILURE, "CreateDescriptor: 'impl' is NULL");
+    if (result == Result::SUCCESS)
         textureView = (Descriptor*)Allocate(GetStdAllocator(), *this, descriptorImpl, textureViewDesc);
-    }
 
     return result;
 }
@@ -371,12 +386,10 @@ Result DeviceVal::CreateDescriptor(const Texture3DViewDesc& textureViewDesc, Des
     textureViewDescImpl.texture = NRI_GET_IMPL(Texture, textureViewDesc.texture);
 
     Descriptor* descriptorImpl = nullptr;
-    const Result result = m_CoreAPI.CreateTexture3DView(textureViewDescImpl, descriptorImpl);
+    Result result = m_CoreAPI.CreateTexture3DView(textureViewDescImpl, descriptorImpl);
 
-    if (result == Result::SUCCESS) {
-        RETURN_ON_FAILURE(this, descriptorImpl != nullptr, Result::FAILURE, "CreateDescriptor: 'impl' is NULL");
+    if (result == Result::SUCCESS)
         textureView = (Descriptor*)Allocate(GetStdAllocator(), *this, descriptorImpl, textureViewDesc);
-    }
 
     return result;
 }
@@ -393,12 +406,10 @@ Result DeviceVal::CreateDescriptor(const SamplerDesc& samplerDesc, Descriptor*&
     RETURN_ON_FAILURE(this, samplerDesc.borderColor < BorderColor::MAX_NUM, Result::INVALID_ARGUMENT, "CreateSampler: 'samplerDesc.borderColor' is invalid");
 
     Descriptor* samplerImpl = nullptr;
-    const Result result = m_CoreAPI.CreateSampler(m_Device, samplerDesc, samplerImpl);
+    Result result = m_CoreAPI.CreateSampler(m_Device, samplerDesc, samplerImpl);
 
-    if (result == Result::SUCCESS) {
-        RETURN_ON_FAILURE(this, samplerImpl != nullptr, Result::FAILURE, "CreateDescriptor: 'impl' is NULL");
+    if (result == Result::SUCCESS)
         sampler = (Descriptor*)Allocate(GetStdAllocator(), *this, samplerImpl);
-    }
 
     return result;
 }
@@ -443,12 +454,10 @@ Result DeviceVal::CreatePipelineLayout(const PipelineLayoutDesc& pipelineLayoutD
     }
 
     PipelineLayout* pipelineLayoutImpl = nullptr;
-    const Result result = m_CoreAPI.CreatePipelineLayout(m_Device, pipelineLayoutDesc, pipelineLayoutImpl);
+    Result result = m_CoreAPI.CreatePipelineLayout(m_Device, pipelineLayoutDesc, pipelineLayoutImpl);
 
-    if (result == Result::SUCCESS) {
-        RETURN_ON_FAILURE(this, pipelineLayoutImpl != nullptr, Result::FAILURE, "CreatePipelineLayout: 'impl' is NULL");
+    if (result == Result::SUCCESS)
         pipelineLayout = (PipelineLayout*)Allocate(GetStdAllocator(), *this, pipelineLayoutImpl, pipelineLayoutDesc);
-    }
 
     return result;
 }
@@ -497,12 +506,10 @@
     graphicsPipelineDescImpl.pipelineLayout = NRI_GET_IMPL(PipelineLayout, graphicsPipelineDesc.pipelineLayout);
 
     Pipeline* pipelineImpl = nullptr;
-    const Result result = m_CoreAPI.CreateGraphicsPipeline(m_Device, graphicsPipelineDescImpl, pipelineImpl);
+    Result result = m_CoreAPI.CreateGraphicsPipeline(m_Device, graphicsPipelineDescImpl, pipelineImpl);
 
-    if (result == Result::SUCCESS) {
-        RETURN_ON_FAILURE(this, pipelineImpl != nullptr, Result::FAILURE, "CreatePipeline: 'impl' is NULL");
+    if (result == Result::SUCCESS)
         pipeline = (Pipeline*)Allocate(GetStdAllocator(), *this, pipelineImpl, graphicsPipelineDesc);
-    }
 
     return result;
 }
@@ -519,12 +526,10 @@
     computePipelineDescImpl.pipelineLayout = NRI_GET_IMPL(PipelineLayout, computePipelineDesc.pipelineLayout);
 
     Pipeline* pipelineImpl = nullptr;
-    const Result result = m_CoreAPI.CreateComputePipeline(m_Device, computePipelineDescImpl, pipelineImpl);
+    Result result = m_CoreAPI.CreateComputePipeline(m_Device, computePipelineDescImpl, pipelineImpl);
 
-    if (result == Result::SUCCESS) {
-        RETURN_ON_FAILURE(this, pipelineImpl != nullptr, Result::FAILURE, "CreatePipeline: 'impl' is NULL");
+    if (result == Result::SUCCESS)
         pipeline = (Pipeline*)Allocate(GetStdAllocator(), *this, pipelineImpl, computePipelineDesc);
-    }
 
     return result;
 }
@@ -534,76 +539,78 @@ Result DeviceVal::CreateQueryPool(const QueryPoolDesc& queryPoolDesc, QueryPool*
     RETURN_ON_FAILURE(this, queryPoolDesc.capacity > 0, Result::INVALID_ARGUMENT, "CreateQueryPool: 'queryPoolDesc.capacity' is 0");
 
     QueryPool* queryPoolImpl = nullptr;
-    const Result result = m_CoreAPI.CreateQueryPool(m_Device, queryPoolDesc, queryPoolImpl);
+    Result result = m_CoreAPI.CreateQueryPool(m_Device, queryPoolDesc, queryPoolImpl);
 
-    if (result == Result::SUCCESS) {
-        RETURN_ON_FAILURE(this, queryPoolImpl != nullptr, Result::FAILURE, "CreateQueryPool: 'impl' is NULL");
+    if (result == Result::SUCCESS)
         queryPool = (QueryPool*)Allocate(GetStdAllocator(), *this, queryPoolImpl, queryPoolDesc.queryType, queryPoolDesc.capacity);
-    }
 
     return result;
 }
 
 Result DeviceVal::CreateFence(uint64_t initialValue, Fence*& fence) {
     Fence* fenceImpl;
-    const Result result = m_CoreAPI.CreateFence(m_Device, initialValue, fenceImpl);
+    Result result = m_CoreAPI.CreateFence(m_Device, initialValue, fenceImpl);
 
-    if (result == Result::SUCCESS) {
-        RETURN_ON_FAILURE(this, fenceImpl != nullptr, Result::FAILURE, "CreateFence: 'impl' is NULL");
+    if (result == Result::SUCCESS)
         fence = (Fence*)Allocate(GetStdAllocator(), *this, fenceImpl);
-    }
 
     return result;
 }
 
+void DeviceVal::DestroyCommandBuffer(CommandBuffer& commandBuffer) {
+    m_CoreAPI.DestroyCommandBuffer(*NRI_GET_IMPL(CommandBuffer, &commandBuffer));
+    Destroy(GetStdAllocator(), (CommandBufferVal*)&commandBuffer);
+}
+
 void DeviceVal::DestroyCommandAllocator(CommandAllocator& commandAllocator) {
     m_CoreAPI.DestroyCommandAllocator(*NRI_GET_IMPL(CommandAllocator, &commandAllocator));
-    Deallocate(GetStdAllocator(), (CommandAllocatorVal*)&commandAllocator);
+    Destroy(GetStdAllocator(), (CommandAllocatorVal*)&commandAllocator);
 }
 
 void DeviceVal::DestroyDescriptorPool(DescriptorPool& descriptorPool) {
     m_CoreAPI.DestroyDescriptorPool(*NRI_GET_IMPL(DescriptorPool, &descriptorPool));
-    Deallocate(GetStdAllocator(), (DescriptorPoolVal*)&descriptorPool);
+    Destroy(GetStdAllocator(), (DescriptorPoolVal*)&descriptorPool);
 }
 
 void DeviceVal::DestroyBuffer(Buffer& buffer) {
     m_CoreAPI.DestroyBuffer(*NRI_GET_IMPL(Buffer, &buffer));
-    Deallocate(GetStdAllocator(), (BufferVal*)&buffer);
+    Destroy(GetStdAllocator(), (BufferVal*)&buffer);
 }
 
 void DeviceVal::DestroyTexture(Texture& texture) {
     m_CoreAPI.DestroyTexture(*NRI_GET_IMPL(Texture, &texture));
-    Deallocate(GetStdAllocator(), (TextureVal*)&texture);
+    Destroy(GetStdAllocator(), (TextureVal*)&texture);
 }
 
 void DeviceVal::DestroyDescriptor(Descriptor& descriptor) {
     m_CoreAPI.DestroyDescriptor(*NRI_GET_IMPL(Descriptor, &descriptor));
-    Deallocate(GetStdAllocator(), (DescriptorVal*)&descriptor);
+    Destroy(GetStdAllocator(), (DescriptorVal*)&descriptor);
 }
 
 void DeviceVal::DestroyPipelineLayout(PipelineLayout& pipelineLayout) {
     m_CoreAPI.DestroyPipelineLayout(*NRI_GET_IMPL(PipelineLayout, &pipelineLayout));
-    Deallocate(GetStdAllocator(), (PipelineLayoutVal*)&pipelineLayout);
+    Destroy(GetStdAllocator(), (PipelineLayoutVal*)&pipelineLayout);
 }
 
 void DeviceVal::DestroyPipeline(Pipeline& pipeline) {
     m_CoreAPI.DestroyPipeline(*NRI_GET_IMPL(Pipeline, &pipeline));
-    Deallocate(GetStdAllocator(), (PipelineVal*)&pipeline);
+    Destroy(GetStdAllocator(), (PipelineVal*)&pipeline);
 }
 
 void DeviceVal::DestroyQueryPool(QueryPool& queryPool) {
     m_CoreAPI.DestroyQueryPool(*NRI_GET_IMPL(QueryPool, &queryPool));
-    Deallocate(GetStdAllocator(), (QueryPoolVal*)&queryPool);
+    Destroy(GetStdAllocator(), (QueryPoolVal*)&queryPool);
 }
 
 void DeviceVal::DestroyFence(Fence& fence) {
     m_CoreAPI.DestroyFence(*NRI_GET_IMPL(Fence, &fence));
-    Deallocate(GetStdAllocator(), (FenceVal*)&fence);
+    Destroy(GetStdAllocator(), (FenceVal*)&fence);
 }
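The Deallocate to Destroy rename applied uniformly above pairs object teardown with the custom allocator. A minimal self-contained sketch of the idiom; this is illustrative only, NRI's StdAllocator and helpers may differ:

```cpp
#include <cstdlib>
#include <new>
#include <utility>

// Illustrative byte allocator standing in for the real StdAllocator.
struct ByteAllocator {
    void* Allocate(size_t size) { return std::malloc(size); }
    void Free(void* p) { std::free(p); }
};

// Construct T in allocator-owned storage (placement-new).
template <typename T, typename... Args>
T* Allocate(ByteAllocator& allocator, Args&&... args) {
    void* mem = allocator.Allocate(sizeof(T));
    return mem ? new (mem) T(std::forward<Args>(args)...) : nullptr;
}

// Run the destructor explicitly, then return the storage to the allocator.
template <typename T>
void Destroy(ByteAllocator& allocator, T* object) {
    if (object) {
        object->~T();
        allocator.Free(object);
    }
}
```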
 Result DeviceVal::AllocateMemory(const AllocateMemoryDesc& allocateMemoryDesc, Memory*& memory) {
     RETURN_ON_FAILURE(this, allocateMemoryDesc.size > 0, Result::INVALID_ARGUMENT, "AllocateMemory: 'allocateMemoryDesc.size' is 0");
-    RETURN_ON_FAILURE(this, allocateMemoryDesc.priority >= -1.0f && allocateMemoryDesc.priority <= 1.0f, Result::INVALID_ARGUMENT, "AllocateMemory: 'allocateMemoryDesc.priority' outside of [-1; 1] range");
+    RETURN_ON_FAILURE(this, allocateMemoryDesc.priority >= -1.0f && allocateMemoryDesc.priority <= 1.0f, Result::INVALID_ARGUMENT,
+        "AllocateMemory: 'allocateMemoryDesc.priority' outside of [-1; 1] range");
 
     std::unordered_map<MemoryType, MemoryLocation>::iterator it;
     std::unordered_map<MemoryType, MemoryLocation>::iterator end;
@@ -616,23 +623,18 @@
     RETURN_ON_FAILURE(this, it != end, Result::FAILURE, "AllocateMemory: 'memoryType' is invalid");
 
     Memory* memoryImpl;
-    const Result result = m_CoreAPI.AllocateMemory(m_Device, allocateMemoryDesc, memoryImpl);
+    Result result = m_CoreAPI.AllocateMemory(m_Device, allocateMemoryDesc, memoryImpl);
 
-    if (result == Result::SUCCESS) {
-        RETURN_ON_FAILURE(this, memoryImpl != nullptr, Result::FAILURE, "AllocateMemory: 'impl' is NULL");
+    if (result == Result::SUCCESS)
         memory = (Memory*)Allocate(GetStdAllocator(), *this, memoryImpl, allocateMemoryDesc.size, it->second);
-    }
 
     return result;
 }
 
 Result DeviceVal::BindBufferMemory(const BufferMemoryBindingDesc* memoryBindingDescs, uint32_t memoryBindingDescNum) {
-    if (memoryBindingDescNum == 0)
-        return Result::SUCCESS;
-
     RETURN_ON_FAILURE(this, memoryBindingDescs != nullptr, Result::INVALID_ARGUMENT, "BindBufferMemory: 'memoryBindingDescs' is NULL");
 
-    BufferMemoryBindingDesc* memoryBindingDescsImpl = STACK_ALLOC(BufferMemoryBindingDesc, memoryBindingDescNum);
+    BufferMemoryBindingDesc* memoryBindingDescsImpl = StackAlloc(BufferMemoryBindingDesc, memoryBindingDescNum);
     for (uint32_t i = 0; i < memoryBindingDescNum; i++) {
         BufferMemoryBindingDesc& destDesc = memoryBindingDescsImpl[i];
@@ -655,7 +657,7 @@
             continue;
 
         MemoryDesc memoryDesc = {};
-        GetMemoryDesc(buffer.GetDesc(), memory.GetMemoryLocation(), memoryDesc);
+        GetCoreInterface().GetBufferMemoryDesc(GetImpl(), buffer.GetDesc(), memory.GetMemoryLocation(), memoryDesc);
 
         RETURN_ON_FAILURE(this, !memoryDesc.mustBeDedicated || srcDesc.offset == 0, Result::INVALID_ARGUMENT, "BindBufferMemory: 'memoryBindingDescs[%u].offset' must be zero for dedicated allocation", i);
@@ -669,7 +671,7 @@
         RETURN_ON_FAILURE(this, memorySizeIsUnknown || rangeMax <= memory.GetSize(), Result::INVALID_ARGUMENT, "BindBufferMemory: 'memoryBindingDescs[%u].offset' is invalid", i);
     }
 
-    const Result result = m_CoreAPI.BindBufferMemory(m_Device, memoryBindingDescsImpl, memoryBindingDescNum);
+    Result result = m_CoreAPI.BindBufferMemory(m_Device, memoryBindingDescsImpl, memoryBindingDescNum);
 
     if (result == Result::SUCCESS) {
         for (uint32_t i = 0; i < memoryBindingDescNum; i++) {
@@ -682,9 +684,9 @@
 }
 
 Result DeviceVal::BindTextureMemory(const TextureMemoryBindingDesc* memoryBindingDescs, uint32_t memoryBindingDescNum) {
-    RETURN_ON_FAILURE(this, memoryBindingDescs != nullptr || memoryBindingDescNum == 0, Result::INVALID_ARGUMENT, "BindTextureMemory: 'memoryBindingDescs' is a NULL");
+    RETURN_ON_FAILURE(this, memoryBindingDescs != nullptr, Result::INVALID_ARGUMENT, "BindTextureMemory: 'memoryBindingDescs' is NULL");
 
-    TextureMemoryBindingDesc* memoryBindingDescsImpl = STACK_ALLOC(TextureMemoryBindingDesc, memoryBindingDescNum);
+    TextureMemoryBindingDesc* memoryBindingDescsImpl = StackAlloc(TextureMemoryBindingDesc, memoryBindingDescNum);
     for (uint32_t i = 0; i < memoryBindingDescNum; i++) {
         TextureMemoryBindingDesc& destDesc = memoryBindingDescsImpl[i];
@@ -707,7 +709,7 @@
             continue;
 
         MemoryDesc memoryDesc = {};
-        GetMemoryDesc(texture.GetDesc(), memory.GetMemoryLocation(), memoryDesc);
+        GetCoreInterface().GetTextureMemoryDesc(GetImpl(), texture.GetDesc(), memory.GetMemoryLocation(), memoryDesc);
 
         RETURN_ON_FAILURE(this, !memoryDesc.mustBeDedicated || srcDesc.offset == 0, Result::INVALID_ARGUMENT, "BindTextureMemory: 'memoryBindingDescs[%u].offset' must be zero for dedicated allocation", i);
@@ -721,7 +723,7 @@
         RETURN_ON_FAILURE(this, memorySizeIsUnknown || rangeMax <= memory.GetSize(), Result::INVALID_ARGUMENT, "BindTextureMemory: 'memoryBindingDescs[%u].offset' is invalid", i);
     }
 
-    const Result result = m_CoreAPI.BindTextureMemory(m_Device, memoryBindingDescsImpl, memoryBindingDescNum);
+    Result result = m_CoreAPI.BindTextureMemory(m_Device, memoryBindingDescsImpl, memoryBindingDescNum);
 
     if (result == Result::SUCCESS) {
         for (uint32_t i = 0; i < memoryBindingDescNum; i++) {
@@ -743,7 +745,7 @@ void DeviceVal::FreeMemory(Memory& memory) {
     }
 
     m_CoreAPI.FreeMemory(*NRI_GET_IMPL(Memory, &memory));
-    Deallocate(GetStdAllocator(), (MemoryVal*)&memory);
+    Destroy(GetStdAllocator(), (MemoryVal*)&memory);
 }
 
 FormatSupportBits DeviceVal::GetFormatSupport(Format format) const {
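The offset validation in BindBufferMemory/BindTextureMemory boils down to interval arithmetic: a dedicated allocation must start at offset 0, and otherwise [offset, offset + resourceSize) must fit inside the memory object unless its size is unknown (wrapped native memory). A standalone restatement of that check:

```cpp
#include <cstdint>

// Condensed form of the per-binding range check (memorySize == 0 models a
// wrapped native allocation of unknown size, which the validation code allows).
bool IsBindingRangeValid(uint64_t offset, uint64_t resourceSize, uint64_t memorySize, bool mustBeDedicated) {
    if (mustBeDedicated && offset != 0)
        return false; // dedicated allocations must be bound at offset 0

    const bool memorySizeIsUnknown = memorySize == 0;
    const uint64_t rangeMax = offset + resourceSize; // overflow handling omitted for brevity

    return memorySizeIsUnknown || rangeMax <= memorySize;
}
```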
@@ -752,186 +754,154 @@
 
 #if NRI_USE_VULKAN
 
-Result DeviceVal::CreateCommandQueueVK(const CommandQueueVKDesc& commandQueueVKDesc, CommandQueue*& commandQueue) {
-    RETURN_ON_FAILURE(this, commandQueueVKDesc.vkQueue != 0, Result::INVALID_ARGUMENT, "CreateCommandQueueVK: 'commandQueueVKDesc.vkQueue' is NULL");
+Result DeviceVal::CreateCommandQueue(const CommandQueueVKDesc& commandQueueVKDesc, CommandQueue*& commandQueue) {
+    RETURN_ON_FAILURE(this, commandQueueVKDesc.vkQueue != 0, Result::INVALID_ARGUMENT, "CreateCommandQueue: 'commandQueueVKDesc.vkQueue' is NULL");
     RETURN_ON_FAILURE(
-        this, commandQueueVKDesc.commandQueueType < CommandQueueType::MAX_NUM, Result::INVALID_ARGUMENT, "CreateCommandQueueVK: 'commandQueueVKDesc.commandQueueType' is invalid");
+        this, commandQueueVKDesc.commandQueueType < CommandQueueType::MAX_NUM, Result::INVALID_ARGUMENT, "CreateCommandQueue: 'commandQueueVKDesc.commandQueueType' is invalid");
 
     CommandQueue* commandQueueImpl = nullptr;
-    const Result result = m_WrapperVKAPI.CreateCommandQueueVK(m_Device, commandQueueVKDesc, commandQueueImpl);
-
-    if (result == Result::SUCCESS) {
-        RETURN_ON_FAILURE(this, commandQueueImpl != nullptr, Result::FAILURE, "CreateCommandQueueVK: 'impl' is NULL");
+    Result result = m_WrapperVKAPI.CreateCommandQueueVK(m_Device, commandQueueVKDesc, commandQueueImpl);
+    if (result == Result::SUCCESS)
         commandQueue = (CommandQueue*)Allocate(GetStdAllocator(), *this, commandQueueImpl);
-    }
 
     return result;
 }
 
-Result DeviceVal::CreateCommandAllocatorVK(const CommandAllocatorVKDesc& commandAllocatorVKDesc, CommandAllocator*& commandAllocator) {
-    RETURN_ON_FAILURE(this, commandAllocatorVKDesc.vkCommandPool != 0, Result::INVALID_ARGUMENT, "CreateCommandAllocatorVK: 'commandAllocatorVKDesc.vkCommandPool' is NULL");
+Result DeviceVal::CreateCommandAllocator(const CommandAllocatorVKDesc& commandAllocatorVKDesc, CommandAllocator*& commandAllocator) {
+    RETURN_ON_FAILURE(this, commandAllocatorVKDesc.vkCommandPool != 0, Result::INVALID_ARGUMENT, "CreateCommandAllocator: 'commandAllocatorVKDesc.vkCommandPool' is NULL");
     RETURN_ON_FAILURE(this, commandAllocatorVKDesc.commandQueueType < CommandQueueType::MAX_NUM, Result::INVALID_ARGUMENT,
-        "CreateCommandAllocatorVK: 'commandAllocatorVKDesc.commandQueueType' is invalid");
+        "CreateCommandAllocator: 'commandAllocatorVKDesc.commandQueueType' is invalid");
 
     CommandAllocator* commandAllocatorImpl = nullptr;
-    const Result result = m_WrapperVKAPI.CreateCommandAllocatorVK(m_Device, commandAllocatorVKDesc, commandAllocatorImpl);
-
-    if (result == Result::SUCCESS) {
-        RETURN_ON_FAILURE(this, commandAllocatorImpl != nullptr, Result::FAILURE, "CreateCommandAllocatorVK: 'impl' is NULL");
+    Result result = m_WrapperVKAPI.CreateCommandAllocatorVK(m_Device, commandAllocatorVKDesc, commandAllocatorImpl);
+    if (result == Result::SUCCESS)
         commandAllocator = (CommandAllocator*)Allocate(GetStdAllocator(), *this, commandAllocatorImpl);
-    }
 
     return result;
 }
 
-Result DeviceVal::CreateCommandBufferVK(const CommandBufferVKDesc& commandBufferVKDesc, CommandBuffer*& commandBuffer) {
-    RETURN_ON_FAILURE(this, commandBufferVKDesc.vkCommandBuffer != 0, Result::INVALID_ARGUMENT, "CreateCommandBufferVK: 'commandBufferVKDesc.vkCommandBuffer' is NULL");
+Result DeviceVal::CreateCommandBuffer(const CommandBufferVKDesc& commandBufferVKDesc, CommandBuffer*& commandBuffer) {
+    RETURN_ON_FAILURE(this, commandBufferVKDesc.vkCommandBuffer != 0, Result::INVALID_ARGUMENT, "CreateCommandBuffer: 'commandBufferVKDesc.vkCommandBuffer' is NULL");
 
-    RETURN_ON_FAILURE(this, commandBufferVKDesc.commandQueueType < CommandQueueType::MAX_NUM, Result::INVALID_ARGUMENT,
-        "CreateCommandBufferVK: 'commandBufferVKDesc.commandQueueType' is invalid");
+    RETURN_ON_FAILURE(
+        this, commandBufferVKDesc.commandQueueType < CommandQueueType::MAX_NUM, Result::INVALID_ARGUMENT, "CreateCommandBuffer: 'commandBufferVKDesc.commandQueueType' is invalid");
 
     CommandBuffer* commandBufferImpl = nullptr;
-    const Result result = m_WrapperVKAPI.CreateCommandBufferVK(m_Device, commandBufferVKDesc, commandBufferImpl);
-
-    if (result == Result::SUCCESS) {
-        RETURN_ON_FAILURE(this, commandBufferImpl != nullptr, Result::FAILURE, "CreateCommandBufferVK: 'impl' is NULL");
+    Result result = m_WrapperVKAPI.CreateCommandBufferVK(m_Device, commandBufferVKDesc, commandBufferImpl);
+    if (result == Result::SUCCESS)
         commandBuffer = (CommandBuffer*)Allocate(GetStdAllocator(), *this, commandBufferImpl, true);
-    }
 
     return result;
 }
 
-Result DeviceVal::CreateDescriptorPoolVK(const DescriptorPoolVKDesc& descriptorPoolVKDesc, DescriptorPool*& descriptorPool) {
-    RETURN_ON_FAILURE(this, descriptorPoolVKDesc.vkDescriptorPool != 0, Result::INVALID_ARGUMENT, "CreateDescriptorPoolVK: 'vkDescriptorPool' is NULL");
-    RETURN_ON_FAILURE(this, descriptorPoolVKDesc.descriptorSetMaxNum != 0, Result::INVALID_ARGUMENT, "CreateDescriptorPoolVK: 'descriptorSetMaxNum' is 0");
+Result DeviceVal::CreateDescriptorPool(const DescriptorPoolVKDesc& descriptorPoolVKDesc, DescriptorPool*& descriptorPool) {
+    RETURN_ON_FAILURE(this, descriptorPoolVKDesc.vkDescriptorPool != 0, Result::INVALID_ARGUMENT, "CreateDescriptorPool: 'vkDescriptorPool' is NULL");
+    RETURN_ON_FAILURE(this, descriptorPoolVKDesc.descriptorSetMaxNum != 0, Result::INVALID_ARGUMENT, "CreateDescriptorPool: 'descriptorSetMaxNum' is 0");
 
     DescriptorPool* descriptorPoolImpl = nullptr;
-    const Result result = m_WrapperVKAPI.CreateDescriptorPoolVK(m_Device, descriptorPoolVKDesc, descriptorPoolImpl);
-
-    if (result == Result::SUCCESS) {
-        RETURN_ON_FAILURE(this, descriptorPoolImpl != nullptr, Result::FAILURE, "CreateDescriptorPoolVK: 'impl' is NULL");
+    Result result = m_WrapperVKAPI.CreateDescriptorPoolVK(m_Device, descriptorPoolVKDesc, descriptorPoolImpl);
+    if (result == Result::SUCCESS)
         descriptorPool = (DescriptorPool*)Allocate(GetStdAllocator(), *this, descriptorPoolImpl, descriptorPoolVKDesc.descriptorSetMaxNum);
-    }
 
     return result;
 }
 
-Result DeviceVal::CreateBufferVK(const BufferVKDesc& bufferDesc, Buffer*& buffer) {
-    RETURN_ON_FAILURE(this, bufferDesc.vkBuffer != 0, Result::INVALID_ARGUMENT, "CreateBufferVK: 'bufferDesc.vkBuffer' is NULL");
-    RETURN_ON_FAILURE(this, bufferDesc.memory != nullptr, Result::INVALID_ARGUMENT, "CreateBufferVK: 'bufferDesc.memory' is NULL");
-    RETURN_ON_FAILURE(this, bufferDesc.size > 0, Result::INVALID_ARGUMENT, "CreateBufferVK: 'bufferDesc.bufferSize' is 0");
+Result DeviceVal::CreateBuffer(const BufferVKDesc& bufferDesc, Buffer*& buffer) {
+    RETURN_ON_FAILURE(this, bufferDesc.vkBuffer != 0, Result::INVALID_ARGUMENT, "CreateBuffer: 'bufferDesc.vkBuffer' is NULL");
+    RETURN_ON_FAILURE(this, bufferDesc.size > 0, Result::INVALID_ARGUMENT, "CreateBuffer: 'bufferDesc.size' is 0");
 
     Buffer* bufferImpl = nullptr;
-    const Result result = m_WrapperVKAPI.CreateBufferVK(m_Device, bufferDesc, bufferImpl);
-
-    if (result == Result::SUCCESS) {
-        RETURN_ON_FAILURE(this, bufferImpl != nullptr, Result::FAILURE, "CreateBufferVK: 'impl' is NULL");
+    Result result = m_WrapperVKAPI.CreateBufferVK(m_Device, bufferDesc, bufferImpl);
 
-        buffer = (Buffer*)Allocate(GetStdAllocator(), *this, bufferImpl);
-    }
+    if (result == Result::SUCCESS)
+        buffer = (Buffer*)Allocate(GetStdAllocator(), *this, bufferImpl, true);
 
     return result;
 }
 
-Result DeviceVal::CreateTextureVK(const TextureVKDesc& textureVKDesc, Texture*& texture) {
-    RETURN_ON_FAILURE(this, textureVKDesc.vkImage != 0, Result::INVALID_ARGUMENT, "CreateTextureVK: 'textureVKDesc.vkImage' is NULL");
-    RETURN_ON_FAILURE(this, nriConvertVKFormatToNRI(textureVKDesc.vkFormat) != Format::UNKNOWN, Result::INVALID_ARGUMENT, "CreateTextureVK: 'textureVKDesc.sampleNum' is 0");
-    RETURN_ON_FAILURE(this, textureVKDesc.sampleNum > 0, Result::INVALID_ARGUMENT, "CreateTextureVK: 'textureVKDesc.sampleNum' is 0");
-    RETURN_ON_FAILURE(this, textureVKDesc.arraySize > 0, Result::INVALID_ARGUMENT, "CreateTextureVK: 'textureVKDesc.arraySize' is 0");
-    RETURN_ON_FAILURE(this, textureVKDesc.mipNum > 0, Result::INVALID_ARGUMENT, "CreateTextureVK: 'textureVKDesc.mipNum' is 0");
+Result DeviceVal::CreateTexture(const TextureVKDesc& textureVKDesc, Texture*& texture) {
+    RETURN_ON_FAILURE(this, textureVKDesc.vkImage != 0, Result::INVALID_ARGUMENT, "CreateTexture: 'textureVKDesc.vkImage' is NULL");
+    RETURN_ON_FAILURE(this, nriConvertVKFormatToNRI(textureVKDesc.vkFormat) != Format::UNKNOWN, Result::INVALID_ARGUMENT, "CreateTexture: 'textureVKDesc.vkFormat' is invalid");
+    RETURN_ON_FAILURE(this, textureVKDesc.sampleNum > 0, Result::INVALID_ARGUMENT, "CreateTexture: 'textureVKDesc.sampleNum' is 0");
+    RETURN_ON_FAILURE(this, textureVKDesc.arraySize > 0, Result::INVALID_ARGUMENT, "CreateTexture: 'textureVKDesc.arraySize' is 0");
+    RETURN_ON_FAILURE(this, textureVKDesc.mipNum > 0, Result::INVALID_ARGUMENT, "CreateTexture: 'textureVKDesc.mipNum' is 0");
 
     Texture* textureImpl = nullptr;
-    const Result result = m_WrapperVKAPI.CreateTextureVK(m_Device, textureVKDesc, textureImpl);
+    Result result = m_WrapperVKAPI.CreateTextureVK(m_Device, textureVKDesc, textureImpl);
 
-    if (result == Result::SUCCESS) {
-        RETURN_ON_FAILURE(this, textureImpl != nullptr, Result::FAILURE, "CreateTextureVK: 'impl' is NULL");
-
-        texture = (Texture*)Allocate(GetStdAllocator(), *this, textureImpl);
-    }
+    if (result == Result::SUCCESS)
+        texture = (Texture*)Allocate(GetStdAllocator(), *this, textureImpl, true);
 
     return result;
 }
 
-Result DeviceVal::CreateMemoryVK(const MemoryVKDesc& memoryVKDesc, Memory*& memory) {
-    RETURN_ON_FAILURE(this, memoryVKDesc.vkDeviceMemory != 0, Result::INVALID_ARGUMENT, "CreateMemoryVK: 'memoryVKDesc.vkDeviceMemory' is NULL");
-    RETURN_ON_FAILURE(this, memoryVKDesc.size > 0, Result::INVALID_ARGUMENT, "CreateMemoryVK: 'memoryVKDesc.size' is 0");
+Result DeviceVal::CreateMemory(const MemoryVKDesc& memoryVKDesc, Memory*& memory) {
+    RETURN_ON_FAILURE(this, memoryVKDesc.vkDeviceMemory != 0, Result::INVALID_ARGUMENT, "CreateMemory: 'memoryVKDesc.vkDeviceMemory' is NULL");
+    RETURN_ON_FAILURE(this, memoryVKDesc.size > 0, Result::INVALID_ARGUMENT, "CreateMemory: 'memoryVKDesc.size' is 0");
 
     Memory* memoryImpl = nullptr;
-    const Result result = m_WrapperVKAPI.CreateMemoryVK(m_Device, memoryVKDesc, memoryImpl);
-
-    if (result == Result::SUCCESS) {
-        RETURN_ON_FAILURE(this, memoryImpl != nullptr, Result::FAILURE, "CreateMemoryVK: 'impl' is NULL");
+    Result result = m_WrapperVKAPI.CreateMemoryVK(m_Device, memoryVKDesc, memoryImpl);
+    if (result == Result::SUCCESS)
         memory = (Memory*)Allocate(GetStdAllocator(), *this, memoryImpl, memoryVKDesc.size, MemoryLocation::MAX_NUM);
-    }
 
     return result;
 }
 
-Result DeviceVal::CreateGraphicsPipelineVK(NRIVkPipeline vkPipeline, Pipeline*& pipeline) {
-    RETURN_ON_FAILURE(this, vkPipeline != 0, Result::INVALID_ARGUMENT, "CreateGraphicsPipelineVK: 'vkPipeline' is NULL");
+Result DeviceVal::CreateGraphicsPipeline(VKNonDispatchableHandle vkPipeline, Pipeline*& pipeline) {
+    RETURN_ON_FAILURE(this, vkPipeline != 0, Result::INVALID_ARGUMENT, "CreateGraphicsPipeline: 'vkPipeline' is NULL");
 
     Pipeline* pipelineImpl = nullptr;
-    const Result result = m_WrapperVKAPI.CreateGraphicsPipelineVK(m_Device, vkPipeline, pipelineImpl);
-
-    if (result == Result::SUCCESS) {
-        RETURN_ON_FAILURE(this, pipelineImpl != nullptr, Result::FAILURE, "CreateGraphicsPipelineVK: 'impl' is NULL");
+    Result result = m_WrapperVKAPI.CreateGraphicsPipelineVK(m_Device, vkPipeline, pipelineImpl);
+    if (result == Result::SUCCESS)
         pipeline = (Pipeline*)Allocate(GetStdAllocator(), *this, pipelineImpl);
-    }
 
     return result;
 }
 
-Result DeviceVal::CreateComputePipelineVK(NRIVkPipeline vkPipeline, Pipeline*& pipeline) {
-    RETURN_ON_FAILURE(this, vkPipeline != 0, Result::INVALID_ARGUMENT, "CreateComputePipelineVK: 'vkPipeline' is NULL");
+Result DeviceVal::CreateComputePipeline(VKNonDispatchableHandle vkPipeline, Pipeline*& pipeline) {
+    RETURN_ON_FAILURE(this, vkPipeline != 0, Result::INVALID_ARGUMENT, "CreateComputePipeline: 'vkPipeline' is NULL");
 
     Pipeline* pipelineImpl = nullptr;
-    const Result result = m_WrapperVKAPI.CreateComputePipelineVK(m_Device, vkPipeline, pipelineImpl);
-
-    if (result == Result::SUCCESS) {
-        RETURN_ON_FAILURE(this, pipelineImpl != nullptr, Result::FAILURE, "CreateComputePipelineVK: 'impl' is NULL");
+    Result result = m_WrapperVKAPI.CreateComputePipelineVK(m_Device, vkPipeline, pipelineImpl);
+    if (result == Result::SUCCESS)
         pipeline = (Pipeline*)Allocate(GetStdAllocator(), *this, pipelineImpl);
-    }
 
     return result;
 }
 
-Result DeviceVal::CreateQueryPoolVK(const QueryPoolVKDesc& queryPoolVKDesc, QueryPool*& queryPool) {
-    RETURN_ON_FAILURE(this, queryPoolVKDesc.vkQueryPool != 0, Result::INVALID_ARGUMENT, "CreateQueryPoolVK: 'queryPoolVKDesc.vkQueryPool' is NULL");
+Result DeviceVal::CreateQueryPool(const QueryPoolVKDesc& queryPoolVKDesc, QueryPool*& queryPool) {
+    RETURN_ON_FAILURE(this, queryPoolVKDesc.vkQueryPool != 0, Result::INVALID_ARGUMENT, "CreateQueryPool: 'queryPoolVKDesc.vkQueryPool' is NULL");
 
     QueryPool* queryPoolImpl = nullptr;
-    const Result result = m_WrapperVKAPI.CreateQueryPoolVK(m_Device, queryPoolVKDesc, queryPoolImpl);
+    Result result = m_WrapperVKAPI.CreateQueryPoolVK(m_Device, queryPoolVKDesc, queryPoolImpl);
 
     if (result == Result::SUCCESS) {
-        RETURN_ON_FAILURE(this, queryPoolImpl != nullptr, Result::FAILURE, "CreateQueryPoolVK: 'impl' is NULL");
-
-        const QueryType queryType = GetQueryTypeVK(queryPoolVKDesc.vkQueryType);
-
+        QueryType queryType = GetQueryTypeVK(queryPoolVKDesc.vkQueryType);
         queryPool = (QueryPool*)Allocate(GetStdAllocator(), *this, queryPoolImpl, queryType, 0);
     }
 
     return result;
 }
 
-Result DeviceVal::CreateAccelerationStructureVK(const AccelerationStructureVKDesc& accelerationStructureDesc, AccelerationStructure*& accelerationStructure) {
+Result DeviceVal::CreateAccelerationStructure(const AccelerationStructureVKDesc& accelerationStructureDesc, AccelerationStructure*& accelerationStructure) {
     RETURN_ON_FAILURE(this, accelerationStructureDesc.vkAccelerationStructure != 0, Result::INVALID_ARGUMENT,
-        "CreateAccelerationStructureVK: 'accelerationStructureDesc.vkAccelerationStructure' is NULL");
+        "CreateAccelerationStructure: 'accelerationStructureDesc.vkAccelerationStructure' is NULL");
 
     AccelerationStructure* accelerationStructureImpl = nullptr;
-    const Result result = m_WrapperVKAPI.CreateAccelerationStructureVK(m_Device, accelerationStructureDesc, accelerationStructureImpl);
+    Result result = m_WrapperVKAPI.CreateAccelerationStructureVK(m_Device, accelerationStructureDesc, accelerationStructureImpl);
 
     if (result == Result::SUCCESS) {
-        RETURN_ON_FAILURE(this, accelerationStructureImpl != nullptr, Result::FAILURE, "CreateAccelerationStructureVK: 'impl' is NULL");
-
-        accelerationStructure = (AccelerationStructure*)Allocate(GetStdAllocator(), *this, accelerationStructureImpl, true);
+        MemoryDesc memoryDesc = {};
+        accelerationStructure = (AccelerationStructure*)Allocate(GetStdAllocator(), *this, accelerationStructureImpl, true, memoryDesc);
     }
 
     return result;
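The VK-suffixed validation methods collapse into plain overloads here, while the public wrapper entry points (m_WrapperVKAPI.Create*VK) keep their names. Wrapping an existing Vulkan object therefore still looks like this on the application side; a hedged sketch that assumes the NRI headers and a valid vkQueue handle, with exact field types inferred from the checks above:

```cpp
// Hypothetical wrapping of an existing VkQueue (requires the NRI VK wrapper
// extension; field types are assumptions based on the validation above).
nri::CommandQueueVKDesc commandQueueVKDesc = {};
commandQueueVKDesc.vkQueue = vkQueue;                                  // validated: must not be 0
commandQueueVKDesc.commandQueueType = nri::CommandQueueType::GRAPHICS; // validated: < MAX_NUM

nri::CommandQueue* commandQueue = nullptr;
nri::Result result = wrapperVKInterface.CreateCommandQueueVK(device, commandQueueVKDesc, commandQueue);
```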
@@ -941,47 +911,38 @@ Result DeviceVal::CreateAccelerationStructureVK(const AccelerationStructureVKDes
 
 #if NRI_USE_D3D11
 
-Result DeviceVal::CreateCommandBufferD3D11(const CommandBufferD3D11Desc& commandBufferDesc, CommandBuffer*& commandBuffer) {
-    RETURN_ON_FAILURE(this, commandBufferDesc.d3d11DeviceContext != nullptr, Result::INVALID_ARGUMENT, "CreateCommandBufferD3D11: 'commandBufferDesc.d3d11DeviceContext' is NULL");
+Result DeviceVal::CreateCommandBuffer(const CommandBufferD3D11Desc& commandBufferDesc, CommandBuffer*& commandBuffer) {
+    RETURN_ON_FAILURE(this, commandBufferDesc.d3d11DeviceContext != nullptr, Result::INVALID_ARGUMENT, "CreateCommandBuffer: 'commandBufferDesc.d3d11DeviceContext' is NULL");
 
     CommandBuffer* commandBufferImpl = nullptr;
-    const Result result = m_WrapperD3D11API.CreateCommandBufferD3D11(m_Device, commandBufferDesc, commandBufferImpl);
-
-    if (result == Result::SUCCESS) {
-        RETURN_ON_FAILURE(this, commandBufferImpl != nullptr, Result::FAILURE, "CreateCommandBufferD3D11: 'impl' is NULL");
+    Result result = m_WrapperD3D11API.CreateCommandBufferD3D11(m_Device, commandBufferDesc, commandBufferImpl);
+    if (result == Result::SUCCESS)
         commandBuffer = (CommandBuffer*)Allocate(GetStdAllocator(), *this, commandBufferImpl, true);
-    }
 
     return result;
 }
 
-Result DeviceVal::CreateBufferD3D11(const BufferD3D11Desc& bufferDesc, Buffer*& buffer) {
-    RETURN_ON_FAILURE(this, bufferDesc.d3d11Resource != nullptr, Result::INVALID_ARGUMENT, "CreateBufferD3D11: 'bufferDesc.d3d11Resource' is NULL");
+Result DeviceVal::CreateBuffer(const BufferD3D11Desc& bufferDesc, Buffer*& buffer) {
+    RETURN_ON_FAILURE(this, bufferDesc.d3d11Resource != nullptr, Result::INVALID_ARGUMENT, "CreateBuffer: 'bufferDesc.d3d11Resource' is NULL");
 
     Buffer* bufferImpl = nullptr;
-    const Result result = m_WrapperD3D11API.CreateBufferD3D11(m_Device, bufferDesc, bufferImpl);
+    Result result = m_WrapperD3D11API.CreateBufferD3D11(m_Device, bufferDesc, bufferImpl);
 
-    if (result == Result::SUCCESS) {
-        RETURN_ON_FAILURE(this, bufferImpl != nullptr, Result::FAILURE, "CreateBufferD3D11: 'impl' is NULL");
-
-        buffer = (Buffer*)Allocate(GetStdAllocator(), *this, bufferImpl);
-    }
+    if (result == Result::SUCCESS)
+        buffer = (Buffer*)Allocate(GetStdAllocator(), *this, bufferImpl, true);
 
     return result;
 }
 
-Result DeviceVal::CreateTextureD3D11(const TextureD3D11Desc& textureDesc, Texture*& texture) {
-    RETURN_ON_FAILURE(this, textureDesc.d3d11Resource != nullptr, Result::INVALID_ARGUMENT, "CreateTextureD3D11: 'textureDesc.d3d11Resource' is NULL");
+Result DeviceVal::CreateTexture(const TextureD3D11Desc& textureDesc, Texture*& texture) {
+    RETURN_ON_FAILURE(this, textureDesc.d3d11Resource != nullptr, Result::INVALID_ARGUMENT, "CreateTexture: 'textureDesc.d3d11Resource' is NULL");
 
     Texture* textureImpl = nullptr;
-    const Result result = m_WrapperD3D11API.CreateTextureD3D11(m_Device, textureDesc, textureImpl);
+    Result result = m_WrapperD3D11API.CreateTextureD3D11(m_Device, textureDesc, textureImpl);
 
-    if (result == Result::SUCCESS) {
-        RETURN_ON_FAILURE(this, textureImpl != nullptr, Result::FAILURE, "CreateTextureD3D11: 'impl' is NULL");
-
-        texture = (Texture*)Allocate(GetStdAllocator(), *this, textureImpl);
-    }
+    if (result == Result::SUCCESS)
+        texture = (Texture*)Allocate(GetStdAllocator(), *this, textureImpl, true);
 
     return result;
 }
@@ -990,100 +951,81 @@
 
 #if NRI_USE_D3D12
 
-Result DeviceVal::CreateCommandBufferD3D12(const CommandBufferD3D12Desc& commandBufferDesc, CommandBuffer*& commandBuffer) {
-    RETURN_ON_FAILURE(
-        this, commandBufferDesc.d3d12CommandAllocator != nullptr, Result::INVALID_ARGUMENT, "CreateCommandBufferD3D12: 'commandBufferDesc.d3d12CommandAllocator' is NULL");
+Result DeviceVal::CreateCommandBuffer(const CommandBufferD3D12Desc& commandBufferDesc, CommandBuffer*& commandBuffer) {
+    RETURN_ON_FAILURE(this, commandBufferDesc.d3d12CommandAllocator != nullptr, Result::INVALID_ARGUMENT, "CreateCommandBuffer: 'commandBufferDesc.d3d12CommandAllocator' is NULL");
 
-    RETURN_ON_FAILURE(this, commandBufferDesc.d3d12CommandList != nullptr, Result::INVALID_ARGUMENT, "CreateCommandBufferD3D12: 'commandBufferDesc.d3d12CommandList' is NULL");
+    RETURN_ON_FAILURE(this, commandBufferDesc.d3d12CommandList != nullptr, Result::INVALID_ARGUMENT, "CreateCommandBuffer: 'commandBufferDesc.d3d12CommandList' is NULL");
 
     CommandBuffer* commandBufferImpl = nullptr;
-    const Result result = m_WrapperD3D12API.CreateCommandBufferD3D12(m_Device, commandBufferDesc, commandBufferImpl);
-
-    if (result == Result::SUCCESS) {
-        RETURN_ON_FAILURE(this, commandBufferImpl != nullptr, Result::FAILURE, "CreateCommandBufferD3D12: 'impl' is NULL");
+    Result result = m_WrapperD3D12API.CreateCommandBufferD3D12(m_Device, commandBufferDesc, commandBufferImpl);
+    if (result == Result::SUCCESS)
         commandBuffer = (CommandBuffer*)Allocate(GetStdAllocator(), *this, commandBufferImpl, true);
-    }
 
     return result;
 }
 
-Result DeviceVal::CreateDescriptorPoolD3D12(const DescriptorPoolD3D12Desc& descriptorPoolD3D12Desc, DescriptorPool*& descriptorPool) {
+Result DeviceVal::CreateDescriptorPool(const DescriptorPoolD3D12Desc& descriptorPoolD3D12Desc, DescriptorPool*& descriptorPool) {
     RETURN_ON_FAILURE(this, descriptorPoolD3D12Desc.d3d12ResourceDescriptorHeap == nullptr && descriptorPoolD3D12Desc.d3d12ResourceDescriptorHeap == nullptr,
-        Result::INVALID_ARGUMENT,
-        "CreateDescriptorPoolD3D12: 'descriptorPoolD3D12Desc.d3d12ResourceDescriptorHeap' and 'descriptorPoolD3D12Desc.d3d12ResourceDescriptorHeap' are NULL");
+        Result::INVALID_ARGUMENT, "CreateDescriptorPool: 'descriptorPoolD3D12Desc.d3d12ResourceDescriptorHeap' and 'descriptorPoolD3D12Desc.d3d12SamplerDescriptorHeap' are NULL");
 
     DescriptorPool* descriptorPoolImpl = nullptr;
-    const Result result = m_WrapperD3D12API.CreateDescriptorPoolD3D12(m_Device, descriptorPoolD3D12Desc, descriptorPoolImpl);
-
-    if (result == Result::SUCCESS) {
-        RETURN_ON_FAILURE(this, descriptorPoolImpl != nullptr, Result::FAILURE, "CreateDescriptorPoolD3D12: 'impl' is NULL");
+    Result result = m_WrapperD3D12API.CreateDescriptorPoolD3D12(m_Device, descriptorPoolD3D12Desc, descriptorPoolImpl);
+    if (result == Result::SUCCESS)
         descriptorPool = (DescriptorPool*)Allocate(GetStdAllocator(), *this, descriptorPoolImpl, descriptorPoolD3D12Desc.descriptorSetMaxNum);
-    }
 
     return result;
 }
 
-Result DeviceVal::CreateBufferD3D12(const BufferD3D12Desc& bufferDesc, Buffer*& buffer) {
-    RETURN_ON_FAILURE(this, bufferDesc.d3d12Resource != nullptr, Result::INVALID_ARGUMENT, "CreateBufferD3D12: 'bufferDesc.d3d12Resource' is NULL");
+Result DeviceVal::CreateBuffer(const BufferD3D12Desc& bufferDesc, Buffer*& buffer) {
+    RETURN_ON_FAILURE(this, bufferDesc.d3d12Resource != nullptr, Result::INVALID_ARGUMENT, "CreateBuffer: 'bufferDesc.d3d12Resource' is NULL");
 
     Buffer* bufferImpl = nullptr;
-    const Result result = m_WrapperD3D12API.CreateBufferD3D12(m_Device, bufferDesc, bufferImpl);
+    Result result = m_WrapperD3D12API.CreateBufferD3D12(m_Device, bufferDesc, bufferImpl);
 
-    if (result == Result::SUCCESS) {
-        RETURN_ON_FAILURE(this, bufferImpl != nullptr, Result::FAILURE, "CreateBufferD3D12: 'impl' is NULL");
-
-        buffer = (Buffer*)Allocate(GetStdAllocator(), *this, bufferImpl);
-    }
+    if (result == Result::SUCCESS)
+        buffer = (Buffer*)Allocate(GetStdAllocator(), *this, bufferImpl, true);
 
     return result;
 }
 
-Result DeviceVal::CreateTextureD3D12(const TextureD3D12Desc& textureDesc, Texture*& texture) {
-    RETURN_ON_FAILURE(this, textureDesc.d3d12Resource != nullptr, Result::INVALID_ARGUMENT, "CreateTextureD3D12: 'textureDesc.d3d12Resource' is NULL");
+Result DeviceVal::CreateTexture(const TextureD3D12Desc& textureDesc, Texture*& texture) {
+    RETURN_ON_FAILURE(this, textureDesc.d3d12Resource != nullptr, Result::INVALID_ARGUMENT, "CreateTexture: 'textureDesc.d3d12Resource' is NULL");
 
     Texture* textureImpl = nullptr;
-    const Result result = m_WrapperD3D12API.CreateTextureD3D12(m_Device, textureDesc, textureImpl);
-
-    if (result == Result::SUCCESS) {
-        RETURN_ON_FAILURE(this, textureImpl != nullptr, Result::FAILURE, "CreateTextureD3D12: 'impl' is NULL");
+    Result result = m_WrapperD3D12API.CreateTextureD3D12(m_Device, textureDesc, textureImpl);
 
-        texture = (Texture*)Allocate(GetStdAllocator(), *this, textureImpl);
-    }
+    if (result == Result::SUCCESS)
+        texture = (Texture*)Allocate(GetStdAllocator(), *this, textureImpl, true);
 
     return result;
 }
 
-Result DeviceVal::CreateMemoryD3D12(const MemoryD3D12Desc& memoryDesc, Memory*& memory) {
-    RETURN_ON_FAILURE(this, (memoryDesc.d3d12Heap != nullptr || memoryDesc.d3d12HeapDesc != nullptr), Result::INVALID_ARGUMENT,
-        "CreateMemoryD3D12: 'memoryDesc.d3d12Heap' or 'memoryDesc.d3d12HeapDesc' is NULL");
+Result DeviceVal::CreateMemory(const MemoryD3D12Desc& memoryDesc, Memory*& memory) {
+    RETURN_ON_FAILURE(this, memoryDesc.d3d12Heap != nullptr, Result::INVALID_ARGUMENT, "CreateMemory: 'memoryDesc.d3d12Heap' is NULL");
 
     Memory* memoryImpl = nullptr;
-    const Result result = m_WrapperD3D12API.CreateMemoryD3D12(m_Device, memoryDesc, memoryImpl);
+    Result result = m_WrapperD3D12API.CreateMemoryD3D12(m_Device, memoryDesc, memoryImpl);
 
     const uint64_t size = GetMemorySizeD3D12(memoryDesc);
 
-    if (result == Result::SUCCESS) {
-        RETURN_ON_FAILURE(this, memoryImpl != nullptr, Result::FAILURE, "CreateMemoryD3D12: 'impl' is NULL");
-
+    if (result == Result::SUCCESS)
         memory = (Memory*)Allocate(GetStdAllocator(), *this, memoryImpl, size, MemoryLocation::MAX_NUM);
-    }
 
     return result;
 }
 
-Result DeviceVal::CreateAccelerationStructureD3D12(const AccelerationStructureD3D12Desc& accelerationStructureDesc, AccelerationStructure*& accelerationStructure) {
+Result DeviceVal::CreateAccelerationStructure(const AccelerationStructureD3D12Desc& accelerationStructureDesc, AccelerationStructure*& accelerationStructure) {
     RETURN_ON_FAILURE(
-        this, accelerationStructureDesc.d3d12Resource != nullptr, Result::INVALID_ARGUMENT, "CreateAccelerationStructureD3D12: 'accelerationStructureDesc.d3d12Resource' is NULL");
+        this, accelerationStructureDesc.d3d12Resource != nullptr, Result::INVALID_ARGUMENT, "CreateAccelerationStructure: 'accelerationStructureDesc.d3d12Resource' is NULL");
 
     AccelerationStructure* accelerationStructureImpl = nullptr;
-    const Result result = m_WrapperD3D12API.CreateAccelerationStructureD3D12(m_Device, accelerationStructureDesc, accelerationStructureImpl);
+    Result result = m_WrapperD3D12API.CreateAccelerationStructureD3D12(m_Device, accelerationStructureDesc, accelerationStructureImpl);
 
     if (result == Result::SUCCESS) {
-        RETURN_ON_FAILURE(this, accelerationStructureImpl != nullptr, Result::FAILURE, "CreateAccelerationStructureD3D12: 'impl' is NULL");
-
-        accelerationStructure = (AccelerationStructure*)Allocate(GetStdAllocator(), *this, accelerationStructureImpl, true);
+        MemoryDesc memoryDesc = {};
+        accelerationStructure = (AccelerationStructure*)Allocate(GetStdAllocator(), *this, accelerationStructureImpl, true, memoryDesc);
     }
 
     return result;
@@ -1096,7 +1038,7 @@ uint32_t DeviceVal::CalculateAllocationNumber(const ResourceGroupDesc& resourceG
     RETURN_ON_FAILURE(this, resourceGroupDesc.bufferNum == 0 || resourceGroupDesc.buffers != nullptr, 0, "CalculateAllocationNumber: 'resourceGroupDesc.buffers' is NULL");
     RETURN_ON_FAILURE(this, resourceGroupDesc.textureNum == 0 || resourceGroupDesc.textures != nullptr, 0, "CalculateAllocationNumber: 'resourceGroupDesc.textures' is NULL");
 
-    Buffer** buffersImpl = STACK_ALLOC(Buffer*, resourceGroupDesc.bufferNum);
+    Buffer** buffersImpl = StackAlloc(Buffer*, resourceGroupDesc.bufferNum);
     for (uint32_t i = 0; i < resourceGroupDesc.bufferNum; i++) {
         RETURN_ON_FAILURE(this, resourceGroupDesc.buffers[i] != nullptr, 0, "CalculateAllocationNumber: 'resourceGroupDesc.buffers[%u]' is NULL", i);
@@ -1105,7 +1047,7 @@
         buffersImpl[i] = bufferVal.GetImpl();
     }
 
-    Texture** texturesImpl = STACK_ALLOC(Texture*, resourceGroupDesc.textureNum);
+    Texture** texturesImpl = StackAlloc(Texture*, resourceGroupDesc.textureNum);
     for (uint32_t i = 0; i < resourceGroupDesc.textureNum; i++) {
         RETURN_ON_FAILURE(this, resourceGroupDesc.textures[i] != nullptr, 0, "CalculateAllocationNumber: 'resourceGroupDesc.textures[%u]' is NULL", i);
@@ -1133,7 +1075,7 @@ Result DeviceVal::AllocateAndBindMemory(const ResourceGroupDesc& resourceGroupDe
     RETURN_ON_FAILURE(
         this, resourceGroupDesc.textureNum == 0 || resourceGroupDesc.textures != nullptr, Result::INVALID_ARGUMENT, "AllocateAndBindMemory: 'resourceGroupDesc.textures' is NULL");
 
-    Buffer** buffersImpl = STACK_ALLOC(Buffer*, resourceGroupDesc.bufferNum);
+    Buffer** buffersImpl = StackAlloc(Buffer*, resourceGroupDesc.bufferNum);
     for (uint32_t i = 0; i < resourceGroupDesc.bufferNum; i++) {
         RETURN_ON_FAILURE(this, resourceGroupDesc.buffers[i] != nullptr, Result::INVALID_ARGUMENT, "AllocateAndBindMemory: 'resourceGroupDesc.buffers[%u]' is NULL", i);
@@ -1142,7 +1084,7 @@
         buffersImpl[i] = bufferVal.GetImpl();
     }
 
-    Texture** texturesImpl = STACK_ALLOC(Texture*, resourceGroupDesc.textureNum);
+    Texture** texturesImpl = StackAlloc(Texture*, resourceGroupDesc.textureNum);
     for (uint32_t i = 0; i < resourceGroupDesc.textureNum; i++) {
         RETURN_ON_FAILURE(this, resourceGroupDesc.textures[i] != nullptr, Result::INVALID_ARGUMENT, "AllocateAndBindMemory: 'resourceGroupDesc.textures[%u]' is NULL", i);
@@ -1157,7 +1099,7 @@
     resourceGroupDescImpl.buffers = buffersImpl;
     resourceGroupDescImpl.textures = texturesImpl;
 
-    const Result result = m_HelperAPI.AllocateAndBindMemory(m_Device, resourceGroupDescImpl, allocations);
+    Result result = m_HelperAPI.AllocateAndBindMemory(m_Device, resourceGroupDescImpl, allocations);
 
     if (result == Result::SUCCESS) {
         for (uint32_t i = 0; i < resourceGroupDesc.bufferNum; i++) {
@@ -1170,10 +1112,8 @@
             textureVal.SetBoundToMemory();
         }
 
-        for (uint32_t i = 0; i < allocationNum; i++) {
-            RETURN_ON_FAILURE(this, allocations[i] != nullptr, Result::FAILURE, "AllocateAndBindMemory: 'impl' is NULL");
+        for (uint32_t i = 0; i < allocationNum; i++)
             allocations[i] = (Memory*)Allocate(GetStdAllocator(), *this, allocations[i], 0, resourceGroupDesc.memoryLocation);
-        }
     }
 
     return result;
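CalculateAllocationNumber and AllocateAndBindMemory form the usual query-then-allocate pair: ask how many Memory objects a resource group needs, then pass an array of that size. A hedged usage sketch that assumes the NRI helper interface (std::vector used for brevity):

```cpp
// Hypothetical two-call usage of the helper interface.
nri::ResourceGroupDesc resourceGroupDesc = {};
resourceGroupDesc.memoryLocation = nri::MemoryLocation::DEVICE;
resourceGroupDesc.buffers = buffers;     // nri::Buffer* array, each element non-null (validated above)
resourceGroupDesc.bufferNum = bufferNum;

const uint32_t allocationNum = helperInterface.CalculateAllocationNumber(device, resourceGroupDesc);

std::vector<nri::Memory*> allocations(allocationNum, nullptr);
nri::Result result = helperInterface.AllocateAndBindMemory(device, resourceGroupDesc, allocations.data());
```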
@@ -1183,35 +1123,32 @@ Result DeviceVal::QueryVideoMemoryInfo(MemoryLocation memoryLocation, VideoMemor
     return m_HelperAPI.QueryVideoMemoryInfo(m_Device, memoryLocation, videoMemoryInfo);
 }
 
-Result DeviceVal::CreateRayTracingPipeline(const RayTracingPipelineDesc& pipelineDesc, Pipeline*& pipeline) {
-    RETURN_ON_FAILURE(this, pipelineDesc.pipelineLayout != nullptr, Result::INVALID_ARGUMENT, "CreateRayTracingPipeline: 'pipelineDesc.pipelineLayout' is NULL");
-    RETURN_ON_FAILURE(this, pipelineDesc.shaderLibrary != nullptr, Result::INVALID_ARGUMENT, "CreateRayTracingPipeline: 'pipelineDesc.shaderLibrary' is NULL");
-    RETURN_ON_FAILURE(this, pipelineDesc.shaderGroupDescs != nullptr, Result::INVALID_ARGUMENT, "CreateRayTracingPipeline: 'pipelineDesc.shaderGroupDescs' is NULL");
-    RETURN_ON_FAILURE(this, pipelineDesc.shaderGroupDescNum != 0, Result::INVALID_ARGUMENT, "CreateRayTracingPipeline: 'pipelineDesc.shaderGroupDescNum' is 0");
-    RETURN_ON_FAILURE(this, pipelineDesc.recursionDepthMax != 0, Result::INVALID_ARGUMENT, "CreateRayTracingPipeline: 'pipelineDesc.recursionDepthMax' is 0");
+Result DeviceVal::CreatePipeline(const RayTracingPipelineDesc& pipelineDesc, Pipeline*& pipeline) {
+    RETURN_ON_FAILURE(this, pipelineDesc.pipelineLayout != nullptr, Result::INVALID_ARGUMENT, "CreatePipeline: 'pipelineDesc.pipelineLayout' is NULL");
+    RETURN_ON_FAILURE(this, pipelineDesc.shaderLibrary != nullptr, Result::INVALID_ARGUMENT, "CreatePipeline: 'pipelineDesc.shaderLibrary' is NULL");
+    RETURN_ON_FAILURE(this, pipelineDesc.shaderGroupDescs != nullptr, Result::INVALID_ARGUMENT, "CreatePipeline: 'pipelineDesc.shaderGroupDescs' is NULL");
+    RETURN_ON_FAILURE(this, pipelineDesc.shaderGroupDescNum != 0, Result::INVALID_ARGUMENT, "CreatePipeline: 'pipelineDesc.shaderGroupDescNum' is 0");
+    RETURN_ON_FAILURE(this, pipelineDesc.recursionDepthMax != 0, Result::INVALID_ARGUMENT, "CreatePipeline: 'pipelineDesc.recursionDepthMax' is 0");
 
     uint32_t uniqueShaderStages = 0;
     for (uint32_t i = 0; i < pipelineDesc.shaderLibrary->shaderNum; i++) {
         const ShaderDesc& shaderDesc = pipelineDesc.shaderLibrary->shaders[i];
-        RETURN_ON_FAILURE(
-            this, shaderDesc.bytecode != nullptr, Result::INVALID_ARGUMENT, "CreateRayTracingPipeline: 'pipelineDesc.shaderLibrary->shaders[%u].bytecode' is invalid", i);
+        RETURN_ON_FAILURE(this, shaderDesc.bytecode != nullptr, Result::INVALID_ARGUMENT, "CreatePipeline: 'pipelineDesc.shaderLibrary->shaders[%u].bytecode' is invalid", i);
 
-        RETURN_ON_FAILURE(this, shaderDesc.size != 0, Result::INVALID_ARGUMENT, "CreateRayTracingPipeline: 'pipelineDesc.shaderLibrary->shaders[%u].size' is 0", i);
+        RETURN_ON_FAILURE(this, shaderDesc.size != 0, Result::INVALID_ARGUMENT, "CreatePipeline: 'pipelineDesc.shaderLibrary->shaders[%u].size' is 0", i);
 
         RETURN_ON_FAILURE(this, IsShaderStageValid(shaderDesc.stage, uniqueShaderStages, StageBits::RAY_TRACING_SHADERS), Result::INVALID_ARGUMENT,
-            "CreateRayTracingPipeline: 'pipelineDesc.shaderLibrary->shaders[%u].stage' must include only 1 ray tracing shader stage, unique for the entire pipeline", i);
+            "CreatePipeline: 'pipelineDesc.shaderLibrary->shaders[%u].stage' must include only 1 ray tracing shader stage, unique for the entire pipeline", i);
     }
 
     auto pipelineDescImpl = pipelineDesc;
     pipelineDescImpl.pipelineLayout = NRI_GET_IMPL(PipelineLayout, pipelineDesc.pipelineLayout);
 
     Pipeline* pipelineImpl = nullptr;
-    const Result result = m_RayTracingAPI.CreateRayTracingPipeline(m_Device, pipelineDescImpl, pipelineImpl);
+    Result result = m_RayTracingAPI.CreateRayTracingPipeline(m_Device, pipelineDescImpl, pipelineImpl);
 
-    if (result == Result::SUCCESS) {
-        RETURN_ON_FAILURE(this, pipelineImpl != nullptr, Result::FAILURE, "CreateRayTracingPipeline: 'impl' is NULL");
+    if (result == Result::SUCCESS)
         pipeline = (Pipeline*)Allocate(GetStdAllocator(), *this, pipelineImpl);
-    }
 
     return result;
 }
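The hunk below replaces a heap-backed Vector of geometry objects with Scratch plus AllocateScratch. A self-contained sketch of what such a RAII scratch wrapper can look like; this is an assumption for illustration, NRI's actual Scratch type is not shown in this diff:

```cpp
#include <cstdlib>
#include <new>

// Illustrative Scratch<T>: a transient, exactly-sized array freed on scope exit.
template <typename T>
struct Scratch {
    T* mem = nullptr;
    size_t num = 0;

    explicit Scratch(size_t n) : num(n) {
        mem = n ? static_cast<T*>(std::malloc(sizeof(T) * n)) : nullptr;
        for (size_t i = 0; i < num; i++)
            new (mem + i) T(); // default-construct each element
    }
    ~Scratch() {
        for (size_t i = 0; i < num; i++)
            mem[i].~T();
        std::free(mem);
    }
    Scratch(const Scratch&) = delete;
    Scratch& operator=(const Scratch&) = delete;

    operator T*() const { return mem; } // decays like the raw arrays it replaces
};

// Usage mirroring the diff: the object converts to T* wherever a raw pointer
// is expected ('device' is unused in this sketch).
#define AllocateScratch(device, T, num) Scratch<T>(num)
```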
accelerationStructureDesc.instanceOrGeometryObjectNum : 0; + Scratch objectImplArray = AllocateScratch(*this, GeometryObject, geometryObjectNum); + if (accelerationStructureDesc.type == AccelerationStructureType::BOTTOM_LEVEL) { - const uint32_t geometryObjectNum = accelerationStructureDesc.instanceOrGeometryObjectNum; - objectImplArray.resize(geometryObjectNum); - ConvertGeometryObjectsVal(objectImplArray.data(), accelerationStructureDesc.geometryObjects, geometryObjectNum); - accelerationStructureDescImpl.geometryObjects = objectImplArray.data(); + ConvertGeometryObjectsVal(objectImplArray, accelerationStructureDesc.geometryObjects, geometryObjectNum); + accelerationStructureDescImpl.geometryObjects = objectImplArray; + } + + AccelerationStructure* accelerationStructureImpl = nullptr; + Result result = m_RayTracingAPI.CreateAccelerationStructure(m_Device, accelerationStructureDescImpl, accelerationStructureImpl); + + if (result == Result::SUCCESS) { + MemoryDesc memoryDesc = {}; + m_RayTracingAPI.GetAccelerationStructureMemoryDesc(GetImpl(), accelerationStructureDescImpl, MemoryLocation::DEVICE, memoryDesc); + + accelerationStructure = (AccelerationStructure*)Allocate(GetStdAllocator(), *this, accelerationStructureImpl, false, memoryDesc); + } + + return result; +} + +Result DeviceVal::CreateAccelerationStructure(const AllocateAccelerationStructureDesc& accelerationStructureDesc, AccelerationStructure*& accelerationStructure) { + RETURN_ON_FAILURE(this, accelerationStructureDesc.desc.instanceOrGeometryObjectNum != 0, Result::INVALID_ARGUMENT, + "CreateAccelerationStructure: 'accelerationStructureDesc.instanceOrGeometryObjectNum' is 0"); + + AllocateAccelerationStructureDesc accelerationStructureDescImpl = accelerationStructureDesc; + + uint32_t geometryObjectNum = accelerationStructureDesc.desc.type == AccelerationStructureType::BOTTOM_LEVEL ? 
accelerationStructureDesc.desc.instanceOrGeometryObjectNum : 0; + Scratch objectImplArray = AllocateScratch(*this, GeometryObject, geometryObjectNum); + + if (accelerationStructureDesc.desc.type == AccelerationStructureType::BOTTOM_LEVEL) { + ConvertGeometryObjectsVal(objectImplArray, accelerationStructureDesc.desc.geometryObjects, geometryObjectNum); + accelerationStructureDescImpl.desc.geometryObjects = objectImplArray; } AccelerationStructure* accelerationStructureImpl = nullptr; - const Result result = m_RayTracingAPI.CreateAccelerationStructure(m_Device, accelerationStructureDescImpl, accelerationStructureImpl); + Result result = m_ResourceAllocatorAPI.AllocateAccelerationStructure(m_Device, accelerationStructureDescImpl, accelerationStructureImpl); if (result == Result::SUCCESS) { - RETURN_ON_FAILURE(this, accelerationStructureImpl != nullptr, Result::FAILURE, "CreateAccelerationStructure: 'impl' is NULL"); - accelerationStructure = (AccelerationStructure*)Allocate(GetStdAllocator(), *this, accelerationStructureImpl, false); + MemoryDesc memoryDesc = {}; + m_RayTracingAPI.GetAccelerationStructureMemoryDesc(GetImpl(), accelerationStructureDescImpl.desc, MemoryLocation::DEVICE, memoryDesc); + + accelerationStructure = (AccelerationStructure*)Allocate(GetStdAllocator(), *this, accelerationStructureImpl, true, memoryDesc); } return result; } Result DeviceVal::BindAccelerationStructureMemory(const AccelerationStructureMemoryBindingDesc* memoryBindingDescs, uint32_t memoryBindingDescNum) { - if (memoryBindingDescNum == 0) - return Result::SUCCESS; - RETURN_ON_FAILURE(this, memoryBindingDescs != nullptr, Result::INVALID_ARGUMENT, "BindAccelerationStructureMemory: 'memoryBindingDescs' is NULL"); - AccelerationStructureMemoryBindingDesc* memoryBindingDescsImpl = STACK_ALLOC(AccelerationStructureMemoryBindingDesc, memoryBindingDescNum); + AccelerationStructureMemoryBindingDesc* memoryBindingDescsImpl = StackAlloc(AccelerationStructureMemoryBindingDesc, memoryBindingDescNum); for (uint32_t i = 0; i < memoryBindingDescNum; i++) { AccelerationStructureMemoryBindingDesc& destDesc = memoryBindingDescsImpl[i]; const AccelerationStructureMemoryBindingDesc& srcDesc = memoryBindingDescs[i]; MemoryVal& memory = (MemoryVal&)*srcDesc.memory; AccelerationStructureVal& accelerationStructure = (AccelerationStructureVal&)*srcDesc.accelerationStructure; + const MemoryDesc& memoryDesc = accelerationStructure.GetMemoryDesc(); RETURN_ON_FAILURE(this, !accelerationStructure.IsBoundToMemory(), Result::INVALID_ARGUMENT, "BindAccelerationStructureMemory: 'memoryBindingDescs[%u].accelerationStructure' is already bound to memory", i); - MemoryDesc memoryDesc = {}; - accelerationStructure.GetMemoryDesc(memoryDesc); - RETURN_ON_FAILURE(this, !memoryDesc.mustBeDedicated || srcDesc.offset == 0, Result::INVALID_ARGUMENT, "BindAccelerationStructureMemory: 'memoryBindingDescs[%u].offset' must be 0 for dedicated allocation", i); @@ -1280,7 +1241,7 @@ Result DeviceVal::BindAccelerationStructureMemory(const AccelerationStructureMem destDesc.accelerationStructure = accelerationStructure.GetImpl(); } - const Result result = m_RayTracingAPI.BindAccelerationStructureMemory(m_Device, memoryBindingDescsImpl, memoryBindingDescNum); + Result result = m_RayTracingAPI.BindAccelerationStructureMemory(m_Device, memoryBindingDescsImpl, memoryBindingDescNum); if (result == Result::SUCCESS) { for (uint32_t i = 0; i < memoryBindingDescNum; i++) { @@ -1293,19 +1254,19 @@ Result DeviceVal::BindAccelerationStructureMemory(const 
AccelerationStructureMem } void DeviceVal::DestroyAccelerationStructure(AccelerationStructure& accelerationStructure) { - Deallocate(GetStdAllocator(), (AccelerationStructureVal*)&accelerationStructure); + Destroy(GetStdAllocator(), (AccelerationStructureVal*)&accelerationStructure); } -void DeviceVal::Destroy() { - Deallocate(GetStdAllocator(), this); +void DeviceVal::Destruct() { + Destroy(GetStdAllocator(), this); } DeviceBase* CreateDeviceValidation(const DeviceCreationDesc& deviceCreationDesc, DeviceBase& device) { - StdAllocator allocator(deviceCreationDesc.memoryAllocatorInterface); + StdAllocator allocator(deviceCreationDesc.allocationCallbacks); DeviceVal* deviceVal = Allocate(allocator, deviceCreationDesc.callbackInterface, allocator, device); if (!deviceVal->Create()) { - Deallocate(allocator, deviceVal); + Destroy(allocator, deviceVal); return nullptr; } diff --git a/Source/Validation/DeviceVal.h b/Source/Validation/DeviceVal.h index 9da96540..88119434 100644 --- a/Source/Validation/DeviceVal.h +++ b/Source/Validation/DeviceVal.h @@ -64,93 +64,94 @@ struct DeviceVal final : public DeviceBase { bool Create(); void RegisterMemoryType(MemoryType memoryType, MemoryLocation memoryLocation); - void GetMemoryDesc(const BufferDesc& bufferDesc, MemoryLocation memoryLocation, MemoryDesc& memoryDesc); - void GetMemoryDesc(const TextureDesc& textureDesc, MemoryLocation memoryLocation, MemoryDesc& memoryDesc); //================================================================================================================ // NRI //================================================================================================================ - Result CreateSwapChain(const SwapChainDesc& swapChainDesc, SwapChain*& swapChain); - void DestroySwapChain(SwapChain& swapChain); - void SetDebugName(const char* name); - Result GetCommandQueue(CommandQueueType commandQueueType, CommandQueue*& commandQueue); - Result CreateCommandAllocator(const CommandQueue& commandQueue, CommandAllocator*& commandAllocator); - Result CreateDescriptorPool(const DescriptorPoolDesc& descriptorPoolDesc, DescriptorPool*& descriptorPool); + + Result CreateFence(uint64_t initialValue, Fence*& fence); + Result CreateMemory(const MemoryVKDesc& memoryVKDesc, Memory*& memory); + Result CreateMemory(const MemoryD3D12Desc& memoryDesc, Memory*& memory); Result CreateBuffer(const BufferDesc& bufferDesc, Buffer*& buffer); + Result CreateBuffer(const AllocateBufferDesc& bufferDesc, Buffer*& buffer); + Result CreateBuffer(const BufferVKDesc& bufferDesc, Buffer*& buffer); + Result CreateBuffer(const BufferD3D11Desc& bufferDesc, Buffer*& buffer); + Result CreateBuffer(const BufferD3D12Desc& bufferDesc, Buffer*& buffer); Result CreateTexture(const TextureDesc& textureDesc, Texture*& texture); + Result CreateTexture(const AllocateTextureDesc& textureDesc, Texture*& texture); + Result CreateTexture(const TextureVKDesc& textureVKDesc, Texture*& texture); + Result CreateTexture(const TextureD3D11Desc& textureDesc, Texture*& texture); + Result CreateTexture(const TextureD3D12Desc& textureDesc, Texture*& texture); + Result CreatePipeline(const GraphicsPipelineDesc& graphicsPipelineDesc, Pipeline*& pipeline); + Result CreatePipeline(const ComputePipelineDesc& computePipelineDesc, Pipeline*& pipeline); + Result CreatePipeline(const RayTracingPipelineDesc& pipelineDesc, Pipeline*& pipeline); + Result CreateQueryPool(const QueryPoolDesc& queryPoolDesc, QueryPool*& queryPool); + Result CreateQueryPool(const QueryPoolVKDesc& queryPoolVKDesc, 
QueryPool*& queryPool); + Result CreateSwapChain(const SwapChainDesc& swapChainDesc, SwapChain*& swapChain); + Result CreateDescriptor(const SamplerDesc& samplerDesc, Descriptor*& sampler); Result CreateDescriptor(const BufferViewDesc& bufferViewDesc, Descriptor*& bufferView); Result CreateDescriptor(const Texture1DViewDesc& textureViewDesc, Descriptor*& textureView); Result CreateDescriptor(const Texture2DViewDesc& textureViewDesc, Descriptor*& textureView); Result CreateDescriptor(const Texture3DViewDesc& textureViewDesc, Descriptor*& textureView); - Result CreateDescriptor(const SamplerDesc& samplerDesc, Descriptor*& sampler); + Result CreateCommandQueue(const CommandQueueVKDesc& commandQueueDesc, CommandQueue*& commandQueue); + Result CreateCommandBuffer(const CommandBufferVKDesc& commandBufferDesc, CommandBuffer*& commandBuffer); + Result CreateCommandBuffer(const CommandBufferD3D11Desc& commandBufferDesc, CommandBuffer*& commandBuffer); + Result CreateCommandBuffer(const CommandBufferD3D12Desc& commandBufferDesc, CommandBuffer*& commandBuffer); Result CreatePipelineLayout(const PipelineLayoutDesc& pipelineLayoutDesc, PipelineLayout*& pipelineLayout); - Result CreatePipeline(const GraphicsPipelineDesc& graphicsPipelineDesc, Pipeline*& pipeline); - Result CreatePipeline(const ComputePipelineDesc& computePipelineDesc, Pipeline*& pipeline); - Result CreateQueryPool(const QueryPoolDesc& queryPoolDesc, QueryPool*& queryPool); - Result CreateFence(uint64_t initialValue, Fence*& fence); - void DestroyCommandAllocator(CommandAllocator& commandAllocator); - void DestroyDescriptorPool(DescriptorPool& descriptorPool); + Result CreateDescriptorPool(const DescriptorPoolDesc& descriptorPoolDesc, DescriptorPool*& descriptorPool); + Result CreateDescriptorPool(const DescriptorPoolVKDesc& descriptorPoolVKDesc, DescriptorPool*& descriptorPool); + Result CreateDescriptorPool(const DescriptorPoolD3D12Desc& descriptorPoolD3D12Desc, DescriptorPool*& descriptorPool); + Result CreateComputePipeline(VKNonDispatchableHandle vkPipeline, Pipeline*& pipeline); + Result CreateGraphicsPipeline(VKNonDispatchableHandle vkPipeline, Pipeline*& pipeline); + Result CreateCommandAllocator(const CommandQueue& commandQueue, CommandAllocator*& commandAllocator); + Result CreateCommandAllocator(const CommandAllocatorVKDesc& commandAllocatorDesc, CommandAllocator*& commandAllocator); + Result CreateAccelerationStructure(const AccelerationStructureDesc& accelerationStructureDesc, AccelerationStructure*& accelerationStructure); + Result CreateAccelerationStructure(const AllocateAccelerationStructureDesc& accelerationStructureDesc, AccelerationStructure*& accelerationStructure); + Result CreateAccelerationStructure(const AccelerationStructureVKDesc& accelerationStructureDesc, AccelerationStructure*& accelerationStructure); + Result CreateAccelerationStructure(const AccelerationStructureD3D12Desc& accelerationStructureDesc, AccelerationStructure*& accelerationStructure); + + void DestroyFence(Fence& fence); void DestroyBuffer(Buffer& buffer); void DestroyTexture(Texture& texture); - void DestroyDescriptor(Descriptor& descriptor); - void DestroyPipelineLayout(PipelineLayout& pipelineLayout); void DestroyPipeline(Pipeline& pipeline); void DestroyQueryPool(QueryPool& queryPool); - void DestroyFence(Fence& queueSemaphore); + void DestroySwapChain(SwapChain& swapChain); + void DestroyDescriptor(Descriptor& descriptor); + void DestroyDescriptorPool(DescriptorPool& descriptorPool); + void DestroyPipelineLayout(PipelineLayout& 
pipelineLayout); + void DestroyCommandBuffer(CommandBuffer& commandBuffer); + void DestroyCommandAllocator(CommandAllocator& commandAllocator); + void DestroyAccelerationStructure(AccelerationStructure& accelerationStructure); + + void FreeMemory(Memory& memory); + void SetDebugName(const char* name); + Result GetCommandQueue(CommandQueueType commandQueueType, CommandQueue*& commandQueue); Result AllocateMemory(const AllocateMemoryDesc& allocateMemoryDesc, Memory*& memory); Result BindBufferMemory(const BufferMemoryBindingDesc* memoryBindingDescs, uint32_t memoryBindingDescNum); Result BindTextureMemory(const TextureMemoryBindingDesc* memoryBindingDescs, uint32_t memoryBindingDescNum); - void FreeMemory(Memory& memory); - - Result CreateRayTracingPipeline(const RayTracingPipelineDesc& pipelineDesc, Pipeline*& pipeline); - Result CreateAccelerationStructure(const AccelerationStructureDesc& accelerationStructureDesc, AccelerationStructure*& accelerationStructure); + Result QueryVideoMemoryInfo(MemoryLocation memoryLocation, VideoMemoryInfo& videoMemoryInfo) const; + Result AllocateAndBindMemory(const ResourceGroupDesc& resourceGroupDesc, Memory** allocations); Result BindAccelerationStructureMemory(const AccelerationStructureMemoryBindingDesc* memoryBindingDescs, uint32_t memoryBindingDescNum); - void DestroyAccelerationStructure(AccelerationStructure& accelerationStructure); - FormatSupportBits GetFormatSupport(Format format) const; - - Result CreateCommandQueueVK(const CommandQueueVKDesc& commandQueueDesc, CommandQueue*& commandQueue); - Result CreateCommandAllocatorVK(const CommandAllocatorVKDesc& commandAllocatorDesc, CommandAllocator*& commandAllocator); - Result CreateCommandBufferVK(const CommandBufferVKDesc& commandBufferDesc, CommandBuffer*& commandBuffer); - Result CreateDescriptorPoolVK(const DescriptorPoolVKDesc& descriptorPoolVKDesc, DescriptorPool*& descriptorPool); - Result CreateBufferVK(const BufferVKDesc& bufferDesc, Buffer*& buffer); - Result CreateTextureVK(const TextureVKDesc& textureVKDesc, Texture*& texture); - Result CreateMemoryVK(const MemoryVKDesc& memoryVKDesc, Memory*& memory); - Result CreateGraphicsPipelineVK(NRIVkPipeline vkPipeline, Pipeline*& pipeline); - Result CreateComputePipelineVK(NRIVkPipeline vkPipeline, Pipeline*& pipeline); - Result CreateQueryPoolVK(const QueryPoolVKDesc& queryPoolVKDesc, QueryPool*& queryPool); - Result CreateAccelerationStructureVK(const AccelerationStructureVKDesc& accelerationStructureDesc, AccelerationStructure*& accelerationStructure); - - Result CreateCommandBufferD3D11(const CommandBufferD3D11Desc& commandBufferDesc, CommandBuffer*& commandBuffer); - Result CreateBufferD3D11(const BufferD3D11Desc& bufferDesc, Buffer*& buffer); - Result CreateTextureD3D11(const TextureD3D11Desc& textureDesc, Texture*& texture); - - Result CreateCommandBufferD3D12(const CommandBufferD3D12Desc& commandBufferDesc, CommandBuffer*& commandBuffer); - Result CreateDescriptorPoolD3D12(const DescriptorPoolD3D12Desc& descriptorPoolD3D12Desc, DescriptorPool*& descriptorPool); - Result CreateBufferD3D12(const BufferD3D12Desc& bufferDesc, Buffer*& buffer); - Result CreateTextureD3D12(const TextureD3D12Desc& textureDesc, Texture*& texture); - Result CreateMemoryD3D12(const MemoryD3D12Desc& memoryDesc, Memory*& memory); - Result CreateAccelerationStructureD3D12(const AccelerationStructureD3D12Desc& accelerationStructureDesc, AccelerationStructure*& accelerationStructure); - uint32_t CalculateAllocationNumber(const ResourceGroupDesc& resourceGroupDesc) 
const;
-    Result AllocateAndBindMemory(const ResourceGroupDesc& resourceGroupDesc, Memory** allocations);
-    Result QueryVideoMemoryInfo(MemoryLocation memoryLocation, VideoMemoryInfo& videoMemoryInfo) const;
+    FormatSupportBits GetFormatSupport(Format format) const;
 
     //================================================================================================================
     // DeviceBase
     //================================================================================================================
 
     const DeviceDesc& GetDesc() const;
-    void Destroy();
+    void Destruct();
     Result FillFunctionTable(CoreInterface& table) const;
     Result FillFunctionTable(HelperInterface& table) const;
-    Result FillFunctionTable(StreamerInterface& streamerInterface) const;
+    Result FillFunctionTable(LowLatencyInterface& table) const;
+    Result FillFunctionTable(MeshShaderInterface& table) const;
+    Result FillFunctionTable(ResourceAllocatorInterface& table) const;
+    Result FillFunctionTable(RayTracingInterface& table) const;
+    Result FillFunctionTable(StreamerInterface& table) const;
+    Result FillFunctionTable(SwapChainInterface& table) const;
     Result FillFunctionTable(WrapperD3D11Interface& table) const;
     Result FillFunctionTable(WrapperD3D12Interface& table) const;
     Result FillFunctionTable(WrapperVKInterface& table) const;
-    Result FillFunctionTable(SwapChainInterface& table) const;
-    Result FillFunctionTable(RayTracingInterface& table) const;
-    Result FillFunctionTable(MeshShaderInterface& table) const;
-    Result FillFunctionTable(LowLatencyInterface& table) const;
 
 private:
     Device& m_Device;
@@ -158,22 +159,23 @@ struct DeviceVal final : public DeviceBase {
     CoreInterface m_CoreAPI = {};
     HelperInterface m_HelperAPI = {};
     StreamerInterface m_StreamerAPI = {};
+    LowLatencyInterface m_LowLatencyAPI = {};
+    MeshShaderInterface m_MeshShaderAPI = {};
+    RayTracingInterface m_RayTracingAPI = {};
+    ResourceAllocatorInterface m_ResourceAllocatorAPI = {};
+    SwapChainInterface m_SwapChainAPI = {};
     WrapperD3D11Interface m_WrapperD3D11API = {};
     WrapperD3D12Interface m_WrapperD3D12API = {};
     WrapperVKInterface m_WrapperVKAPI = {};
-    SwapChainInterface m_SwapChainAPI = {};
-    RayTracingInterface m_RayTracingAPI = {};
-    MeshShaderInterface m_MeshShaderAPI = {};
-    LowLatencyInterface m_LowLatencyAPI = {};
 
     std::array<CommandQueueVal*, (uint32_t)CommandQueueType::MAX_NUM> m_CommandQueues = {};
     UnorderedMap<MemoryType, MemoryLocation> m_MemoryTypeMap;
 
+    bool m_IsLowLatencySupported = false;
+    bool m_IsMeshShaderSupported = false;
+    bool m_IsRayTracingSupported = false;
+    bool m_IsSwapChainSupported = false;
     bool m_IsWrapperD3D11Supported = false;
     bool m_IsWrapperD3D12Supported = false;
     bool m_IsWrapperVKSupported = false;
-    bool m_IsSwapChainSupported = false;
-    bool m_IsRayTracingSupported = false;
-    bool m_IsMeshShaderSupported = false;
-    bool m_IsLowLatencySupported = false;
 
     Lock m_Lock;
 };
diff --git a/Source/Validation/DeviceVal.hpp b/Source/Validation/DeviceVal.hpp
index 9e30101d..09bbd18a 100644
--- a/Source/Validation/DeviceVal.hpp
+++ b/Source/Validation/DeviceVal.hpp
@@ -21,11 +21,15 @@ static FormatSupportBits NRI_CALL GetFormatSupport(const Device& device, Format
 }
 
 static void NRI_CALL GetBufferMemoryDesc(const Device& device, const BufferDesc& bufferDesc, MemoryLocation memoryLocation, MemoryDesc& memoryDesc) {
-    ((DeviceVal&)device).GetMemoryDesc(bufferDesc, memoryLocation, memoryDesc);
+    DeviceVal& deviceVal = (DeviceVal&)device;
+    deviceVal.GetCoreInterface().GetBufferMemoryDesc(deviceVal.GetImpl(), bufferDesc, memoryLocation, memoryDesc);
+    deviceVal.RegisterMemoryType(memoryDesc.type, 
memoryLocation); } static void NRI_CALL GetTextureMemoryDesc(const Device& device, const TextureDesc& textureDesc, MemoryLocation memoryLocation, MemoryDesc& memoryDesc) { - ((DeviceVal&)device).GetMemoryDesc(textureDesc, memoryLocation, memoryDesc); + DeviceVal& deviceVal = (DeviceVal&)device; + deviceVal.GetCoreInterface().GetTextureMemoryDesc(deviceVal.GetImpl(), textureDesc, memoryLocation, memoryDesc); + deviceVal.RegisterMemoryType(memoryDesc.type, memoryLocation); } static Result NRI_CALL GetCommandQueue(Device& device, CommandQueueType commandQueueType, CommandQueue*& commandQueue) { @@ -96,6 +100,13 @@ static Result NRI_CALL CreateFence(Device& device, uint64_t initialValue, Fence* return ((DeviceVal&)device).CreateFence(initialValue, fence); } +static void NRI_CALL DestroyCommandBuffer(CommandBuffer& commandBuffer) { + if (!(&commandBuffer)) + return; + + GetDeviceVal(commandBuffer).DestroyCommandBuffer(commandBuffer); +} + static void NRI_CALL DestroyCommandAllocator(CommandAllocator& commandAllocator) { if (!(&commandAllocator)) return; @@ -201,96 +212,21 @@ static void* NRI_CALL GetDeviceNativeObject(const Device& device) { return ((DeviceVal&)device).GetNativeObject(); } -Result DeviceVal::FillFunctionTable(CoreInterface& coreInterface) const { - coreInterface = {}; - coreInterface.GetDeviceDesc = ::GetDeviceDesc; - coreInterface.GetBufferDesc = ::GetBufferDesc; - coreInterface.GetTextureDesc = ::GetTextureDesc; - coreInterface.GetFormatSupport = ::GetFormatSupport; - coreInterface.GetBufferMemoryDesc = ::GetBufferMemoryDesc; - coreInterface.GetTextureMemoryDesc = ::GetTextureMemoryDesc; - coreInterface.GetCommandQueue = ::GetCommandQueue; - coreInterface.CreateCommandAllocator = ::CreateCommandAllocator; - coreInterface.CreateDescriptorPool = ::CreateDescriptorPool; - coreInterface.CreateBuffer = ::CreateBuffer; - coreInterface.CreateTexture = ::CreateTexture; - coreInterface.CreateBufferView = ::CreateBufferView; - coreInterface.CreateTexture1DView = ::CreateTexture1DView; - coreInterface.CreateTexture2DView = ::CreateTexture2DView; - coreInterface.CreateTexture3DView = ::CreateTexture3DView; - coreInterface.CreateSampler = ::CreateSampler; - coreInterface.CreatePipelineLayout = ::CreatePipelineLayout; - coreInterface.CreateGraphicsPipeline = ::CreateGraphicsPipeline; - coreInterface.CreateComputePipeline = ::CreateComputePipeline; - coreInterface.CreateQueryPool = ::CreateQueryPool; - coreInterface.CreateFence = ::CreateFence; - coreInterface.DestroyCommandAllocator = ::DestroyCommandAllocator; - coreInterface.DestroyDescriptorPool = ::DestroyDescriptorPool; - coreInterface.DestroyBuffer = ::DestroyBuffer; - coreInterface.DestroyTexture = ::DestroyTexture; - coreInterface.DestroyDescriptor = ::DestroyDescriptor; - coreInterface.DestroyPipelineLayout = ::DestroyPipelineLayout; - coreInterface.DestroyPipeline = ::DestroyPipeline; - coreInterface.DestroyQueryPool = ::DestroyQueryPool; - coreInterface.DestroyFence = ::DestroyFence; - coreInterface.AllocateMemory = ::AllocateMemory; - coreInterface.BindBufferMemory = ::BindBufferMemory; - coreInterface.BindTextureMemory = ::BindTextureMemory; - coreInterface.FreeMemory = ::FreeMemory; - coreInterface.SetDeviceDebugName = ::SetDeviceDebugName; - coreInterface.SetPipelineDebugName = ::SetPipelineDebugName; - coreInterface.SetPipelineLayoutDebugName = ::SetPipelineLayoutDebugName; - coreInterface.SetMemoryDebugName = ::SetMemoryDebugName; - coreInterface.GetDeviceNativeObject = ::GetDeviceNativeObject; - - 
Core_Buffer_PartiallyFillFunctionTableVal(coreInterface); - Core_CommandAllocator_PartiallyFillFunctionTableVal(coreInterface); - Core_CommandBuffer_PartiallyFillFunctionTableVal(coreInterface); - Core_CommandQueue_PartiallyFillFunctionTableVal(coreInterface); - Core_Descriptor_PartiallyFillFunctionTableVal(coreInterface); - Core_DescriptorPool_PartiallyFillFunctionTableVal(coreInterface); - Core_DescriptorSet_PartiallyFillFunctionTableVal(coreInterface); - Core_Fence_PartiallyFillFunctionTableVal(coreInterface); - Core_QueryPool_PartiallyFillFunctionTableVal(coreInterface); - Core_Texture_PartiallyFillFunctionTableVal(coreInterface); - - return ValidateFunctionTable(coreInterface); -} - -#pragma endregion - -#pragma region[ WrapperD3D11 ] - -#if NRI_USE_D3D11 - -static Result NRI_CALL CreateCommandBufferD3D11(Device& device, const CommandBufferD3D11Desc& commandBufferD3D11Desc, CommandBuffer*& commandBuffer) { - return ((DeviceVal&)device).CreateCommandBufferD3D11(commandBufferD3D11Desc, commandBuffer); -} - -static Result NRI_CALL CreateBufferD3D11(Device& device, const BufferD3D11Desc& bufferD3D11Desc, Buffer*& buffer) { - return ((DeviceVal&)device).CreateBufferD3D11(bufferD3D11Desc, buffer); -} - -static Result NRI_CALL CreateTextureD3D11(Device& device, const TextureD3D11Desc& textureD3D11Desc, Texture*& texture) { - return ((DeviceVal&)device).CreateTextureD3D11(textureD3D11Desc, texture); -} - -#endif - -Result DeviceVal::FillFunctionTable(WrapperD3D11Interface& wrapperD3D11Interface) const { - wrapperD3D11Interface = {}; -#if NRI_USE_D3D11 - if (!m_IsWrapperD3D11Supported) - return Result::UNSUPPORTED; - - wrapperD3D11Interface.CreateCommandBufferD3D11 = ::CreateCommandBufferD3D11; - wrapperD3D11Interface.CreateBufferD3D11 = ::CreateBufferD3D11; - wrapperD3D11Interface.CreateTextureD3D11 = ::CreateTextureD3D11; +Result DeviceVal::FillFunctionTable(CoreInterface& table) const { + table = {}; + Core_Device_PartiallyFillFunctionTableVal(table); + Core_Buffer_PartiallyFillFunctionTableVal(table); + Core_CommandAllocator_PartiallyFillFunctionTableVal(table); + Core_CommandBuffer_PartiallyFillFunctionTableVal(table); + Core_CommandQueue_PartiallyFillFunctionTableVal(table); + Core_Descriptor_PartiallyFillFunctionTableVal(table); + Core_DescriptorPool_PartiallyFillFunctionTableVal(table); + Core_DescriptorSet_PartiallyFillFunctionTableVal(table); + Core_Fence_PartiallyFillFunctionTableVal(table); + Core_QueryPool_PartiallyFillFunctionTableVal(table); + Core_Texture_PartiallyFillFunctionTableVal(table); - return ValidateFunctionTable(wrapperD3D11Interface); -#else - return Result::UNSUPPORTED; -#endif + return ValidateFunctionTable(table); } #pragma endregion @@ -311,200 +247,65 @@ static Result NRI_CALL QueryVideoMemoryInfo(const Device& device, MemoryLocation Result DeviceVal::FillFunctionTable(HelperInterface& helperInterface) const { helperInterface = {}; - helperInterface.CalculateAllocationNumber = ::CalculateAllocationNumber; - helperInterface.AllocateAndBindMemory = ::AllocateAndBindMemory; - helperInterface.QueryVideoMemoryInfo = ::QueryVideoMemoryInfo; - Helper_CommandQueue_PartiallyFillFunctionTableVal(helperInterface); + Helper_Device_PartiallyFillFunctionTableVal(helperInterface); return ValidateFunctionTable(helperInterface); } #pragma endregion -#pragma region[ WrapperD3D12 ] - -#if NRI_USE_D3D12 - -static Result NRI_CALL CreateCommandBufferD3D12(Device& device, const CommandBufferD3D12Desc& commandBufferD3D12Desc, CommandBuffer*& commandBuffer) { - return 
((DeviceVal&)device).CreateCommandBufferD3D12(commandBufferD3D12Desc, commandBuffer); -} - -static Result NRI_CALL CreateDescriptorPoolD3D12(Device& device, const DescriptorPoolD3D12Desc& descriptorPoolD3D12Desc, DescriptorPool*& descriptorPool) { - return ((DeviceVal&)device).CreateDescriptorPoolD3D12(descriptorPoolD3D12Desc, descriptorPool); -} - -static Result NRI_CALL CreateBufferD3D12(Device& device, const BufferD3D12Desc& bufferD3D12Desc, Buffer*& buffer) { - return ((DeviceVal&)device).CreateBufferD3D12(bufferD3D12Desc, buffer); -} - -static Result NRI_CALL CreateTextureD3D12(Device& device, const TextureD3D12Desc& textureD3D12Desc, Texture*& texture) { - return ((DeviceVal&)device).CreateTextureD3D12(textureD3D12Desc, texture); -} - -static Result NRI_CALL CreateMemoryD3D12(Device& device, const MemoryD3D12Desc& memoryD3D12Desc, Memory*& memory) { - return ((DeviceVal&)device).CreateMemoryD3D12(memoryD3D12Desc, memory); -} - -static Result NRI_CALL CreateAccelerationStructureD3D12( - Device& device, const AccelerationStructureD3D12Desc& accelerationStructureD3D12Desc, AccelerationStructure*& accelerationStructure) { - return ((DeviceVal&)device).CreateAccelerationStructureD3D12(accelerationStructureD3D12Desc, accelerationStructure); -} - -#endif +#pragma region[ LowLatency ] -Result DeviceVal::FillFunctionTable(WrapperD3D12Interface& wrapperD3D12Interface) const { - wrapperD3D12Interface = {}; -#if NRI_USE_D3D12 - if (!m_IsWrapperD3D12Supported) +Result DeviceVal::FillFunctionTable(LowLatencyInterface& table) const { + table = {}; + if (!m_IsLowLatencySupported) return Result::UNSUPPORTED; - wrapperD3D12Interface.CreateCommandBufferD3D12 = ::CreateCommandBufferD3D12; - wrapperD3D12Interface.CreateDescriptorPoolD3D12 = ::CreateDescriptorPoolD3D12; - wrapperD3D12Interface.CreateBufferD3D12 = ::CreateBufferD3D12; - wrapperD3D12Interface.CreateTextureD3D12 = ::CreateTextureD3D12; - wrapperD3D12Interface.CreateMemoryD3D12 = ::CreateMemoryD3D12; - wrapperD3D12Interface.CreateAccelerationStructureD3D12 = ::CreateAccelerationStructureD3D12; - - return ValidateFunctionTable(wrapperD3D12Interface); + LowLatency_CommandQueue_PartiallyFillFunctionTableVal(table); + LowLatency_SwapChain_SwapChain_PartiallyFillFunctionTableVal(table); -#else - return Result::UNSUPPORTED; -#endif + return ValidateFunctionTable(table); } #pragma endregion -#pragma region[ WrapperVK ] - -#if NRI_USE_VULKAN - -static Result NRI_CALL CreateCommandQueueVK(Device& device, const CommandQueueVKDesc& commandQueueVKDesc, CommandQueue*& commandQueue) { - return ((DeviceVal&)device).CreateCommandQueueVK(commandQueueVKDesc, commandQueue); -} - -static Result NRI_CALL CreateCommandAllocatorVK(Device& device, const CommandAllocatorVKDesc& commandAllocatorVKDesc, CommandAllocator*& commandAllocator) { - return ((DeviceVal&)device).CreateCommandAllocatorVK(commandAllocatorVKDesc, commandAllocator); -} - -static Result NRI_CALL CreateCommandBufferVK(Device& device, const CommandBufferVKDesc& commandBufferVKDesc, CommandBuffer*& commandBuffer) { - return ((DeviceVal&)device).CreateCommandBufferVK(commandBufferVKDesc, commandBuffer); -} - -static Result NRI_CALL CreateDescriptorPoolVK(Device& device, const DescriptorPoolVKDesc& descriptorPoolVKDesc, DescriptorPool*& descriptorPool) { - return ((DeviceVal&)device).CreateDescriptorPoolVK(descriptorPoolVKDesc, descriptorPool); -} - -static Result NRI_CALL CreateBufferVK(Device& device, const BufferVKDesc& bufferVKDesc, Buffer*& buffer) { - return 
((DeviceVal&)device).CreateBufferVK(bufferVKDesc, buffer); -} - -static Result NRI_CALL CreateTextureVK(Device& device, const TextureVKDesc& textureVKDesc, Texture*& texture) { - return ((DeviceVal&)device).CreateTextureVK(textureVKDesc, texture); -} - -static Result NRI_CALL CreateMemoryVK(Device& device, const MemoryVKDesc& memoryVKDesc, Memory*& memory) { - return ((DeviceVal&)device).CreateMemoryVK(memoryVKDesc, memory); -} - -static Result NRI_CALL CreateGraphicsPipelineVK(Device& device, NRIVkPipeline vkPipeline, Pipeline*& pipeline) { - return ((DeviceVal&)device).CreateGraphicsPipelineVK(vkPipeline, pipeline); -} - -static Result NRI_CALL CreateComputePipelineVK(Device& device, NRIVkPipeline vkPipeline, Pipeline*& pipeline) { - return ((DeviceVal&)device).CreateComputePipelineVK(vkPipeline, pipeline); -} - -static Result NRI_CALL CreateQueryPoolVK(Device& device, const QueryPoolVKDesc& queryPoolVKDesc, QueryPool*& queryPool) { - return ((DeviceVal&)device).CreateQueryPoolVK(queryPoolVKDesc, queryPool); -} - -static Result NRI_CALL CreateAccelerationStructureVK( - Device& device, const AccelerationStructureVKDesc& accelerationStructureVKDesc, AccelerationStructure*& accelerationStructure) { - return ((DeviceVal&)device).CreateAccelerationStructureVK(accelerationStructureVKDesc, accelerationStructure); -} - -static NRIVkPhysicalDevice NRI_CALL GetVkPhysicalDevice(const Device& device) { - return ((DeviceVal&)device).GetWrapperVKInterface().GetVkPhysicalDevice(((DeviceVal&)device).GetImpl()); -} - -static NRIVkInstance NRI_CALL GetVkInstance(const Device& device) { - return ((DeviceVal&)device).GetWrapperVKInterface().GetVkInstance(((DeviceVal&)device).GetImpl()); -} - -static NRIVkInstance NRI_CALL GetVkGetInstanceProcAddr(const Device& device) { - return ((DeviceVal&)device).GetWrapperVKInterface().GetVkGetInstanceProcAddr(((DeviceVal&)device).GetImpl()); -} - -static NRIVkInstance NRI_CALL GetVkGetDeviceProcAddr(const Device& device) { - return ((DeviceVal&)device).GetWrapperVKInterface().GetVkGetDeviceProcAddr(((DeviceVal&)device).GetImpl()); -} - -#endif +#pragma region[ MeshShader ] -Result DeviceVal::FillFunctionTable(WrapperVKInterface& wrapperVKInterface) const { - wrapperVKInterface = {}; -#if NRI_USE_VULKAN - if (!m_IsWrapperVKSupported) +Result DeviceVal::FillFunctionTable(MeshShaderInterface& table) const { + table = {}; + if (!m_IsMeshShaderSupported) return Result::UNSUPPORTED; - wrapperVKInterface.CreateCommandQueueVK = ::CreateCommandQueueVK; - wrapperVKInterface.CreateCommandAllocatorVK = ::CreateCommandAllocatorVK; - wrapperVKInterface.CreateCommandBufferVK = ::CreateCommandBufferVK; - wrapperVKInterface.CreateDescriptorPoolVK = ::CreateDescriptorPoolVK; - wrapperVKInterface.CreateBufferVK = ::CreateBufferVK; - wrapperVKInterface.CreateTextureVK = ::CreateTextureVK; - wrapperVKInterface.CreateMemoryVK = ::CreateMemoryVK; - wrapperVKInterface.CreateGraphicsPipelineVK = ::CreateGraphicsPipelineVK; - wrapperVKInterface.CreateComputePipelineVK = ::CreateComputePipelineVK; - wrapperVKInterface.CreateQueryPoolVK = ::CreateQueryPoolVK; - wrapperVKInterface.CreateAccelerationStructureVK = ::CreateAccelerationStructureVK; - - wrapperVKInterface.GetVkPhysicalDevice = ::GetVkPhysicalDevice; - wrapperVKInterface.GetVkInstance = ::GetVkInstance; - wrapperVKInterface.GetVkGetDeviceProcAddr = ::GetVkGetDeviceProcAddr; - wrapperVKInterface.GetVkGetInstanceProcAddr = ::GetVkGetInstanceProcAddr; - - return ValidateFunctionTable(wrapperVKInterface); -#else - return 
Result::UNSUPPORTED;
-#endif
+    MeshShader_CommandBuffer_PartiallyFillFunctionTableVal(table);
+
+    return ValidateFunctionTable(table);
 }
 
 #pragma endregion
 
-#pragma region[ SwapChain ]
-
-static Result NRI_CALL CreateSwapChain(Device& device, const SwapChainDesc& swapChainDesc, SwapChain*& swapChain) {
-    return ((DeviceVal&)device).CreateSwapChain(swapChainDesc, swapChain);
-}
+#pragma region[ RayTracing ]
 
-static void NRI_CALL DestroySwapChain(SwapChain& swapChain) {
-    if (!(&swapChain))
-        return;
+static void NRI_CALL GetAccelerationStructureMemoryDesc(
+    const Device& device, const AccelerationStructureDesc& accelerationStructureDesc, MemoryLocation memoryLocation, MemoryDesc& memoryDesc) {
+    DeviceVal& deviceVal = (DeviceVal&)device;
 
-    GetDeviceVal(swapChain).DestroySwapChain(swapChain);
-}
+    AccelerationStructureDesc accelerationStructureDescImpl = accelerationStructureDesc;
 
-Result DeviceVal::FillFunctionTable(SwapChainInterface& swapChainInterface) const {
-    swapChainInterface = {};
-    if (!m_IsSwapChainSupported)
-        return Result::UNSUPPORTED;
+    uint32_t geometryObjectNum = accelerationStructureDesc.type == AccelerationStructureType::BOTTOM_LEVEL ? accelerationStructureDesc.instanceOrGeometryObjectNum : 0;
+    Scratch<GeometryObject> objectImplArray = AllocateScratch(deviceVal, GeometryObject, geometryObjectNum);
 
-    swapChainInterface.CreateSwapChain = ::CreateSwapChain;
-    swapChainInterface.DestroySwapChain = ::DestroySwapChain;
-
-    SwapChain_PartiallyFillFunctionTableVal(swapChainInterface);
+    if (accelerationStructureDesc.type == AccelerationStructureType::BOTTOM_LEVEL) {
+        ConvertGeometryObjectsVal(objectImplArray, accelerationStructureDesc.geometryObjects, geometryObjectNum);
+        accelerationStructureDescImpl.geometryObjects = objectImplArray;
+    }
 
-    return ValidateFunctionTable(swapChainInterface);
+    deviceVal.GetRayTracingInterface().GetAccelerationStructureMemoryDesc(deviceVal.GetImpl(), accelerationStructureDescImpl, memoryLocation, memoryDesc);
+    deviceVal.RegisterMemoryType(memoryDesc.type, memoryLocation);
 }
 
-#pragma endregion
-
-#pragma region[ RayTracing ]
-
 static Result NRI_CALL CreateRayTracingPipeline(Device& device, const RayTracingPipelineDesc& pipelineDesc, Pipeline*& pipeline) {
-    return ((DeviceVal&)device).CreateRayTracingPipeline(pipelineDesc, pipeline);
+    return ((DeviceVal&)device).CreatePipeline(pipelineDesc, pipeline);
 }
 
 static Result NRI_CALL CreateAccelerationStructure(Device& device, const AccelerationStructureDesc& accelerationStructureDesc, AccelerationStructure*& accelerationStructure) {
@@ -522,52 +323,42 @@ static void NRI_CALL DestroyAccelerationStructure(AccelerationStructure& acceler
     GetDeviceVal(accelerationStructure).DestroyAccelerationStructure(accelerationStructure);
 }
 
-void FillFunctionTablePipelineVal(RayTracingInterface& rayTracingInterface);
+void FillFunctionTablePipelineVal(RayTracingInterface& table);
 
-Result DeviceVal::FillFunctionTable(RayTracingInterface& rayTracingInterface) const {
-    rayTracingInterface = {};
+Result DeviceVal::FillFunctionTable(RayTracingInterface& table) const {
+    table = {};
     if (!m_IsRayTracingSupported)
         return Result::UNSUPPORTED;
 
-    rayTracingInterface.CreateRayTracingPipeline = ::CreateRayTracingPipeline;
-    rayTracingInterface.CreateAccelerationStructure = ::CreateAccelerationStructure;
-    rayTracingInterface.BindAccelerationStructureMemory = ::BindAccelerationStructureMemory;
-    rayTracingInterface.DestroyAccelerationStructure = ::DestroyAccelerationStructure;
-
-    RayTracing_CommandBuffer_PartiallyFillFunctionTableVal(rayTracingInterface);
-    RayTracing_AccelerationStructure_PartiallyFillFunctionTableVal(rayTracingInterface);
-    FillFunctionTablePipelineVal(rayTracingInterface);
+    FillFunctionTablePipelineVal(table);
+    RayTracing_CommandBuffer_PartiallyFillFunctionTableVal(table);
+    RayTracing_AccelerationStructure_PartiallyFillFunctionTableVal(table);
+    RayTracing_Device_PartiallyFillFunctionTableVal(table);
 
-    return ValidateFunctionTable(rayTracingInterface);
+    return ValidateFunctionTable(table);
 }
 
 #pragma endregion
 
-#pragma region[ MeshShader ]
-
-Result DeviceVal::FillFunctionTable(MeshShaderInterface& meshShaderInterface) const {
-    meshShaderInterface = {};
-    if (!m_IsMeshShaderSupported)
-        return Result::UNSUPPORTED;
-
-    MeshShader_CommandBuffer_PartiallyFillFunctionTableVal(meshShaderInterface);
+#pragma region[ ResourceAllocator ]
 
-    return ValidateFunctionTable(meshShaderInterface);
+static Result AllocateBuffer(Device& device, const AllocateBufferDesc& bufferDesc, Buffer*& buffer) {
+    return ((DeviceVal&)device).CreateBuffer(bufferDesc, buffer);
 }
 
-#pragma endregion
-
-#pragma region[ LowLatency ]
+static Result AllocateTexture(Device& device, const AllocateTextureDesc& textureDesc, Texture*& texture) {
+    return ((DeviceVal&)device).CreateTexture(textureDesc, texture);
+}
 
-Result DeviceVal::FillFunctionTable(LowLatencyInterface& lowLatencyInterface) const {
-    lowLatencyInterface = {};
-    if (!m_IsLowLatencySupported)
-        return Result::UNSUPPORTED;
+static Result AllocateAccelerationStructure(Device& device, const AllocateAccelerationStructureDesc& accelerationStructureDesc, AccelerationStructure*& accelerationStructure) {
+    return ((DeviceVal&)device).CreateAccelerationStructure(accelerationStructureDesc, accelerationStructure);
+}
 
-    LowLatency_CommandQueue_PartiallyFillFunctionTableVal(lowLatencyInterface);
-    LowLatency_SwapChain_PartiallyFillFunctionTableVal(lowLatencyInterface);
+Result DeviceVal::FillFunctionTable(ResourceAllocatorInterface& table) const {
+    table = {};
+    ResourceAllocator_Device_PartiallyFillFunctionTableVal(table);
 
-    return ValidateFunctionTable(lowLatencyInterface);
+    return ValidateFunctionTable(table);
 }
 
 #pragma endregion
@@ -592,10 +383,8 @@ static Result CreateStreamer(Device& device, const StreamerDesc& streamerDesc, S
     Streamer* impl = nullptr;
     Result result = deviceVal.GetStreamerInterface().CreateStreamer(deviceVal.GetImpl(), streamerDesc, impl);
 
-    if (result == Result::SUCCESS) {
-        RETURN_ON_FAILURE(&deviceVal, impl != nullptr, Result::FAILURE, "CreateStreamer: 'impl' is NULL");
+    if (result == Result::SUCCESS)
         streamer = (Streamer*)Allocate<StreamerVal>(deviceVal.GetStdAllocator(), deviceVal, impl);
-    }
 
     return result;
 }
@@ -606,9 +395,9 @@ static void DestroyStreamer(Streamer& streamer) {
 
     streamerVal.GetStreamerInterface().DestroyStreamer(*NRI_GET_IMPL(Streamer, &streamer));
 
-    Deallocate(deviceVal.GetStdAllocator(), streamerVal.constantBuffer);
-    Deallocate(deviceVal.GetStdAllocator(), streamerVal.dynamicBuffer);
-    Deallocate(deviceVal.GetStdAllocator(), &streamerVal);
+    Destroy(deviceVal.GetStdAllocator(), streamerVal.constantBuffer);
+    Destroy(deviceVal.GetStdAllocator(), streamerVal.dynamicBuffer);
+    Destroy(deviceVal.GetStdAllocator(), &streamerVal);
 }
 
 static Buffer* GetStreamerConstantBuffer(Streamer& streamer) {
@@ -617,7 +406,7 @@ static Buffer* GetStreamerConstantBuffer(Streamer& streamer) {
     Buffer* buffer = streamerVal.GetStreamerInterface().GetStreamerConstantBuffer(*NRI_GET_IMPL(Streamer, &streamer));
 
     if (!streamerVal.constantBuffer)
-        streamerVal.constantBuffer = Allocate<BufferVal>(deviceVal.GetStdAllocator(), deviceVal, buffer);
+        streamerVal.constantBuffer = Allocate<BufferVal>(deviceVal.GetStdAllocator(), deviceVal, buffer, false);
 
     return (Buffer*)streamerVal.constantBuffer;
 }
@@ -681,12 +470,12 @@ static Buffer* GetStreamerDynamicBuffer(Streamer& streamer) {
     Buffer* buffer = streamerVal.GetStreamerInterface().GetStreamerDynamicBuffer(*NRI_GET_IMPL(Streamer, &streamer));
 
     if (NRI_GET_IMPL(Buffer, streamerVal.dynamicBuffer) != buffer) {
-        Deallocate(deviceVal.GetStdAllocator(), streamerVal.dynamicBuffer);
+        Destroy(deviceVal.GetStdAllocator(), streamerVal.dynamicBuffer);
         streamerVal.dynamicBuffer = nullptr;
     }
 
     if (!streamerVal.dynamicBuffer)
-        streamerVal.dynamicBuffer = Allocate<BufferVal>(deviceVal.GetStdAllocator(), deviceVal, buffer);
+        streamerVal.dynamicBuffer = Allocate<BufferVal>(deviceVal.GetStdAllocator(), deviceVal, buffer, false);
 
     return (Buffer*)streamerVal.dynamicBuffer;
 }
@@ -697,19 +486,212 @@ static void CmdUploadStreamerUpdateRequests(CommandBuffer& commandBuffer, Stream
     streamerVal.GetStreamerInterface().CmdUploadStreamerUpdateRequests(*NRI_GET_IMPL(CommandBuffer, &commandBuffer), *NRI_GET_IMPL(Streamer, &streamer));
 }
 
-Result DeviceVal::FillFunctionTable(StreamerInterface& streamerInterface) const {
-    streamerInterface = {};
-    streamerInterface.CreateStreamer = ::CreateStreamer;
-    streamerInterface.DestroyStreamer = ::DestroyStreamer;
-    streamerInterface.GetStreamerConstantBuffer = ::GetStreamerConstantBuffer;
-    streamerInterface.UpdateStreamerConstantBuffer = ::UpdateStreamerConstantBuffer;
-    streamerInterface.AddStreamerBufferUpdateRequest = ::AddStreamerBufferUpdateRequest;
-    streamerInterface.AddStreamerTextureUpdateRequest = ::AddStreamerTextureUpdateRequest;
-    streamerInterface.CopyStreamerUpdateRequests = ::CopyStreamerUpdateRequests;
-    streamerInterface.GetStreamerDynamicBuffer = ::GetStreamerDynamicBuffer;
-    streamerInterface.CmdUploadStreamerUpdateRequests = ::CmdUploadStreamerUpdateRequests;
+Result DeviceVal::FillFunctionTable(StreamerInterface& table) const {
+    table = {};
+    Streamer_Device_PartiallyFillFunctionTableVal(table);
+
+    return ValidateFunctionTable(table);
+}
+
+#pragma endregion
+
+#pragma region[ SwapChain ]
+
+static Result NRI_CALL CreateSwapChain(Device& device, const SwapChainDesc& swapChainDesc, SwapChain*& swapChain) {
+    return ((DeviceVal&)device).CreateSwapChain(swapChainDesc, swapChain);
+}
+
+static void NRI_CALL DestroySwapChain(SwapChain& swapChain) {
+    if (!(&swapChain))
+        return;
+
+    GetDeviceVal(swapChain).DestroySwapChain(swapChain);
+}
+
+Result DeviceVal::FillFunctionTable(SwapChainInterface& table) const {
+    table = {};
+    if (!m_IsSwapChainSupported)
+        return Result::UNSUPPORTED;
+
+    SwapChain_Device_PartiallyFillFunctionTableVal(table);
+    SwapChain_SwapChain_PartiallyFillFunctionTableVal(table);
 
-    return ValidateFunctionTable(streamerInterface);
+    return ValidateFunctionTable(table);
 }
 
 #pragma endregion
+
+#pragma region[ WrapperD3D11 ]
+
+#if NRI_USE_D3D11
+
+static Result NRI_CALL CreateCommandBufferD3D11(Device& device, const CommandBufferD3D11Desc& commandBufferD3D11Desc, CommandBuffer*& commandBuffer) {
+    return ((DeviceVal&)device).CreateCommandBuffer(commandBufferD3D11Desc, commandBuffer);
+}
+
+static Result NRI_CALL CreateBufferD3D11(Device& device, const BufferD3D11Desc& bufferD3D11Desc, Buffer*& buffer) {
+    return ((DeviceVal&)device).CreateBuffer(bufferD3D11Desc, buffer);
+}
+
+static Result NRI_CALL CreateTextureD3D11(Device& device, const 
TextureD3D11Desc& textureD3D11Desc, Texture*& texture) { + return ((DeviceVal&)device).CreateTexture(textureD3D11Desc, texture); +} + +#endif + +Result DeviceVal::FillFunctionTable(WrapperD3D11Interface& table) const { + table = {}; +#if NRI_USE_D3D11 + if (!m_IsWrapperD3D11Supported) + return Result::UNSUPPORTED; + + WrapperD3D11_Device_PartiallyFillFunctionTableVal(table); + + return ValidateFunctionTable(table); +#else + return Result::UNSUPPORTED; +#endif +} + +#pragma endregion + +#pragma region[ WrapperD3D12 ] + +#if NRI_USE_D3D12 + +static Result NRI_CALL CreateCommandBufferD3D12(Device& device, const CommandBufferD3D12Desc& commandBufferD3D12Desc, CommandBuffer*& commandBuffer) { + return ((DeviceVal&)device).CreateCommandBuffer(commandBufferD3D12Desc, commandBuffer); +} + +static Result NRI_CALL CreateDescriptorPoolD3D12(Device& device, const DescriptorPoolD3D12Desc& descriptorPoolD3D12Desc, DescriptorPool*& descriptorPool) { + return ((DeviceVal&)device).CreateDescriptorPool(descriptorPoolD3D12Desc, descriptorPool); +} + +static Result NRI_CALL CreateBufferD3D12(Device& device, const BufferD3D12Desc& bufferD3D12Desc, Buffer*& buffer) { + return ((DeviceVal&)device).CreateBuffer(bufferD3D12Desc, buffer); +} + +static Result NRI_CALL CreateTextureD3D12(Device& device, const TextureD3D12Desc& textureD3D12Desc, Texture*& texture) { + return ((DeviceVal&)device).CreateTexture(textureD3D12Desc, texture); +} + +static Result NRI_CALL CreateMemoryD3D12(Device& device, const MemoryD3D12Desc& memoryD3D12Desc, Memory*& memory) { + return ((DeviceVal&)device).CreateMemory(memoryD3D12Desc, memory); +} + +static Result NRI_CALL CreateAccelerationStructureD3D12( + Device& device, const AccelerationStructureD3D12Desc& accelerationStructureD3D12Desc, AccelerationStructure*& accelerationStructure) { + return ((DeviceVal&)device).CreateAccelerationStructure(accelerationStructureD3D12Desc, accelerationStructure); +} + +#endif + +Result DeviceVal::FillFunctionTable(WrapperD3D12Interface& table) const { + table = {}; +#if NRI_USE_D3D12 + if (!m_IsWrapperD3D12Supported) + return Result::UNSUPPORTED; + + WrapperD3D12_Device_PartiallyFillFunctionTableVal(table); + + return ValidateFunctionTable(table); + +#else + return Result::UNSUPPORTED; +#endif +} + +#pragma endregion + +#pragma region[ WrapperVK ] + +#if NRI_USE_VULKAN + +static Result NRI_CALL CreateCommandQueueVK(Device& device, const CommandQueueVKDesc& commandQueueVKDesc, CommandQueue*& commandQueue) { + return ((DeviceVal&)device).CreateCommandQueue(commandQueueVKDesc, commandQueue); +} + +static Result NRI_CALL CreateCommandAllocatorVK(Device& device, const CommandAllocatorVKDesc& commandAllocatorVKDesc, CommandAllocator*& commandAllocator) { + return ((DeviceVal&)device).CreateCommandAllocator(commandAllocatorVKDesc, commandAllocator); +} + +static Result NRI_CALL CreateCommandBufferVK(Device& device, const CommandBufferVKDesc& commandBufferVKDesc, CommandBuffer*& commandBuffer) { + return ((DeviceVal&)device).CreateCommandBuffer(commandBufferVKDesc, commandBuffer); +} + +static Result NRI_CALL CreateDescriptorPoolVK(Device& device, const DescriptorPoolVKDesc& descriptorPoolVKDesc, DescriptorPool*& descriptorPool) { + return ((DeviceVal&)device).CreateDescriptorPool(descriptorPoolVKDesc, descriptorPool); +} + +static Result NRI_CALL CreateBufferVK(Device& device, const BufferVKDesc& bufferVKDesc, Buffer*& buffer) { + return ((DeviceVal&)device).CreateBuffer(bufferVKDesc, buffer); +} + +static Result NRI_CALL CreateTextureVK(Device& device, 
const TextureVKDesc& textureVKDesc, Texture*& texture) {
+    return ((DeviceVal&)device).CreateTexture(textureVKDesc, texture);
+}
+
+static Result NRI_CALL CreateMemoryVK(Device& device, const MemoryVKDesc& memoryVKDesc, Memory*& memory) {
+    return ((DeviceVal&)device).CreateMemory(memoryVKDesc, memory);
+}
+
+static Result NRI_CALL CreateGraphicsPipelineVK(Device& device, VKNonDispatchableHandle vkPipeline, Pipeline*& pipeline) {
+    return ((DeviceVal&)device).CreateGraphicsPipeline(vkPipeline, pipeline);
+}
+
+static Result NRI_CALL CreateComputePipelineVK(Device& device, VKNonDispatchableHandle vkPipeline, Pipeline*& pipeline) {
+    return ((DeviceVal&)device).CreateComputePipeline(vkPipeline, pipeline);
+}
+
+static Result NRI_CALL CreateQueryPoolVK(Device& device, const QueryPoolVKDesc& queryPoolVKDesc, QueryPool*& queryPool) {
+    return ((DeviceVal&)device).CreateQueryPool(queryPoolVKDesc, queryPool);
+}
+
+static Result NRI_CALL CreateAccelerationStructureVK(
+    Device& device, const AccelerationStructureVKDesc& accelerationStructureVKDesc, AccelerationStructure*& accelerationStructure) {
+    return ((DeviceVal&)device).CreateAccelerationStructure(accelerationStructureVKDesc, accelerationStructure);
+}
+
+static VKHandle NRI_CALL GetPhysicalDeviceVK(const Device& device) {
+    return ((DeviceVal&)device).GetWrapperVKInterface().GetPhysicalDeviceVK(((DeviceVal&)device).GetImpl());
+}
+
+static VKHandle NRI_CALL GetInstanceVK(const Device& device) {
+    return ((DeviceVal&)device).GetWrapperVKInterface().GetInstanceVK(((DeviceVal&)device).GetImpl());
+}
+
+static void* NRI_CALL GetInstanceProcAddrVK(const Device& device) {
+    return ((DeviceVal&)device).GetWrapperVKInterface().GetInstanceProcAddrVK(((DeviceVal&)device).GetImpl());
+}
+
+static void* NRI_CALL GetDeviceProcAddrVK(const Device& device) {
+    return ((DeviceVal&)device).GetWrapperVKInterface().GetDeviceProcAddrVK(((DeviceVal&)device).GetImpl());
+}
+
+#endif
+
+Result DeviceVal::FillFunctionTable(WrapperVKInterface& table) const {
+    table = {};
+#if NRI_USE_VULKAN
+    if (!m_IsWrapperVKSupported)
+        return Result::UNSUPPORTED;
+
+    WrapperVK_Device_PartiallyFillFunctionTableVal(table);
+
+    return ValidateFunctionTable(table);
+#else
+    return Result::UNSUPPORTED;
+#endif
+}
+
+#pragma endregion
+
+Define_Core_Device_PartiallyFillFunctionTable(Val);
+Define_Helper_Device_PartiallyFillFunctionTable(Val);
+Define_RayTracing_Device_PartiallyFillFunctionTable(Val);
+Define_Streamer_Device_PartiallyFillFunctionTable(Val);
+Define_SwapChain_Device_PartiallyFillFunctionTable(Val);
+Define_ResourceAllocator_Device_PartiallyFillFunctionTable(Val);
+Define_WrapperD3D11_Device_PartiallyFillFunctionTable(Val);
+Define_WrapperD3D12_Device_PartiallyFillFunctionTable(Val);
+Define_WrapperVK_Device_PartiallyFillFunctionTable(Val);
diff --git a/Source/Validation/FenceVal.h b/Source/Validation/FenceVal.h
index 9e655e45..baf2ead9 100644
--- a/Source/Validation/FenceVal.h
+++ b/Source/Validation/FenceVal.h
@@ -7,7 +7,7 @@ namespace nri {
 
 struct CommandQueueVal;
 
 struct FenceVal : public DeviceObjectVal<Fence> {
-    inline FenceVal(DeviceVal& device, Fence* queueSemaphore) : DeviceObjectVal(device, queueSemaphore) {
+    inline FenceVal(DeviceVal& device, Fence* fence) : DeviceObjectVal(device, fence) {
     }
 
     inline ~FenceVal() {
diff --git a/Source/Validation/MemoryVal.cpp b/Source/Validation/MemoryVal.cpp
index 1066fd42..e755443e 100644
--- a/Source/Validation/MemoryVal.cpp
+++ b/Source/Validation/MemoryVal.cpp
@@ -49,13 +49,13 @@ void MemoryVal::BindBuffer(BufferVal& buffer) {
     ExclusiveScope lockScope(m_Lock);
     m_Buffers.push_back(&buffer);
-    buffer.SetBoundToMemory(*this);
+    buffer.SetBoundToMemory(this);
 }
 
 void MemoryVal::BindTexture(TextureVal& texture) {
     ExclusiveScope lockScope(m_Lock);
     m_Textures.push_back(&texture);
-    texture.SetBoundToMemory(*this);
+    texture.SetBoundToMemory(this);
 }
 
 void MemoryVal::BindAccelerationStructure(AccelerationStructureVal& accelerationStructure) {
diff --git a/Source/Validation/MemoryVal.h b/Source/Validation/MemoryVal.h
index 2db04d0f..0e250b38 100644
--- a/Source/Validation/MemoryVal.h
+++ b/Source/Validation/MemoryVal.h
@@ -40,7 +40,7 @@ struct MemoryVal : public DeviceObjectVal<Memory> {
     std::vector<TextureVal*> m_Textures;
     std::vector<AccelerationStructureVal*> m_AccelerationStructures;
     uint64_t m_Size = 0;
-    MemoryLocation m_MemoryLocation = MemoryLocation::MAX_NUM;
+    MemoryLocation m_MemoryLocation = MemoryLocation::MAX_NUM; // wrapped object
     Lock m_Lock;
 };
diff --git a/Source/Validation/PipelineVal.hpp b/Source/Validation/PipelineVal.hpp
index a693c5d9..fc73ffa2 100644
--- a/Source/Validation/PipelineVal.hpp
+++ b/Source/Validation/PipelineVal.hpp
@@ -6,8 +6,8 @@ static Result NRI_CALL WriteShaderGroupIdentifiers(const Pipeline& pipeline, uin
     return ((PipelineVal&)pipeline).WriteShaderGroupIdentifiers(baseShaderGroupIndex, shaderGroupNum, buffer);
 }
 
-void FillFunctionTablePipelineVal(RayTracingInterface& rayTracingInterface) {
-    rayTracingInterface.WriteShaderGroupIdentifiers = ::WriteShaderGroupIdentifiers;
+void FillFunctionTablePipelineVal(RayTracingInterface& table) {
+    table.WriteShaderGroupIdentifiers = ::WriteShaderGroupIdentifiers;
 }
 
 #pragma endregion
diff --git a/Source/Validation/SwapChainVal.cpp b/Source/Validation/SwapChainVal.cpp
index 254420dd..42812b6e 100644
--- a/Source/Validation/SwapChainVal.cpp
+++ b/Source/Validation/SwapChainVal.cpp
@@ -10,7 +10,7 @@ using namespace nri;
 
 SwapChainVal::~SwapChainVal() {
     for (size_t i = 0; i < m_Textures.size(); i++)
-        Deallocate(m_Device.GetStdAllocator(), m_Textures[i]);
+        Destroy(m_Device.GetStdAllocator(), m_Textures[i]);
 }
 
 inline void SwapChainVal::SetDebugName(const char* name) {
@@ -23,7 +23,7 @@ inline Texture* const* SwapChainVal::GetTextures(uint32_t& textureNum) {
 
     if (m_Textures.empty()) {
         for (uint32_t i = 0; i < textureNum; i++) {
-            TextureVal* textureVal = Allocate<TextureVal>(m_Device.GetStdAllocator(), m_Device, textures[i]);
+            TextureVal* textureVal = Allocate<TextureVal>(m_Device.GetStdAllocator(), m_Device, textures[i], true);
             m_Textures.push_back(textureVal);
         }
     }
diff --git a/Source/Validation/SwapChainVal.hpp b/Source/Validation/SwapChainVal.hpp
index 3345e37c..f7d80770 100644
--- a/Source/Validation/SwapChainVal.hpp
+++ b/Source/Validation/SwapChainVal.hpp
@@ -48,5 +48,5 @@ static Result GetLatencyReport(const SwapChain& swapChain, LatencyReport& latenc
 
 #pragma endregion
 
-Define_SwapChain_PartiallyFillFunctionTable(Val);
-Define_LowLatency_SwapChain_PartiallyFillFunctionTable(Val);
+Define_SwapChain_SwapChain_PartiallyFillFunctionTable(Val);
+Define_LowLatency_SwapChain_SwapChain_PartiallyFillFunctionTable(Val);
diff --git a/Source/Validation/TextureVal.h b/Source/Validation/TextureVal.h
index 154d7fc1..3a7ab3d4 100644
--- a/Source/Validation/TextureVal.h
+++ b/Source/Validation/TextureVal.h
@@ -7,7 +7,7 @@ namespace nri {
 
 struct MemoryVal;
 
 struct TextureVal : public DeviceObjectVal<Texture> {
-    TextureVal(DeviceVal& device, Texture* texture) : DeviceObjectVal(device, texture) {
+    TextureVal(DeviceVal& device, Texture* texture, bool isBoundToMemory) : DeviceObjectVal(device, texture), m_IsBoundToMemory(isBoundToMemory) {
     }
 
     ~TextureVal();
@@ -24,12 +24,8 @@ struct TextureVal : public DeviceObjectVal<Texture> {
         return m_IsBoundToMemory;
     }
 
-    inline void SetBoundToMemory() {
-        m_IsBoundToMemory = true;
-    }
-
-    inline void SetBoundToMemory(MemoryVal& memory) {
-        m_Memory = &memory;
+    inline void SetBoundToMemory(MemoryVal* memory = nullptr) {
+        m_Memory = memory;
        m_IsBoundToMemory = true;
    }
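
Editor's note: a recurring change above swaps a heap-backed Vector<GeometryObject> for a transient Scratch<GeometryObject> obtained via AllocateScratch(device, T, num). The sketch below shows how such a helper pair can look in C++; it assumes a plain calloc/free fallback, whereas the real NRI helpers presumably draw on the device's own allocator (and may use the stack for small requests), so everything here beyond the call shape is hypothetical.

#include <cstdlib>
#include <utility>

// RAII owner of a transient array that lives for the duration of one API call.
template <typename T>
struct Scratch {
    T* mem = nullptr;

    Scratch() = default;
    explicit Scratch(T* p) : mem(p) {}
    Scratch(Scratch&& other) noexcept : mem(std::exchange(other.mem, nullptr)) {}
    Scratch(const Scratch&) = delete;
    Scratch& operator=(const Scratch&) = delete;

    ~Scratch() {
        std::free(mem); // hypothetical: a real implementation may recycle a frame allocator instead
    }

    operator T*() const {
        return mem; // implicit decay is what lets "descImpl.geometryObjects = objectImplArray;" compile unchanged
    }
};

// Mirrors the call sites in the diff; 'device' is accepted only for signature parity in this sketch.
#define AllocateScratch(device, T, elementNum) \
    Scratch<T>((T*)std::calloc((elementNum) + 1, sizeof(T))) // +1 sidesteps a zero-size allocation when elementNum is 0

With that in place, the converted call sites read exactly as in the diff: Scratch<GeometryObject> objectImplArray = AllocateScratch(deviceVal, GeometryObject, geometryObjectNum);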
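Editor's note: all creation paths above converge on one wrap-on-success idiom: forward to the wrapped backend, and only on Result::SUCCESS wrap the returned impl pointer in a *Val proxy taken from the device's allocator; the Deallocate to Destroy rename marks the destroying half of that pair. Below is a self-contained sketch of such an Allocate/Destroy pair built on std::allocator_traits; NRI's StdAllocator carries the user-supplied allocationCallbacks, so the allocator type here is a stand-in for illustration, not the library's API.

#include <memory>
#include <new>
#include <utility>

// Construct a T in storage obtained from 'alloc', forwarding constructor arguments.
template <typename T, typename Alloc, typename... Args>
T* Allocate(Alloc& alloc, Args&&... args) {
    using Rebound = typename std::allocator_traits<Alloc>::template rebind_alloc<T>;
    Rebound rebound(alloc);
    T* object = rebound.allocate(1);
    return new (object) T(std::forward<Args>(args)...);
}

// Mirror image: run the destructor, then hand the storage back. Tolerating null
// matters for calls like Destroy(GetStdAllocator(), streamerVal.constantBuffer),
// where the wrapper may never have been created.
template <typename T, typename Alloc>
void Destroy(Alloc& alloc, T* object) {
    if (!object)
        return;

    using Rebound = typename std::allocator_traits<Alloc>::template rebind_alloc<T>;
    Rebound rebound(alloc);
    object->~T();
    rebound.deallocate(object, 1);
}

Usage then mirrors the diff, e.g. pipeline = (Pipeline*)Allocate<PipelineVal>(GetStdAllocator(), *this, pipelineImpl); the cast is safe to hand back because the public Pipeline type is an opaque handle to the caller.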
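Editor's note: every FillFunctionTable overload above ends in ValidateFunctionTable(table) after a chain of *_PartiallyFillFunctionTableVal helpers, each filling only its subsystem's slots. Assuming these interface structs contain nothing but function pointers, a plausible reading of the final check is a flat scan for slots nobody filled; the sketch below (Result is the library's own enum) is a guess at that shape, not NRI's actual implementation.

#include <cstddef>

// Treat the interface struct as a flat array of pointers; any null slot means
// some subsystem failed to register its entry points.
template <typename Interface>
Result ValidateFunctionTable(const Interface& table) {
    const void* const* entries = (const void* const*)&table;
    constexpr size_t entryNum = sizeof(Interface) / sizeof(void*);

    for (size_t i = 0; i < entryNum; i++) {
        if (!entries[i])
            return Result::FAILURE;
    }

    return Result::SUCCESS;
}

This also explains why the diff can replace long hand-written assignment lists with the Define_*_Device_PartiallyFillFunctionTable(Val) macros at the end of the file: as long as every subsystem contributes its slice, the final scan catches any entry that was missed.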