From f43844d72526428101a4da7601aec999753c8ca0 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Thu, 16 Oct 2025 14:01:10 -0700 Subject: [PATCH 01/20] first attempt --- include/Support/Pipeline.h | 1 + lib/API/DX/Device.cpp | 267 ++++++++++++++++++++++++++++++- lib/Support/Pipeline.cpp | 1 + test/Feature/HLSLLib/Mapped.test | 84 ++++++++++ 4 files changed, 350 insertions(+), 3 deletions(-) create mode 100644 test/Feature/HLSLLib/Mapped.test diff --git a/include/Support/Pipeline.h b/include/Support/Pipeline.h index 15b4fc8ea..2f07be12a 100644 --- a/include/Support/Pipeline.h +++ b/include/Support/Pipeline.h @@ -143,6 +143,7 @@ struct Resource { std::optional VKBinding; Buffer *BufferPtr = nullptr; bool HasCounter; + int TilesMapped = -1; bool isRaw() const { switch (Kind) { diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp index b3dee60c3..5c7912214 100644 --- a/lib/API/DX/Device.cpp +++ b/lib/API/DX/Device.cpp @@ -593,7 +593,125 @@ class DXDevice : public offloadtest::Device { return HeapIdx; } - llvm::Expected createUAV(Resource &R, InvocationState &IS) { + llvm::Expected createUnmappedUAV(Resource &R, + InvocationState &IS) { + ResourceBundle Bundle; + const uint32_t BufferSize = getUAVBufferSize(R); + + const D3D12_RESOURCE_DESC ResDesc = getResourceDescription(R); + + const D3D12_HEAP_PROPERTIES ReadBackHeapProp = + CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK); + const D3D12_RESOURCE_DESC ReadBackResDesc = { + D3D12_RESOURCE_DIMENSION_BUFFER, + 0, + BufferSize, + 1, + 1, + 1, + DXGI_FORMAT_UNKNOWN, + {1, 0}, + D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + D3D12_RESOURCE_FLAG_NONE}; + + const D3D12_RESOURCE_DESC UploadResDesc = + CD3DX12_RESOURCE_DESC::Buffer(BufferSize); + + uint32_t RegOffset = 0; + for (const auto &ResData : R.BufferPtr->Data) { + llvm::outs() << "Creating UAV: { Size = " << BufferSize + << ", Register = u" << R.DXBinding.Register + RegOffset + << ", Space = " << R.DXBinding.Space + << ", HasCounter = " << R.HasCounter + << ", TilesMapped 
= " << R.TilesMapped << " }\n"; + + ComPtr Buffer; + if (auto Err = + HR::toError(Device->CreateReservedResource( + &ResDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, + IID_PPV_ARGS(&Buffer)), + "Failed to create reserved resource (buffer).")) + return Err; + + ComPtr UploadBuffer; + if (auto Err = HR::toError( + Device->CreateReservedResource( + &UploadResDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, + IID_PPV_ARGS(&UploadBuffer)), + "Failed to create reserved resource (upload buffer).")) + return Err; + + ComPtr ReadBackBuffer; + if (auto Err = HR::toError( + Device->CreateCommittedResource( + &ReadBackHeapProp, D3D12_HEAP_FLAG_NONE, &ReadBackResDesc, + D3D12_RESOURCE_STATE_COPY_DEST, nullptr, + IID_PPV_ARGS(&ReadBackBuffer)), + "Failed to create committed resource (readback buffer).")) + return Err; + + // --- Tile mapping setup --- + UINT numTiles = R.TilesMapped; + std::vector startCoords(numTiles); + std::vector regionSizes(numTiles); + std::vector rangeFlags( + numTiles, D3D12_TILE_RANGE_FLAG_NONE); + std::vector heapRangeStartOffsets(numTiles); + std::vector rangeTileCounts(numTiles, 1); + + // Create a heap large enough for all mapped tiles + D3D12_HEAP_DESC heapDesc = {}; + heapDesc.Properties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT); + heapDesc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; + heapDesc.SizeInBytes = + numTiles * D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; + heapDesc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; + + ComPtr heap; + if (auto Err = + HR::toError(Device->CreateHeap(&heapDesc, IID_PPV_ARGS(&heap)), + "Failed to create heap for tiled resource.")) + return Err; + + // Fill tile coordinates and region sizes + for (UINT i = 0; i < numTiles; ++i) { + startCoords[i] = {i, 0, 0, 0}; + regionSizes[i].NumTiles = 1; + regionSizes[i].UseBox = FALSE; + heapRangeStartOffsets[i] = i; + } + + // Retrieve a command queue from InvocationState + ID3D12CommandQueue *CommandQueue = IS.Queue.Get(); + + // Map the first numTiles 
tiles in the Buffer + CommandQueue->UpdateTileMappings( + Buffer.Get(), numTiles, startCoords.data(), regionSizes.data(), + heap.Get(), numTiles, rangeFlags.data(), heapRangeStartOffsets.data(), + rangeTileCounts.data(), D3D12_TILE_MAPPING_FLAG_NONE); + + // Map upload buffer to copy data + void *ResDataPtr = nullptr; + D3D12_RANGE range = {0, 0}; // no reads expected + if (SUCCEEDED(UploadBuffer->Map(0, &range, &ResDataPtr))) { + memcpy(ResDataPtr, ResData.get(), R.size()); + UploadBuffer->Unmap(0, nullptr); + } else { + return llvm::createStringError(std::errc::io_error, + "Failed to map upload buffer."); + } + + addResourceUploadCommands(R, IS, Buffer, UploadBuffer); + + Bundle.emplace_back(UploadBuffer, Buffer, ReadBackBuffer); + RegOffset++; + } + + return Bundle; + } + + llvm::Expected createFullyMappedUAV(Resource &R, + InvocationState &IS) { ResourceBundle Bundle; const uint32_t BufferSize = getUAVBufferSize(R); @@ -628,6 +746,7 @@ class DXDevice : public offloadtest::Device { << ", HasCounter = " << R.HasCounter << " }\n"; ComPtr Buffer; + if (auto Err = HR::toError( Device->CreateCommittedResource( &HeapProp, D3D12_HEAP_FLAG_NONE, &ResDesc, @@ -669,6 +788,13 @@ class DXDevice : public offloadtest::Device { return Bundle; } + llvm::Expected createUAV(Resource &R, InvocationState &IS) { + if (R.TilesMapped != -1) + return createUnmappedUAV(R, IS); + else + return createFullyMappedUAV(R, IS); + } + // returns the next available HeapIdx uint32_t bindUAV(Resource &R, InvocationState &IS, uint32_t HeapIdx, ResourceBundle ResBundle) { @@ -698,7 +824,135 @@ class DXDevice : public offloadtest::Device { return (Sz + 255u) & 0xFFFFFFFFFFFFFF00; } - llvm::Expected createCBV(Resource &R, InvocationState &IS) { + llvm::Expected createUnmappedCBV(Resource &R, + InvocationState &IS) { + ResourceBundle Bundle; + + const size_t CBVSize = getCBVSize(R.size()); + + // Create a buffer description for a reserved buffer (no physical memory + // yet) + const D3D12_RESOURCE_DESC 
ResDesc = { + D3D12_RESOURCE_DIMENSION_BUFFER, + 0, + CBVSize, + 1, + 1, + 1, + DXGI_FORMAT_UNKNOWN, + {1, 0}, + D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS}; + + const D3D12_HEAP_PROPERTIES UploadHeapProp = + CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD); + const D3D12_RESOURCE_DESC UploadResDesc = + CD3DX12_RESOURCE_DESC::Buffer(CBVSize); + + uint32_t RegOffset = 0; + for (const auto &ResData : R.BufferPtr->Data) { + llvm::outs() << "Creating CBV: { Size = " << CBVSize << ", Register = b" + << R.DXBinding.Register + RegOffset + << ", Space = " << R.DXBinding.Space + << ", TilesMapped = " << R.TilesMapped << " }\n"; + + // Create the GPU-side reserved buffer (no physical memory yet) + ComPtr Buffer; + if (auto Err = + HR::toError(Device->CreateReservedResource( + &ResDesc, D3D12_RESOURCE_STATE_COMMON, nullptr, + IID_PPV_ARGS(&Buffer)), + "Failed to create reserved resource (buffer).")) + return Err; + + // Create an upload buffer (committed, CPU-visible) for copying the CBV + // contents + ComPtr UploadBuffer; + if (auto Err = HR::toError( + Device->CreateCommittedResource( + &UploadHeapProp, D3D12_HEAP_FLAG_NONE, &UploadResDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, + IID_PPV_ARGS(&UploadBuffer)), + "Failed to create committed resource (upload buffer).")) + return Err; + + // --- Tile mapping setup (map first R.TilesMapped tiles) --- + UINT numTiles = static_cast(R.TilesMapped); + ComPtr heap; // only created if numTiles > 0 + if (numTiles > 0) { + std::vector startCoords(numTiles); + std::vector regionSizes(numTiles); + std::vector rangeFlags( + numTiles, D3D12_TILE_RANGE_FLAG_NONE); + std::vector heapRangeStartOffsets(numTiles); + std::vector rangeTileCounts(numTiles, 1); + + // Create a heap large enough for all mapped tiles + D3D12_HEAP_DESC heapDesc = {}; + heapDesc.Properties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT); + heapDesc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; + heapDesc.SizeInBytes 
= static_cast(numTiles) * + D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; + heapDesc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; + + if (auto Err = + HR::toError(Device->CreateHeap(&heapDesc, IID_PPV_ARGS(&heap)), + "Failed to create heap for tiled resource.")) + return Err; + + // Fill tile coordinates and region sizes (map tiles starting from tile + // 0) + for (UINT i = 0; i < numTiles; ++i) { + startCoords[i] = {i, 0, 0, 0}; + regionSizes[i].NumTiles = 1; + regionSizes[i].UseBox = FALSE; + heapRangeStartOffsets[i] = i; + } + + // Retrieve a command queue from InvocationState + ID3D12CommandQueue *CommandQueue = IS.Queue.Get(); + + // --- THIS LINE MAPS THE FIRST numTiles TILES (from tile 0 to + // R.TilesMapped - 1) --- + CommandQueue->UpdateTileMappings( + Buffer.Get(), numTiles, startCoords.data(), regionSizes.data(), + heap.Get(), numTiles, rangeFlags.data(), + heapRangeStartOffsets.data(), rangeTileCounts.data(), + D3D12_TILE_MAPPING_FLAG_NONE); + } // end if numTiles > 0 + + // Initialize the CBV data into the upload buffer + void *ResDataPtr = nullptr; + D3D12_RANGE mapRange = { + 0, 0}; // We don't intend to read from the upload buffer on CPU + if (FAILED(UploadBuffer->Map(0, &mapRange, &ResDataPtr))) + return llvm::createStringError(std::errc::io_error, + "Failed to map upload buffer."); + + // Copy CBV payload + memcpy(ResDataPtr, ResData.get(), R.size()); + + // Zero any remaining bytes (correct length CBVSize - R.size()) + if (R.size() < CBVSize) { + void *ExtraData = static_cast(ResDataPtr) + R.size(); + memset(ExtraData, 0, CBVSize - R.size()); + } + + UploadBuffer->Unmap(0, nullptr); + + // Issue GPU-side copy/upload commands to transfer UploadBuffer -> Buffer + addResourceUploadCommands(R, IS, Buffer, UploadBuffer); + + // Save bundle (no readback buffer for CBV) + Bundle.emplace_back(UploadBuffer, Buffer, nullptr); + RegOffset++; + } + + return Bundle; + } + + llvm::Expected createFullyMappedCBV(Resource &R, + InvocationState &IS) { ResourceBundle 
Bundle; const size_t CBVSize = getCBVSize(R.size()); @@ -753,7 +1007,7 @@ class DXDevice : public offloadtest::Device { // Zero any remaining bytes if (R.size() < CBVSize) { void *ExtraData = static_cast(ResDataPtr) + R.size(); - memset(ExtraData, 0, CBVSize - R.size() - 1); + memset(ExtraData, 0, CBVSize - R.size()); } UploadBuffer->Unmap(0, nullptr); @@ -765,6 +1019,13 @@ class DXDevice : public offloadtest::Device { return Bundle; } + llvm::Expected createCBV(Resource &R, InvocationState &IS) { + if (R.TilesMapped != -1) + return createUnmappedCBV(R, IS); + else + return createFullyMappedCBV(R, IS); + } + // returns the next available HeapIdx uint32_t bindCBV(Resource &R, InvocationState &IS, uint32_t HeapIdx, ResourceBundle ResBundle) { diff --git a/lib/Support/Pipeline.cpp b/lib/Support/Pipeline.cpp index d47db5eea..fb491d89f 100644 --- a/lib/Support/Pipeline.cpp +++ b/lib/Support/Pipeline.cpp @@ -272,6 +272,7 @@ void MappingTraits::mapping(IO &I, I.mapRequired("Name", R.Name); I.mapRequired("Kind", R.Kind); I.mapOptional("HasCounter", R.HasCounter, 0); + I.mapOptional("TilesMapped", R.TilesMapped, -1); I.mapRequired("DirectXBinding", R.DXBinding); I.mapOptional("VulkanBinding", R.VKBinding); } diff --git a/test/Feature/HLSLLib/Mapped.test b/test/Feature/HLSLLib/Mapped.test new file mode 100644 index 000000000..c0ce6b877 --- /dev/null +++ b/test/Feature/HLSLLib/Mapped.test @@ -0,0 +1,84 @@ +#--- source.hlsl + +struct S{ + int data[512]; +}; + +StructuredBuffer X : register(t0); +StructuredBuffer Y : register(t1); + +RWStructuredBuffer Out : register(u2); + + +[numthreads(1,1,1)] +void main() { + // 256 elements in the Structured Buffer will span 2 tiles + Out[0] = X[0].data[0]; + Out[1] = X[200].data[0]; + Out[2] = Y[0].data[0]; + Out[3] = Y[200].data[0]; +} +//--- pipeline.yaml + +--- +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] +Buffers: + - Name: X + Format: Int32 + Stride: 65536 + FillSize: 131072 + FillValue: 1 + - Name: Y + 
Format: Int32 + Stride: 65536 + FillSize: 131072 + FillValue: 2 + TilesMapped: 1 + - Name: Out + Format: Int32 + Stride: 4 + ZeroInitSize: 64 + - Name: ExpectedOut + Format: Int32 + Stride: 4 + Data: [1, 1, 2, 0] + +Results: + - Result: Test + Rule: BufferFloatEpsilon + Epsilon: 0.0008 + Actual: Out + Expected: ExpectedOut +DescriptorSets: + - Resources: + - Name: X + Kind: StructuredBuffer + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + - Name: Y + Kind: StructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 + - Name: Out + Kind: RWStructuredBuffer + DirectXBinding: + Register: 2 + Space: 0 + VulkanBinding: + Binding: 2 +#--- end + +# REQUIRES: DirectX + +# RUN: split-file %s %t +# RUN: %dxc_target -Gis -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o From a3b4ce16543d945d7b47aab2a6cdbb2141b30de2 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Fri, 17 Oct 2025 15:20:19 -0700 Subject: [PATCH 02/20] first attempt --- include/Support/Pipeline.h | 1 + lib/API/DX/Device.cpp | 234 ++++++++++++++++++++++++------- test/Feature/HLSLLib/Mapped.test | 47 +++++-- 3 files changed, 216 insertions(+), 66 deletions(-) diff --git a/include/Support/Pipeline.h b/include/Support/Pipeline.h index 2f07be12a..d4dfddd49 100644 --- a/include/Support/Pipeline.h +++ b/include/Support/Pipeline.h @@ -103,6 +103,7 @@ struct Buffer { DataFormat Format; int Channels; int Stride; + uint32_t ArraySize; // Data can contain one block of data for a singular resource // or multiple blocks for a resource array. 
diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp index 5c7912214..9ef30c922 100644 --- a/lib/API/DX/Device.cpp +++ b/lib/API/DX/Device.cpp @@ -244,9 +244,12 @@ class DXDevice : public offloadtest::Device { ComPtr Upload; ComPtr Buffer; ComPtr Readback; + // In unmapped cases, the Heap lifetime needs to be preserved + ComPtr Heap; ResourceSet(ComPtr Upload, ComPtr Buffer, - ComPtr Readback) - : Upload(Upload), Buffer(Buffer), Readback(Readback) {} + ComPtr Readback, + ComPtr Heap = nullptr) + : Upload(Upload), Buffer(Buffer), Readback(Readback), Heap(Heap) {} }; // ResourceBundle will contain one ResourceSet for a singular resource @@ -521,7 +524,107 @@ class DXDevice : public offloadtest::Device { addUploadEndBarrier(IS, Destination, R.isReadWrite()); } - llvm::Expected createSRV(Resource &R, InvocationState &IS) { + llvm::Expected createUnmappedSRV(Resource &R, + InvocationState &IS) { + ResourceBundle Bundle; + const uint32_t BufferSize = R.size(); + const D3D12_RESOURCE_DESC ResDesc = getResourceDescription(R); + + const D3D12_RESOURCE_DESC UploadResDesc = + CD3DX12_RESOURCE_DESC::Buffer(BufferSize); + + uint32_t RegOffset = 0; + for (const auto &ResData : R.BufferPtr->Data) { + llvm::outs() << "Creating SRV: { Size = " << BufferSize + << ", Register = t" << R.DXBinding.Register + RegOffset + << ", Space = " << R.DXBinding.Space + << ", TilesMapped = " << R.TilesMapped << " }\n"; + + // --- Reserved SRV resource --- + ComPtr Buffer; + if (auto Err = + HR::toError(Device->CreateReservedResource( + &ResDesc, D3D12_RESOURCE_STATE_COMMON, nullptr, + IID_PPV_ARGS(&Buffer)), + "Failed to create reserved resource (buffer).")) + return Err; + + // --- Committed Upload Buffer --- + ComPtr UploadBuffer; + const D3D12_HEAP_PROPERTIES UploadHeapProps = + CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD); + + if (auto Err = HR::toError( + Device->CreateCommittedResource( + &UploadHeapProps, D3D12_HEAP_FLAG_NONE, &UploadResDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, // 
upload state for CPU + // writes + nullptr, IID_PPV_ARGS(&UploadBuffer)), + "Failed to create committed resource (upload buffer).")) + return Err; + + // --- Tile mapping setup --- + UINT numTiles = R.TilesMapped; + std::vector startCoords(numTiles); + std::vector regionSizes(numTiles); + std::vector rangeFlags( + numTiles, D3D12_TILE_RANGE_FLAG_NONE); + std::vector heapRangeStartOffsets(numTiles); + std::vector rangeTileCounts(numTiles, 1); + + // Create a heap large enough for all mapped tiles + D3D12_HEAP_DESC heapDesc = {}; + heapDesc.Properties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT); + heapDesc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; + heapDesc.SizeInBytes = + numTiles * D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; + heapDesc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; + + ComPtr heap; + if (auto Err = + HR::toError(Device->CreateHeap(&heapDesc, IID_PPV_ARGS(&heap)), + "Failed to create heap for tiled SRV resource.")) + return Err; + + // Fill tile coordinates and region sizes + for (UINT i = 0; i < numTiles; ++i) { + startCoords[i] = {i, 0, 0, 0}; + regionSizes[i].NumTiles = 1; + regionSizes[i].UseBox = FALSE; + heapRangeStartOffsets[i] = i; + } + + // Retrieve a command queue from InvocationState + ID3D12CommandQueue *CommandQueue = IS.Queue.Get(); + + // Map the first numTiles tiles in the Buffer + CommandQueue->UpdateTileMappings( + Buffer.Get(), numTiles, startCoords.data(), regionSizes.data(), + heap.Get(), numTiles, rangeFlags.data(), heapRangeStartOffsets.data(), + rangeTileCounts.data(), D3D12_TILE_MAPPING_FLAG_NONE); + + // --- Upload data initialization --- + void *ResDataPtr = nullptr; + D3D12_RANGE range = {0, 0}; // no reads expected + if (SUCCEEDED(UploadBuffer->Map(0, &range, &ResDataPtr))) { + memcpy(ResDataPtr, ResData.get(), R.size()); + UploadBuffer->Unmap(0, nullptr); + } else { + return llvm::createStringError(std::errc::io_error, + "Failed to map SRV upload buffer."); + } + + addResourceUploadCommands(R, IS, 
Buffer, UploadBuffer); + + Bundle.emplace_back(UploadBuffer, Buffer, nullptr, heap); + RegOffset++; + } + + return Bundle; + } + + llvm::Expected createMappedSRV(Resource &R, + InvocationState &IS) { ResourceBundle Bundle; const D3D12_HEAP_PROPERTIES HeapProp = @@ -571,6 +674,12 @@ class DXDevice : public offloadtest::Device { return Bundle; } + llvm::Expected createSRV(Resource &R, InvocationState &IS) { + if (R.TilesMapped != -1) + return createUnmappedSRV(R, IS); + return createMappedSRV(R, IS); + } + // returns the next available HeapIdx uint32_t bindSRV(Resource &R, InvocationState &IS, uint32_t HeapIdx, ResourceBundle ResBundle) { @@ -597,7 +706,6 @@ class DXDevice : public offloadtest::Device { InvocationState &IS) { ResourceBundle Bundle; const uint32_t BufferSize = getUAVBufferSize(R); - const D3D12_RESOURCE_DESC ResDesc = getResourceDescription(R); const D3D12_HEAP_PROPERTIES ReadBackHeapProp = @@ -625,6 +733,7 @@ class DXDevice : public offloadtest::Device { << ", HasCounter = " << R.HasCounter << ", TilesMapped = " << R.TilesMapped << " }\n"; + // --- Reserved destination buffer (UAV target) --- ComPtr Buffer; if (auto Err = HR::toError(Device->CreateReservedResource( @@ -633,14 +742,20 @@ class DXDevice : public offloadtest::Device { "Failed to create reserved resource (buffer).")) return Err; + // --- Committed upload buffer (CPU visible) --- ComPtr UploadBuffer; + const D3D12_HEAP_PROPERTIES UploadHeapProps = + CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD); + if (auto Err = HR::toError( - Device->CreateReservedResource( - &UploadResDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, + Device->CreateCommittedResource( + &UploadHeapProps, D3D12_HEAP_FLAG_NONE, &UploadResDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, IID_PPV_ARGS(&UploadBuffer)), - "Failed to create reserved resource (upload buffer).")) + "Failed to create committed resource (upload buffer).")) return Err; + // --- Readback buffer (committed) --- ComPtr ReadBackBuffer; if (auto Err = 
HR::toError( Device->CreateCommittedResource( @@ -650,60 +765,72 @@ class DXDevice : public offloadtest::Device { "Failed to create committed resource (readback buffer).")) return Err; - // --- Tile mapping setup --- - UINT numTiles = R.TilesMapped; - std::vector startCoords(numTiles); - std::vector regionSizes(numTiles); - std::vector rangeFlags( - numTiles, D3D12_TILE_RANGE_FLAG_NONE); - std::vector heapRangeStartOffsets(numTiles); - std::vector rangeTileCounts(numTiles, 1); + // --- Tile mapping setup (map first R.TilesMapped tiles) --- + UINT numTiles = static_cast(R.TilesMapped); + ComPtr heap; // optional backing heap for mapped tiles - // Create a heap large enough for all mapped tiles - D3D12_HEAP_DESC heapDesc = {}; - heapDesc.Properties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT); - heapDesc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; - heapDesc.SizeInBytes = - numTiles * D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; - heapDesc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; + if (numTiles > 0) { + std::vector startCoords(numTiles); + std::vector regionSizes(numTiles); + std::vector rangeFlags( + numTiles, D3D12_TILE_RANGE_FLAG_NONE); + std::vector heapRangeStartOffsets(numTiles); + std::vector rangeTileCounts(numTiles, 1); - ComPtr heap; - if (auto Err = - HR::toError(Device->CreateHeap(&heapDesc, IID_PPV_ARGS(&heap)), - "Failed to create heap for tiled resource.")) - return Err; + // Create a heap large enough for the requested mapped tiles + D3D12_HEAP_DESC heapDesc = {}; + heapDesc.Properties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT); + heapDesc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; + heapDesc.SizeInBytes = static_cast(numTiles) * + D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; + heapDesc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; - // Fill tile coordinates and region sizes - for (UINT i = 0; i < numTiles; ++i) { - startCoords[i] = {i, 0, 0, 0}; - regionSizes[i].NumTiles = 1; - regionSizes[i].UseBox = FALSE; - 
heapRangeStartOffsets[i] = i; - } + if (auto Err = + HR::toError(Device->CreateHeap(&heapDesc, IID_PPV_ARGS(&heap)), + "Failed to create heap for tiled resource.")) + return Err; - // Retrieve a command queue from InvocationState - ID3D12CommandQueue *CommandQueue = IS.Queue.Get(); + // Fill tile coordinates and region sizes (map tiles starting at tile 0) + for (UINT i = 0; i < numTiles; ++i) { + startCoords[i] = {i, 0, 0, 0}; + regionSizes[i].NumTiles = 1; + regionSizes[i].UseBox = FALSE; + heapRangeStartOffsets[i] = i; + } - // Map the first numTiles tiles in the Buffer - CommandQueue->UpdateTileMappings( - Buffer.Get(), numTiles, startCoords.data(), regionSizes.data(), - heap.Get(), numTiles, rangeFlags.data(), heapRangeStartOffsets.data(), - rangeTileCounts.data(), D3D12_TILE_MAPPING_FLAG_NONE); + // Retrieve a command queue from InvocationState + ID3D12CommandQueue *CommandQueue = IS.Queue.Get(); + + // Map the first numTiles tiles in the Buffer + CommandQueue->UpdateTileMappings( + Buffer.Get(), numTiles, startCoords.data(), regionSizes.data(), + heap.Get(), numTiles, rangeFlags.data(), + heapRangeStartOffsets.data(), rangeTileCounts.data(), + D3D12_TILE_MAPPING_FLAG_NONE); + } - // Map upload buffer to copy data + // --- Upload data initialization --- void *ResDataPtr = nullptr; - D3D12_RANGE range = {0, 0}; // no reads expected - if (SUCCEEDED(UploadBuffer->Map(0, &range, &ResDataPtr))) { + D3D12_RANGE mapRange = {0, 0}; // no reads expected + if (SUCCEEDED(UploadBuffer->Map(0, &mapRange, &ResDataPtr))) { memcpy(ResDataPtr, ResData.get(), R.size()); + // Zero any remaining bytes if buffer element is larger than R.size() + if (R.size() < BufferSize) { + memset(static_cast(ResDataPtr) + R.size(), 0, + BufferSize - R.size()); + } UploadBuffer->Unmap(0, nullptr); } else { return llvm::createStringError(std::errc::io_error, "Failed to map upload buffer."); } + // Issue copy/upload commands to transfer UploadBuffer -> Buffer (tile + // region) 
addResourceUploadCommands(R, IS, Buffer, UploadBuffer); - Bundle.emplace_back(UploadBuffer, Buffer, ReadBackBuffer); + // Store heap in Bundle so it lives until caller releases the Bundle + Bundle.emplace_back(UploadBuffer, Buffer, ReadBackBuffer, heap); RegOffset++; } @@ -791,8 +918,7 @@ class DXDevice : public offloadtest::Device { llvm::Expected createUAV(Resource &R, InvocationState &IS) { if (R.TilesMapped != -1) return createUnmappedUAV(R, IS); - else - return createFullyMappedUAV(R, IS); + return createFullyMappedUAV(R, IS); } // returns the next available HeapIdx @@ -809,7 +935,8 @@ class DXDevice : public offloadtest::Device { for (const ResourceSet &RS : ResBundle) { llvm::outs() << "UAV: HeapIdx = " << HeapIdx << " EltSize = " << EltSize << " NumElts = " << NumElts - << " HasCounter = " << R.HasCounter << "\n"; + << " HasCounter = " << R.HasCounter + << " TilesMapped = " << R.TilesMapped << "\n"; D3D12_CPU_DESCRIPTOR_HANDLE UAVHandle = UAVHandleHeapStart; UAVHandle.ptr += HeapIdx * DescHandleIncSize; ID3D12Resource *CounterBuffer = R.HasCounter ? 
RS.Buffer.Get() : nullptr; @@ -878,7 +1005,8 @@ class DXDevice : public offloadtest::Device { // --- Tile mapping setup (map first R.TilesMapped tiles) --- UINT numTiles = static_cast(R.TilesMapped); - ComPtr heap; // only created if numTiles > 0 + ComPtr heap; // keep alive in bundle if created + if (numTiles > 0) { std::vector startCoords(numTiles); std::vector regionSizes(numTiles); @@ -943,8 +1071,9 @@ class DXDevice : public offloadtest::Device { // Issue GPU-side copy/upload commands to transfer UploadBuffer -> Buffer addResourceUploadCommands(R, IS, Buffer, UploadBuffer); - // Save bundle (no readback buffer for CBV) - Bundle.emplace_back(UploadBuffer, Buffer, nullptr); + // Save bundle (store heap so backing memory stays alive while GPU uses + // it) + Bundle.emplace_back(UploadBuffer, Buffer, nullptr, heap); RegOffset++; } @@ -1022,8 +1151,7 @@ class DXDevice : public offloadtest::Device { llvm::Expected createCBV(Resource &R, InvocationState &IS) { if (R.TilesMapped != -1) return createUnmappedCBV(R, IS); - else - return createFullyMappedCBV(R, IS); + return createFullyMappedCBV(R, IS); } // returns the next available HeapIdx diff --git a/test/Feature/HLSLLib/Mapped.test b/test/Feature/HLSLLib/Mapped.test index c0ce6b877..cbd58c2cc 100644 --- a/test/Feature/HLSLLib/Mapped.test +++ b/test/Feature/HLSLLib/Mapped.test @@ -12,11 +12,30 @@ RWStructuredBuffer Out : register(u2); [numthreads(1,1,1)] void main() { - // 256 elements in the Structured Buffer will span 2 tiles - Out[0] = X[0].data[0]; - Out[1] = X[200].data[0]; - Out[2] = Y[0].data[0]; - Out[3] = Y[200].data[0]; + // 32 S structs inside X or Y occupy 64KB of data. 
(32 * 512 ints * 4 bytes per int) + // So, any index into the buffer >= [32] will access a new "tile" + + int idx = 0; + + uint status; + S Out0 = X.Load(0, status); + Out[idx] = Out0.data[0]; + Out[idx + 4] = status; + idx += 1; + + S Out1 = X.Load(50, status); + Out[idx] = Out1.data[0]; + Out[idx + 4] = status; + idx += 1; + + S Out2 = Y.Load(0, status); + Out[idx] = Out2.data[0]; + Out[idx + 4] = status; + idx += 1; + + S Out3 = Y.Load(50, status); + Out[idx] = Out3.data[0]; + Out[idx + 4] = status; } //--- pipeline.yaml @@ -28,28 +47,28 @@ Shaders: Buffers: - Name: X Format: Int32 - Stride: 65536 + Stride: 2048 # S is 512 ints, 512*4 = 2048. FillSize: 131072 FillValue: 1 - Name: Y Format: Int32 - Stride: 65536 + Stride: 2048 FillSize: 131072 FillValue: 2 - TilesMapped: 1 - Name: Out Format: Int32 Stride: 4 - ZeroInitSize: 64 + FillSize: 32 - Name: ExpectedOut Format: Int32 Stride: 4 - Data: [1, 1, 2, 0] + # first 4 values are the actual data retrieved. For non-resident loads, 0 is expected. + # last 4 values are the status. 
1 is expected for resident memory, 0 for non-resident + Data: [1, 1, 2, 0, 1, 1, 1, 0] Results: - Result: Test - Rule: BufferFloatEpsilon - Epsilon: 0.0008 + Rule: BufferExact Actual: Out Expected: ExpectedOut DescriptorSets: @@ -68,6 +87,7 @@ DescriptorSets: Space: 0 VulkanBinding: Binding: 1 + TilesMapped: 1 - Name: Out Kind: RWStructuredBuffer DirectXBinding: @@ -77,7 +97,8 @@ DescriptorSets: Binding: 2 #--- end -# REQUIRES: DirectX +# XFAIL: Clang +# XFAIL: Vulkan # RUN: split-file %s %t # RUN: %dxc_target -Gis -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl From 2d371edffc4499e4761d4de940f279f085b8eb11 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Fri, 17 Oct 2025 15:29:21 -0700 Subject: [PATCH 03/20] self review --- include/Support/Pipeline.h | 1 - lib/API/DX/Device.cpp | 24 +++++++++++------------- test/Feature/HLSLLib/Mapped.test | 2 +- 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/include/Support/Pipeline.h b/include/Support/Pipeline.h index d4dfddd49..2f07be12a 100644 --- a/include/Support/Pipeline.h +++ b/include/Support/Pipeline.h @@ -103,7 +103,6 @@ struct Buffer { DataFormat Format; int Channels; int Stride; - uint32_t ArraySize; // Data can contain one block of data for a singular resource // or multiple blocks for a resource array. 
diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp index 9ef30c922..c8d7f1ee6 100644 --- a/lib/API/DX/Device.cpp +++ b/lib/API/DX/Device.cpp @@ -540,7 +540,7 @@ class DXDevice : public offloadtest::Device { << ", Space = " << R.DXBinding.Space << ", TilesMapped = " << R.TilesMapped << " }\n"; - // --- Reserved SRV resource --- + // Reserved SRV resource ComPtr Buffer; if (auto Err = HR::toError(Device->CreateReservedResource( @@ -549,7 +549,7 @@ class DXDevice : public offloadtest::Device { "Failed to create reserved resource (buffer).")) return Err; - // --- Committed Upload Buffer --- + // Committed Upload Buffer ComPtr UploadBuffer; const D3D12_HEAP_PROPERTIES UploadHeapProps = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD); @@ -563,7 +563,7 @@ class DXDevice : public offloadtest::Device { "Failed to create committed resource (upload buffer).")) return Err; - // --- Tile mapping setup --- + // Tile mapping setup UINT numTiles = R.TilesMapped; std::vector startCoords(numTiles); std::vector regionSizes(numTiles); @@ -603,7 +603,7 @@ class DXDevice : public offloadtest::Device { heap.Get(), numTiles, rangeFlags.data(), heapRangeStartOffsets.data(), rangeTileCounts.data(), D3D12_TILE_MAPPING_FLAG_NONE); - // --- Upload data initialization --- + // Upload data initialization void *ResDataPtr = nullptr; D3D12_RANGE range = {0, 0}; // no reads expected if (SUCCEEDED(UploadBuffer->Map(0, &range, &ResDataPtr))) { @@ -733,7 +733,7 @@ class DXDevice : public offloadtest::Device { << ", HasCounter = " << R.HasCounter << ", TilesMapped = " << R.TilesMapped << " }\n"; - // --- Reserved destination buffer (UAV target) --- + // Reserved destination buffer (UAV target) ComPtr Buffer; if (auto Err = HR::toError(Device->CreateReservedResource( @@ -742,7 +742,7 @@ class DXDevice : public offloadtest::Device { "Failed to create reserved resource (buffer).")) return Err; - // --- Committed upload buffer (CPU visible) --- + // Committed upload buffer (CPU visible) ComPtr 
UploadBuffer; const D3D12_HEAP_PROPERTIES UploadHeapProps = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD); @@ -755,7 +755,7 @@ class DXDevice : public offloadtest::Device { "Failed to create committed resource (upload buffer).")) return Err; - // --- Readback buffer (committed) --- + // Readback buffer (committed) ComPtr ReadBackBuffer; if (auto Err = HR::toError( Device->CreateCommittedResource( @@ -765,7 +765,7 @@ class DXDevice : public offloadtest::Device { "Failed to create committed resource (readback buffer).")) return Err; - // --- Tile mapping setup (map first R.TilesMapped tiles) --- + // Tile mapping setup (map first R.TilesMapped tiles) UINT numTiles = static_cast(R.TilesMapped); ComPtr heap; // optional backing heap for mapped tiles @@ -809,7 +809,7 @@ class DXDevice : public offloadtest::Device { D3D12_TILE_MAPPING_FLAG_NONE); } - // --- Upload data initialization --- + // Upload data initialization void *ResDataPtr = nullptr; D3D12_RANGE mapRange = {0, 0}; // no reads expected if (SUCCEEDED(UploadBuffer->Map(0, &mapRange, &ResDataPtr))) { @@ -873,7 +873,6 @@ class DXDevice : public offloadtest::Device { << ", HasCounter = " << R.HasCounter << " }\n"; ComPtr Buffer; - if (auto Err = HR::toError( Device->CreateCommittedResource( &HeapProp, D3D12_HEAP_FLAG_NONE, &ResDesc, @@ -1003,7 +1002,7 @@ class DXDevice : public offloadtest::Device { "Failed to create committed resource (upload buffer).")) return Err; - // --- Tile mapping setup (map first R.TilesMapped tiles) --- + // Tile mapping setup (map first R.TilesMapped tiles) UINT numTiles = static_cast(R.TilesMapped); ComPtr heap; // keep alive in bundle if created @@ -1040,8 +1039,7 @@ class DXDevice : public offloadtest::Device { // Retrieve a command queue from InvocationState ID3D12CommandQueue *CommandQueue = IS.Queue.Get(); - // --- THIS LINE MAPS THE FIRST numTiles TILES (from tile 0 to - // R.TilesMapped - 1) --- + // Map the first numtiles Tiles CommandQueue->UpdateTileMappings( Buffer.Get(), 
numTiles, startCoords.data(), regionSizes.data(), heap.Get(), numTiles, rangeFlags.data(), diff --git a/test/Feature/HLSLLib/Mapped.test b/test/Feature/HLSLLib/Mapped.test index cbd58c2cc..a1ca134f6 100644 --- a/test/Feature/HLSLLib/Mapped.test +++ b/test/Feature/HLSLLib/Mapped.test @@ -101,5 +101,5 @@ DescriptorSets: # XFAIL: Vulkan # RUN: split-file %s %t -# RUN: %dxc_target -Gis -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl +# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl # RUN: %offloader %t/pipeline.yaml %t.o From 284d8fe8195d355de240326ea54c929ec93146d7 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Fri, 17 Oct 2025 15:58:36 -0700 Subject: [PATCH 04/20] add check access fully mapped --- lib/API/DX/Device.cpp | 90 ++++++++++++++++++-------------- test/Feature/HLSLLib/Mapped.test | 35 +++++++++++-- 2 files changed, 81 insertions(+), 44 deletions(-) diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp index c8d7f1ee6..259018d92 100644 --- a/lib/API/DX/Device.cpp +++ b/lib/API/DX/Device.cpp @@ -549,7 +549,7 @@ class DXDevice : public offloadtest::Device { "Failed to create reserved resource (buffer).")) return Err; - // Committed Upload Buffer + // Committed Upload Buffer (CPU visible) ComPtr UploadBuffer; const D3D12_HEAP_PROPERTIES UploadHeapProps = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD); @@ -557,65 +557,75 @@ class DXDevice : public offloadtest::Device { if (auto Err = HR::toError( Device->CreateCommittedResource( &UploadHeapProps, D3D12_HEAP_FLAG_NONE, &UploadResDesc, - D3D12_RESOURCE_STATE_GENERIC_READ, // upload state for CPU - // writes - nullptr, IID_PPV_ARGS(&UploadBuffer)), + D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, + IID_PPV_ARGS(&UploadBuffer)), "Failed to create committed resource (upload buffer).")) return Err; - // Tile mapping setup - UINT numTiles = R.TilesMapped; - std::vector startCoords(numTiles); - std::vector regionSizes(numTiles); - std::vector rangeFlags( - numTiles, D3D12_TILE_RANGE_FLAG_NONE); - std::vector 
heapRangeStartOffsets(numTiles); - std::vector rangeTileCounts(numTiles, 1); - - // Create a heap large enough for all mapped tiles - D3D12_HEAP_DESC heapDesc = {}; - heapDesc.Properties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT); - heapDesc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; - heapDesc.SizeInBytes = - numTiles * D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; - heapDesc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; - - ComPtr heap; - if (auto Err = - HR::toError(Device->CreateHeap(&heapDesc, IID_PPV_ARGS(&heap)), - "Failed to create heap for tiled SRV resource.")) - return Err; + // Tile mapping setup (optional if numTiles > 0) + UINT numTiles = static_cast(R.TilesMapped); + ComPtr heap; // optional, only created if numTiles > 0 - // Fill tile coordinates and region sizes - for (UINT i = 0; i < numTiles; ++i) { - startCoords[i] = {i, 0, 0, 0}; - regionSizes[i].NumTiles = 1; - regionSizes[i].UseBox = FALSE; - heapRangeStartOffsets[i] = i; - } + if (numTiles > 0) { + std::vector startCoords(numTiles); + std::vector regionSizes(numTiles); + std::vector rangeFlags( + numTiles, D3D12_TILE_RANGE_FLAG_NONE); + std::vector heapRangeStartOffsets(numTiles); + std::vector rangeTileCounts(numTiles, 1); + + // Create a heap large enough for the mapped tiles + D3D12_HEAP_DESC heapDesc = {}; + heapDesc.Properties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT); + heapDesc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; + heapDesc.SizeInBytes = static_cast(numTiles) * + D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; + heapDesc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; + + if (auto Err = + HR::toError(Device->CreateHeap(&heapDesc, IID_PPV_ARGS(&heap)), + "Failed to create heap for tiled SRV resource.")) + return Err; - // Retrieve a command queue from InvocationState - ID3D12CommandQueue *CommandQueue = IS.Queue.Get(); + // Fill tile coordinates and region sizes + for (UINT i = 0; i < numTiles; ++i) { + startCoords[i] = {i, 0, 0, 0}; + 
regionSizes[i].NumTiles = 1; + regionSizes[i].UseBox = FALSE; + heapRangeStartOffsets[i] = i; + } + + // Retrieve a command queue from InvocationState + ID3D12CommandQueue *CommandQueue = IS.Queue.Get(); - // Map the first numTiles tiles in the Buffer - CommandQueue->UpdateTileMappings( - Buffer.Get(), numTiles, startCoords.data(), regionSizes.data(), - heap.Get(), numTiles, rangeFlags.data(), heapRangeStartOffsets.data(), - rangeTileCounts.data(), D3D12_TILE_MAPPING_FLAG_NONE); + // Map the first numTiles tiles in the Buffer + CommandQueue->UpdateTileMappings( + Buffer.Get(), numTiles, startCoords.data(), regionSizes.data(), + heap.Get(), numTiles, rangeFlags.data(), + heapRangeStartOffsets.data(), rangeTileCounts.data(), + D3D12_TILE_MAPPING_FLAG_NONE); + } // Upload data initialization void *ResDataPtr = nullptr; D3D12_RANGE range = {0, 0}; // no reads expected if (SUCCEEDED(UploadBuffer->Map(0, &range, &ResDataPtr))) { memcpy(ResDataPtr, ResData.get(), R.size()); + // Zero remaining bytes if the buffer is padded + if (R.size() < BufferSize) { + memset(static_cast(ResDataPtr) + R.size(), 0, + BufferSize - R.size()); + } UploadBuffer->Unmap(0, nullptr); } else { return llvm::createStringError(std::errc::io_error, "Failed to map SRV upload buffer."); } + // Add GPU upload commands addResourceUploadCommands(R, IS, Buffer, UploadBuffer); + // Store resource bundle (heap optional) Bundle.emplace_back(UploadBuffer, Buffer, nullptr, heap); RegOffset++; } diff --git a/test/Feature/HLSLLib/Mapped.test b/test/Feature/HLSLLib/Mapped.test index a1ca134f6..26b8ff333 100644 --- a/test/Feature/HLSLLib/Mapped.test +++ b/test/Feature/HLSLLib/Mapped.test @@ -8,6 +8,7 @@ StructuredBuffer X : register(t0); StructuredBuffer Y : register(t1); RWStructuredBuffer Out : register(u2); +RWStructuredBuffer CAFM : register(u3); [numthreads(1,1,1)] @@ -21,21 +22,25 @@ void main() { S Out0 = X.Load(0, status); Out[idx] = Out0.data[0]; Out[idx + 4] = status; + CAFM[idx] = 
CheckAccessFullyMapped(status) ? 1 : 0; idx += 1; S Out1 = X.Load(50, status); Out[idx] = Out1.data[0]; Out[idx + 4] = status; + CAFM[idx] = CheckAccessFullyMapped(status) ? 1 : 0; idx += 1; S Out2 = Y.Load(0, status); Out[idx] = Out2.data[0]; Out[idx + 4] = status; + CAFM[idx] = CheckAccessFullyMapped(status) ? 1 : 0; idx += 1; S Out3 = Y.Load(50, status); Out[idx] = Out3.data[0]; - Out[idx + 4] = status; + Out[idx + 4] = status; + CAFM[idx] = CheckAccessFullyMapped(status) ? 1 : 0; } //--- pipeline.yaml @@ -64,13 +69,27 @@ Buffers: Stride: 4 # first 4 values are the actual data retrieved. For non-resident loads, 0 is expected. # last 4 values are the status. 1 is expected for resident memory, 0 for non-resident - Data: [1, 1, 2, 0, 1, 1, 1, 0] + Data: [1, 0, 0, 0, 1, 0, 0, 0] + - Name: CAFM + Format: Int32 + Stride: 4 + FillSize: 16 + FillValue: 0 + - Name: ExpectedCAFM + Format: Int32 + Stride: 4 + # Only the first data access should be accessing fully mapped memory + Data: [1, 0, 0, 0] Results: - Result: Test - Rule: BufferExact + Rule: BufferExact Actual: Out Expected: ExpectedOut + - Result: TestCAFM + Rule: BufferExact + Actual: CAFM + Expected: ExpectedCAFM DescriptorSets: - Resources: - Name: X @@ -80,6 +99,7 @@ DescriptorSets: Space: 0 VulkanBinding: Binding: 0 + TilesMapped: 1 - Name: Y Kind: StructuredBuffer DirectXBinding: @@ -87,7 +107,7 @@ DescriptorSets: Space: 0 VulkanBinding: Binding: 1 - TilesMapped: 1 + TilesMapped: 0 - Name: Out Kind: RWStructuredBuffer DirectXBinding: @@ -95,6 +115,13 @@ DescriptorSets: Space: 0 VulkanBinding: Binding: 2 + - Name: CAFM + Kind: RWStructuredBuffer + DirectXBinding: + Register: 3 + Space: 0 + VulkanBinding: + Binding: 3 #--- end # XFAIL: Clang From 9c6664b14900affadc870960998e0a9db18f41de Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Fri, 17 Oct 2025 16:32:20 -0700 Subject: [PATCH 05/20] make all 3 new functions more consistent between each other --- lib/API/DX/Device.cpp | 100 
+++++++++++++++++++----------------------- 1 file changed, 45 insertions(+), 55 deletions(-) diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp index 259018d92..27353dba8 100644 --- a/lib/API/DX/Device.cpp +++ b/lib/API/DX/Device.cpp @@ -608,8 +608,8 @@ class DXDevice : public offloadtest::Device { // Upload data initialization void *ResDataPtr = nullptr; - D3D12_RANGE range = {0, 0}; // no reads expected - if (SUCCEEDED(UploadBuffer->Map(0, &range, &ResDataPtr))) { + D3D12_RANGE Range = {0, 0}; // no reads expected + if (SUCCEEDED(UploadBuffer->Map(0, &Range, &ResDataPtr))) { memcpy(ResDataPtr, ResData.get(), R.size()); // Zero remaining bytes if the buffer is padded if (R.size() < BufferSize) { @@ -625,7 +625,7 @@ class DXDevice : public offloadtest::Device { // Add GPU upload commands addResourceUploadCommands(R, IS, Buffer, UploadBuffer); - // Store resource bundle (heap optional) + // Store heap in Bundle so it lives until caller releases the Bundle Bundle.emplace_back(UploadBuffer, Buffer, nullptr, heap); RegOffset++; } @@ -743,7 +743,7 @@ class DXDevice : public offloadtest::Device { << ", HasCounter = " << R.HasCounter << ", TilesMapped = " << R.TilesMapped << " }\n"; - // Reserved destination buffer (UAV target) + // Reserved UAV resource ComPtr Buffer; if (auto Err = HR::toError(Device->CreateReservedResource( @@ -752,7 +752,7 @@ class DXDevice : public offloadtest::Device { "Failed to create reserved resource (buffer).")) return Err; - // Committed upload buffer (CPU visible) + // Committed Upload Buffer (CPU visible) ComPtr UploadBuffer; const D3D12_HEAP_PROPERTIES UploadHeapProps = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD); @@ -775,9 +775,9 @@ class DXDevice : public offloadtest::Device { "Failed to create committed resource (readback buffer).")) return Err; - // Tile mapping setup (map first R.TilesMapped tiles) + // Tile mapping setup (optional if numTiles > 0) UINT numTiles = static_cast(R.TilesMapped); - ComPtr heap; // optional 
backing heap for mapped tiles + ComPtr heap; // optional, only created if numTiles > 0 if (numTiles > 0) { std::vector startCoords(numTiles); @@ -787,7 +787,7 @@ class DXDevice : public offloadtest::Device { std::vector heapRangeStartOffsets(numTiles); std::vector rangeTileCounts(numTiles, 1); - // Create a heap large enough for the requested mapped tiles + // Create a heap large enough for the mapped tiles D3D12_HEAP_DESC heapDesc = {}; heapDesc.Properties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT); heapDesc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; @@ -797,10 +797,10 @@ class DXDevice : public offloadtest::Device { if (auto Err = HR::toError(Device->CreateHeap(&heapDesc, IID_PPV_ARGS(&heap)), - "Failed to create heap for tiled resource.")) + "Failed to create heap for tiled UAV resource.")) return Err; - // Fill tile coordinates and region sizes (map tiles starting at tile 0) + // Fill tile coordinates and region sizes for (UINT i = 0; i < numTiles; ++i) { startCoords[i] = {i, 0, 0, 0}; regionSizes[i].NumTiles = 1; @@ -821,10 +821,10 @@ class DXDevice : public offloadtest::Device { // Upload data initialization void *ResDataPtr = nullptr; - D3D12_RANGE mapRange = {0, 0}; // no reads expected - if (SUCCEEDED(UploadBuffer->Map(0, &mapRange, &ResDataPtr))) { + D3D12_RANGE Range = {0, 0}; // no reads expected + if (SUCCEEDED(UploadBuffer->Map(0, &Range, &ResDataPtr))) { memcpy(ResDataPtr, ResData.get(), R.size()); - // Zero any remaining bytes if buffer element is larger than R.size() + // Zero remaining bytes if the buffer is padded if (R.size() < BufferSize) { memset(static_cast(ResDataPtr) + R.size(), 0, BufferSize - R.size()); @@ -835,8 +835,7 @@ class DXDevice : public offloadtest::Device { "Failed to map upload buffer."); } - // Issue copy/upload commands to transfer UploadBuffer -> Buffer (tile - // region) + // Add GPU upload commands addResourceUploadCommands(R, IS, Buffer, UploadBuffer); // Store heap in Bundle so it lives until caller 
releases the Bundle @@ -964,14 +963,11 @@ class DXDevice : public offloadtest::Device { InvocationState &IS) { ResourceBundle Bundle; - const size_t CBVSize = getCBVSize(R.size()); - - // Create a buffer description for a reserved buffer (no physical memory - // yet) + const size_t BufferSize = getCBVSize(R.size()); const D3D12_RESOURCE_DESC ResDesc = { D3D12_RESOURCE_DIMENSION_BUFFER, 0, - CBVSize, + BufferSize, 1, 1, 1, @@ -980,19 +976,17 @@ class DXDevice : public offloadtest::Device { D3D12_TEXTURE_LAYOUT_ROW_MAJOR, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS}; - const D3D12_HEAP_PROPERTIES UploadHeapProp = - CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD); const D3D12_RESOURCE_DESC UploadResDesc = - CD3DX12_RESOURCE_DESC::Buffer(CBVSize); + CD3DX12_RESOURCE_DESC::Buffer(BufferSize); uint32_t RegOffset = 0; for (const auto &ResData : R.BufferPtr->Data) { - llvm::outs() << "Creating CBV: { Size = " << CBVSize << ", Register = b" - << R.DXBinding.Register + RegOffset + llvm::outs() << "Creating CBV: { Size = " << BufferSize + << ", Register = b" << R.DXBinding.Register + RegOffset << ", Space = " << R.DXBinding.Space << ", TilesMapped = " << R.TilesMapped << " }\n"; - // Create the GPU-side reserved buffer (no physical memory yet) + // Reserved CBV resource ComPtr Buffer; if (auto Err = HR::toError(Device->CreateReservedResource( @@ -1001,20 +995,21 @@ class DXDevice : public offloadtest::Device { "Failed to create reserved resource (buffer).")) return Err; - // Create an upload buffer (committed, CPU-visible) for copying the CBV - // contents + // Committed Upload Buffer (CPU visible) ComPtr UploadBuffer; + const D3D12_HEAP_PROPERTIES UploadHeapProps = + CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD); if (auto Err = HR::toError( Device->CreateCommittedResource( - &UploadHeapProp, D3D12_HEAP_FLAG_NONE, &UploadResDesc, + &UploadHeapProps, D3D12_HEAP_FLAG_NONE, &UploadResDesc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, IID_PPV_ARGS(&UploadBuffer)), "Failed to 
create committed resource (upload buffer).")) return Err; - // Tile mapping setup (map first R.TilesMapped tiles) + // Tile mapping setup (optional if numTiles > 0) UINT numTiles = static_cast(R.TilesMapped); - ComPtr heap; // keep alive in bundle if created + ComPtr heap; // optional, only created if numTiles > 0 if (numTiles > 0) { std::vector startCoords(numTiles); @@ -1024,7 +1019,7 @@ class DXDevice : public offloadtest::Device { std::vector heapRangeStartOffsets(numTiles); std::vector rangeTileCounts(numTiles, 1); - // Create a heap large enough for all mapped tiles + // Create a heap large enough for the mapped tiles D3D12_HEAP_DESC heapDesc = {}; heapDesc.Properties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT); heapDesc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; @@ -1034,11 +1029,10 @@ class DXDevice : public offloadtest::Device { if (auto Err = HR::toError(Device->CreateHeap(&heapDesc, IID_PPV_ARGS(&heap)), - "Failed to create heap for tiled resource.")) + "Failed to create heap for tiled CBV resource.")) return Err; - // Fill tile coordinates and region sizes (map tiles starting from tile - // 0) + // Fill tile coordinates and region sizes for (UINT i = 0; i < numTiles; ++i) { startCoords[i] = {i, 0, 0, 0}; regionSizes[i].NumTiles = 1; @@ -1049,38 +1043,34 @@ class DXDevice : public offloadtest::Device { // Retrieve a command queue from InvocationState ID3D12CommandQueue *CommandQueue = IS.Queue.Get(); - // Map the first numtiles Tiles + // Map the first numTiles tiles in the Buffer CommandQueue->UpdateTileMappings( Buffer.Get(), numTiles, startCoords.data(), regionSizes.data(), heap.Get(), numTiles, rangeFlags.data(), heapRangeStartOffsets.data(), rangeTileCounts.data(), D3D12_TILE_MAPPING_FLAG_NONE); - } // end if numTiles > 0 + } - // Initialize the CBV data into the upload buffer + // Upload data initialization void *ResDataPtr = nullptr; - D3D12_RANGE mapRange = { - 0, 0}; // We don't intend to read from the upload buffer on CPU - 
if (FAILED(UploadBuffer->Map(0, &mapRange, &ResDataPtr))) + D3D12_RANGE range = {0, 0}; // no reads expected + if (SUCCEEDED(UploadBuffer->Map(0, &range, &ResDataPtr))) { + memcpy(ResDataPtr, ResData.get(), R.size()); + // Zero remaining bytes if the buffer is padded + if (R.size() < BufferSize) { + memset(static_cast(ResDataPtr) + R.size(), 0, + BufferSize - R.size()); + } + UploadBuffer->Unmap(0, nullptr); + } else { return llvm::createStringError(std::errc::io_error, - "Failed to map upload buffer."); - - // Copy CBV payload - memcpy(ResDataPtr, ResData.get(), R.size()); - - // Zero any remaining bytes (correct length CBVSize - R.size()) - if (R.size() < CBVSize) { - void *ExtraData = static_cast(ResDataPtr) + R.size(); - memset(ExtraData, 0, CBVSize - R.size()); + "Failed to map CBV upload buffer."); } - UploadBuffer->Unmap(0, nullptr); - - // Issue GPU-side copy/upload commands to transfer UploadBuffer -> Buffer + // Add GPU upload commands addResourceUploadCommands(R, IS, Buffer, UploadBuffer); - // Save bundle (store heap so backing memory stays alive while GPU uses - // it) + // Store resource bundle (heap optional) Bundle.emplace_back(UploadBuffer, Buffer, nullptr, heap); RegOffset++; } From eda271510e9d0064330f3cc7a57611a8d060d70b Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Fri, 17 Oct 2025 16:33:56 -0700 Subject: [PATCH 06/20] rename test file name --- .../HLSLLib/{Mapped.test => PartiallyMappedResources.test} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename test/Feature/HLSLLib/{Mapped.test => PartiallyMappedResources.test} (100%) diff --git a/test/Feature/HLSLLib/Mapped.test b/test/Feature/HLSLLib/PartiallyMappedResources.test similarity index 100% rename from test/Feature/HLSLLib/Mapped.test rename to test/Feature/HLSLLib/PartiallyMappedResources.test From b58d9248148143758d60dda5355173151adc2722 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Fri, 17 Oct 2025 17:50:27 -0700 Subject: [PATCH 07/20] address variable casing errors 
--- lib/API/DX/Device.cpp | 166 +++++++++++++++++++++--------------------- 1 file changed, 83 insertions(+), 83 deletions(-) diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp index 27353dba8..5f3571ad3 100644 --- a/lib/API/DX/Device.cpp +++ b/lib/API/DX/Device.cpp @@ -562,47 +562,47 @@ class DXDevice : public offloadtest::Device { "Failed to create committed resource (upload buffer).")) return Err; - // Tile mapping setup (optional if numTiles > 0) - UINT numTiles = static_cast(R.TilesMapped); - ComPtr heap; // optional, only created if numTiles > 0 - - if (numTiles > 0) { - std::vector startCoords(numTiles); - std::vector regionSizes(numTiles); - std::vector rangeFlags( - numTiles, D3D12_TILE_RANGE_FLAG_NONE); - std::vector heapRangeStartOffsets(numTiles); - std::vector rangeTileCounts(numTiles, 1); - - // Create a heap large enough for the mapped tiles - D3D12_HEAP_DESC heapDesc = {}; - heapDesc.Properties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT); - heapDesc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; - heapDesc.SizeInBytes = static_cast(numTiles) * + // Tile mapping setup (optional if NumTiles > 0) + UINT NumTiles = static_cast(R.TilesMapped); + ComPtr Heap; // optional, only created if NumTiles > 0 + + if (NumTiles > 0) { + std::vector StartCoords(NumTiles); + std::vector RegionSizes(NumTiles); + std::vector RangeFlags( + NumTiles, D3D12_TILE_RANGE_FLAG_NONE); + std::vector HeapRangeStartOffsets(NumTiles); + std::vector RangeTileCounts(NumTiles, 1); + + // Create a Heap large enough for the mapped tiles + D3D12_HEAP_DESC HeapDesc = {}; + HeapDesc.Properties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT); + HeapDesc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; + HeapDesc.SizeInBytes = static_cast(NumTiles) * D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; - heapDesc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; + HeapDesc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; if (auto Err = - HR::toError(Device->CreateHeap(&heapDesc, 
IID_PPV_ARGS(&heap)), + HR::toError(Device->CreateHeap(&HeapDesc, IID_PPV_ARGS(&Heap)), "Failed to create heap for tiled SRV resource.")) return Err; // Fill tile coordinates and region sizes - for (UINT i = 0; i < numTiles; ++i) { - startCoords[i] = {i, 0, 0, 0}; - regionSizes[i].NumTiles = 1; - regionSizes[i].UseBox = FALSE; - heapRangeStartOffsets[i] = i; + for (UINT I = 0; I < NumTiles; ++I) { + StartCoords[I] = {I, 0, 0, 0}; + RegionSizes[I].NumTiles = 1; + RegionSizes[I].UseBox = FALSE; + HeapRangeStartOffsets[I] = I; } // Retrieve a command queue from InvocationState ID3D12CommandQueue *CommandQueue = IS.Queue.Get(); - // Map the first numTiles tiles in the Buffer + // Map the first NumTiles tiles in the Buffer CommandQueue->UpdateTileMappings( - Buffer.Get(), numTiles, startCoords.data(), regionSizes.data(), - heap.Get(), numTiles, rangeFlags.data(), - heapRangeStartOffsets.data(), rangeTileCounts.data(), + Buffer.Get(), NumTiles, StartCoords.data(), RegionSizes.data(), + Heap.Get(), NumTiles, RangeFlags.data(), + HeapRangeStartOffsets.data(), RangeTileCounts.data(), D3D12_TILE_MAPPING_FLAG_NONE); } @@ -626,7 +626,7 @@ class DXDevice : public offloadtest::Device { addResourceUploadCommands(R, IS, Buffer, UploadBuffer); // Store heap in Bundle so it lives until caller releases the Bundle - Bundle.emplace_back(UploadBuffer, Buffer, nullptr, heap); + Bundle.emplace_back(UploadBuffer, Buffer, nullptr, Heap); RegOffset++; } @@ -775,47 +775,47 @@ class DXDevice : public offloadtest::Device { "Failed to create committed resource (readback buffer).")) return Err; - // Tile mapping setup (optional if numTiles > 0) - UINT numTiles = static_cast(R.TilesMapped); - ComPtr heap; // optional, only created if numTiles > 0 + // Tile mapping setup (optional if NumTiles > 0) + UINT NumTiles = static_cast(R.TilesMapped); + ComPtr Heap; // optional, only created if NumTiles > 0 - if (numTiles > 0) { - std::vector startCoords(numTiles); - std::vector regionSizes(numTiles); - 
std::vector rangeFlags( - numTiles, D3D12_TILE_RANGE_FLAG_NONE); - std::vector heapRangeStartOffsets(numTiles); - std::vector rangeTileCounts(numTiles, 1); + if (NumTiles > 0) { + std::vector StartCoords(NumTiles); + std::vector RegionSizes(NumTiles); + std::vector RangeFlags( + NumTiles, D3D12_TILE_RANGE_FLAG_NONE); + std::vector HeapRangeStartOffsets(NumTiles); + std::vector RangeTileCounts(NumTiles, 1); // Create a heap large enough for the mapped tiles - D3D12_HEAP_DESC heapDesc = {}; - heapDesc.Properties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT); - heapDesc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; - heapDesc.SizeInBytes = static_cast(numTiles) * + D3D12_HEAP_DESC HeapDesc = {}; + HeapDesc.Properties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT); + HeapDesc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; + HeapDesc.SizeInBytes = static_cast(NumTiles) * D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; - heapDesc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; + HeapDesc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; if (auto Err = - HR::toError(Device->CreateHeap(&heapDesc, IID_PPV_ARGS(&heap)), + HR::toError(Device->CreateHeap(&HeapDesc, IID_PPV_ARGS(&Heap)), "Failed to create heap for tiled UAV resource.")) return Err; // Fill tile coordinates and region sizes - for (UINT i = 0; i < numTiles; ++i) { - startCoords[i] = {i, 0, 0, 0}; - regionSizes[i].NumTiles = 1; - regionSizes[i].UseBox = FALSE; - heapRangeStartOffsets[i] = i; + for (UINT I = 0; I < NumTiles; ++I) { + StartCoords[I] = {I, 0, 0, 0}; + RegionSizes[I].NumTiles = 1; + RegionSizes[I].UseBox = FALSE; + HeapRangeStartOffsets[I] = I; } // Retrieve a command queue from InvocationState ID3D12CommandQueue *CommandQueue = IS.Queue.Get(); - // Map the first numTiles tiles in the Buffer + // Map the first NumTiles tiles in the Buffer CommandQueue->UpdateTileMappings( - Buffer.Get(), numTiles, startCoords.data(), regionSizes.data(), - heap.Get(), numTiles, rangeFlags.data(), - 
heapRangeStartOffsets.data(), rangeTileCounts.data(), + Buffer.Get(), NumTiles, StartCoords.data(), RegionSizes.data(), + Heap.Get(), NumTiles, RangeFlags.data(), + HeapRangeStartOffsets.data(), RangeTileCounts.data(), D3D12_TILE_MAPPING_FLAG_NONE); } @@ -839,7 +839,7 @@ class DXDevice : public offloadtest::Device { addResourceUploadCommands(R, IS, Buffer, UploadBuffer); // Store heap in Bundle so it lives until caller releases the Bundle - Bundle.emplace_back(UploadBuffer, Buffer, ReadBackBuffer, heap); + Bundle.emplace_back(UploadBuffer, Buffer, ReadBackBuffer, Heap); RegOffset++; } @@ -1007,54 +1007,54 @@ class DXDevice : public offloadtest::Device { "Failed to create committed resource (upload buffer).")) return Err; - // Tile mapping setup (optional if numTiles > 0) - UINT numTiles = static_cast(R.TilesMapped); - ComPtr heap; // optional, only created if numTiles > 0 + // Tile mapping setup (optional if NumTiles > 0) + UINT NumTiles = static_cast(R.TilesMapped); + ComPtr Heap; // optional, only created if NumTiles > 0 - if (numTiles > 0) { - std::vector startCoords(numTiles); - std::vector regionSizes(numTiles); - std::vector rangeFlags( - numTiles, D3D12_TILE_RANGE_FLAG_NONE); - std::vector heapRangeStartOffsets(numTiles); - std::vector rangeTileCounts(numTiles, 1); + if (NumTiles > 0) { + std::vector StartCoords(NumTiles); + std::vector RegionSizes(NumTiles); + std::vector RangeFlags( + NumTiles, D3D12_TILE_RANGE_FLAG_NONE); + std::vector HeapRangeStartOffsets(NumTiles); + std::vector RangeTileCounts(NumTiles, 1); // Create a heap large enough for the mapped tiles - D3D12_HEAP_DESC heapDesc = {}; - heapDesc.Properties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT); - heapDesc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; - heapDesc.SizeInBytes = static_cast(numTiles) * + D3D12_HEAP_DESC HeapDesc = {}; + HeapDesc.Properties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT); + HeapDesc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; + 
HeapDesc.SizeInBytes = static_cast(NumTiles) * D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; - heapDesc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; + HeapDesc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; if (auto Err = - HR::toError(Device->CreateHeap(&heapDesc, IID_PPV_ARGS(&heap)), + HR::toError(Device->CreateHeap(&HeapDesc, IID_PPV_ARGS(&Heap)), "Failed to create heap for tiled CBV resource.")) return Err; // Fill tile coordinates and region sizes - for (UINT i = 0; i < numTiles; ++i) { - startCoords[i] = {i, 0, 0, 0}; - regionSizes[i].NumTiles = 1; - regionSizes[i].UseBox = FALSE; - heapRangeStartOffsets[i] = i; + for (UINT I = 0; I < NumTiles; ++I) { + StartCoords[I] = {I, 0, 0, 0}; + RegionSizes[I].NumTiles = 1; + RegionSizes[I].UseBox = FALSE; + HeapRangeStartOffsets[I] = I; } // Retrieve a command queue from InvocationState ID3D12CommandQueue *CommandQueue = IS.Queue.Get(); - // Map the first numTiles tiles in the Buffer + // Map the first NumTiles tiles in the Buffer CommandQueue->UpdateTileMappings( - Buffer.Get(), numTiles, startCoords.data(), regionSizes.data(), - heap.Get(), numTiles, rangeFlags.data(), - heapRangeStartOffsets.data(), rangeTileCounts.data(), + Buffer.Get(), NumTiles, StartCoords.data(), RegionSizes.data(), + Heap.Get(), NumTiles, RangeFlags.data(), + HeapRangeStartOffsets.data(), RangeTileCounts.data(), D3D12_TILE_MAPPING_FLAG_NONE); } // Upload data initialization void *ResDataPtr = nullptr; - D3D12_RANGE range = {0, 0}; // no reads expected - if (SUCCEEDED(UploadBuffer->Map(0, &range, &ResDataPtr))) { + D3D12_RANGE Range = {0, 0}; // no reads expected + if (SUCCEEDED(UploadBuffer->Map(0, &Range, &ResDataPtr))) { memcpy(ResDataPtr, ResData.get(), R.size()); // Zero remaining bytes if the buffer is padded if (R.size() < BufferSize) { @@ -1071,7 +1071,7 @@ class DXDevice : public offloadtest::Device { addResourceUploadCommands(R, IS, Buffer, UploadBuffer); // Store resource bundle (heap optional) - Bundle.emplace_back(UploadBuffer, Buffer, 
nullptr, heap); + Bundle.emplace_back(UploadBuffer, Buffer, nullptr, Heap); RegOffset++; } From 59a0b7c5b42c84ba80b1d26cf7229fcb6e05bd22 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Mon, 20 Oct 2025 18:51:17 -0700 Subject: [PATCH 08/20] address Justin and Damyan --- include/Support/Pipeline.h | 2 +- lib/API/DX/Device.cpp | 68 +++++++++++-------- lib/API/VK/Device.cpp | 8 +-- .../HLSLLib/PartiallyMappedResources.test | 17 +++-- 4 files changed, 54 insertions(+), 41 deletions(-) diff --git a/include/Support/Pipeline.h b/include/Support/Pipeline.h index 2f07be12a..3f03a6046 100644 --- a/include/Support/Pipeline.h +++ b/include/Support/Pipeline.h @@ -143,7 +143,7 @@ struct Resource { std::optional VKBinding; Buffer *BufferPtr = nullptr; bool HasCounter; - int TilesMapped = -1; + std::optional TilesMapped; bool isRaw() const { switch (Kind) { diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp index 5f3571ad3..08f1311a4 100644 --- a/lib/API/DX/Device.cpp +++ b/lib/API/DX/Device.cpp @@ -244,7 +244,6 @@ class DXDevice : public offloadtest::Device { ComPtr Upload; ComPtr Buffer; ComPtr Readback; - // In unmapped cases, the Heap lifetime needs to be preserved ComPtr Heap; ResourceSet(ComPtr Upload, ComPtr Buffer, ComPtr Readback, @@ -524,7 +523,7 @@ class DXDevice : public offloadtest::Device { addUploadEndBarrier(IS, Destination, R.isReadWrite()); } - llvm::Expected createUnmappedSRV(Resource &R, + llvm::Expected createReservedSRV(Resource &R, InvocationState &IS) { ResourceBundle Bundle; const uint32_t BufferSize = R.size(); @@ -537,8 +536,10 @@ class DXDevice : public offloadtest::Device { for (const auto &ResData : R.BufferPtr->Data) { llvm::outs() << "Creating SRV: { Size = " << BufferSize << ", Register = t" << R.DXBinding.Register + RegOffset - << ", Space = " << R.DXBinding.Space - << ", TilesMapped = " << R.TilesMapped << " }\n"; + << ", Space = " << R.DXBinding.Space; + if (R.TilesMapped) + llvm::outs() << ", TilesMapped = " << *R.TilesMapped; + 
llvm::outs() << " }\n"; // Reserved SRV resource ComPtr Buffer; @@ -563,7 +564,7 @@ class DXDevice : public offloadtest::Device { return Err; // Tile mapping setup (optional if NumTiles > 0) - UINT NumTiles = static_cast(R.TilesMapped); + UINT NumTiles = static_cast(*R.TilesMapped); ComPtr Heap; // optional, only created if NumTiles > 0 if (NumTiles > 0) { @@ -633,8 +634,8 @@ class DXDevice : public offloadtest::Device { return Bundle; } - llvm::Expected createMappedSRV(Resource &R, - InvocationState &IS) { + llvm::Expected createCommittedSRV(Resource &R, + InvocationState &IS) { ResourceBundle Bundle; const D3D12_HEAP_PROPERTIES HeapProp = @@ -685,9 +686,9 @@ class DXDevice : public offloadtest::Device { } llvm::Expected createSRV(Resource &R, InvocationState &IS) { - if (R.TilesMapped != -1) - return createUnmappedSRV(R, IS); - return createMappedSRV(R, IS); + if (R.TilesMapped) + return createReservedSRV(R, IS); + return createCommittedSRV(R, IS); } // returns the next available HeapIdx @@ -712,7 +713,7 @@ class DXDevice : public offloadtest::Device { return HeapIdx; } - llvm::Expected createUnmappedUAV(Resource &R, + llvm::Expected createReservedUAV(Resource &R, InvocationState &IS) { ResourceBundle Bundle; const uint32_t BufferSize = getUAVBufferSize(R); @@ -740,8 +741,10 @@ class DXDevice : public offloadtest::Device { llvm::outs() << "Creating UAV: { Size = " << BufferSize << ", Register = u" << R.DXBinding.Register + RegOffset << ", Space = " << R.DXBinding.Space - << ", HasCounter = " << R.HasCounter - << ", TilesMapped = " << R.TilesMapped << " }\n"; + << ", HasCounter = " << R.HasCounter; + if (R.TilesMapped) + llvm::outs() << ", TilesMapped = " << *R.TilesMapped; + llvm::outs() << " }\n"; // Reserved UAV resource ComPtr Buffer; @@ -776,7 +779,7 @@ class DXDevice : public offloadtest::Device { return Err; // Tile mapping setup (optional if NumTiles > 0) - UINT NumTiles = static_cast(R.TilesMapped); + UINT NumTiles = static_cast(*R.TilesMapped); ComPtr 
Heap; // optional, only created if NumTiles > 0 if (NumTiles > 0) { @@ -846,8 +849,8 @@ class DXDevice : public offloadtest::Device { return Bundle; } - llvm::Expected createFullyMappedUAV(Resource &R, - InvocationState &IS) { + llvm::Expected createCommittedUAV(Resource &R, + InvocationState &IS) { ResourceBundle Bundle; const uint32_t BufferSize = getUAVBufferSize(R); @@ -924,9 +927,9 @@ class DXDevice : public offloadtest::Device { } llvm::Expected createUAV(Resource &R, InvocationState &IS) { - if (R.TilesMapped != -1) - return createUnmappedUAV(R, IS); - return createFullyMappedUAV(R, IS); + if (R.TilesMapped) + return createReservedUAV(R, IS); + return createCommittedUAV(R, IS); } // returns the next available HeapIdx @@ -943,8 +946,11 @@ class DXDevice : public offloadtest::Device { for (const ResourceSet &RS : ResBundle) { llvm::outs() << "UAV: HeapIdx = " << HeapIdx << " EltSize = " << EltSize << " NumElts = " << NumElts - << " HasCounter = " << R.HasCounter - << " TilesMapped = " << R.TilesMapped << "\n"; + << " HasCounter = " << R.HasCounter; + if (R.TilesMapped) + llvm::outs() << ", TilesMapped = " << *R.TilesMapped; + llvm::outs() << " }\n"; + D3D12_CPU_DESCRIPTOR_HANDLE UAVHandle = UAVHandleHeapStart; UAVHandle.ptr += HeapIdx * DescHandleIncSize; ID3D12Resource *CounterBuffer = R.HasCounter ? 
RS.Buffer.Get() : nullptr; @@ -959,7 +965,7 @@ class DXDevice : public offloadtest::Device { return (Sz + 255u) & 0xFFFFFFFFFFFFFF00; } - llvm::Expected createUnmappedCBV(Resource &R, + llvm::Expected createReservedCBV(Resource &R, InvocationState &IS) { ResourceBundle Bundle; @@ -983,8 +989,10 @@ class DXDevice : public offloadtest::Device { for (const auto &ResData : R.BufferPtr->Data) { llvm::outs() << "Creating CBV: { Size = " << BufferSize << ", Register = b" << R.DXBinding.Register + RegOffset - << ", Space = " << R.DXBinding.Space - << ", TilesMapped = " << R.TilesMapped << " }\n"; + << ", Space = " << R.DXBinding.Space; + if (R.TilesMapped) + llvm::outs() << ", TilesMapped = " << *R.TilesMapped; + llvm::outs() << " }\n"; // Reserved CBV resource ComPtr Buffer; @@ -1008,7 +1016,7 @@ class DXDevice : public offloadtest::Device { return Err; // Tile mapping setup (optional if NumTiles > 0) - UINT NumTiles = static_cast(R.TilesMapped); + UINT NumTiles = static_cast(*R.TilesMapped); ComPtr Heap; // optional, only created if NumTiles > 0 if (NumTiles > 0) { @@ -1078,8 +1086,8 @@ class DXDevice : public offloadtest::Device { return Bundle; } - llvm::Expected createFullyMappedCBV(Resource &R, - InvocationState &IS) { + llvm::Expected createCommittedCBV(Resource &R, + InvocationState &IS) { ResourceBundle Bundle; const size_t CBVSize = getCBVSize(R.size()); @@ -1147,9 +1155,9 @@ class DXDevice : public offloadtest::Device { } llvm::Expected createCBV(Resource &R, InvocationState &IS) { - if (R.TilesMapped != -1) - return createUnmappedCBV(R, IS); - return createFullyMappedCBV(R, IS); + if (R.TilesMapped) + return createReservedCBV(R, IS); + return createCommittedCBV(R, IS); } // returns the next available HeapIdx diff --git a/lib/API/VK/Device.cpp b/lib/API/VK/Device.cpp index e852c19f0..801487f59 100644 --- a/lib/API/VK/Device.cpp +++ b/lib/API/VK/Device.cpp @@ -776,8 +776,8 @@ class VKDevice : public offloadtest::Device { std::errc::invalid_argument, "No 
RenderTarget buffer specified for graphics pipeline."); Resource FrameBuffer = { - ResourceKind::Texture2D, "RenderTarget", {}, {}, - P.Bindings.RTargetBufferPtr, false}; + ResourceKind::Texture2D, "RenderTarget", {}, {}, + P.Bindings.RTargetBufferPtr, false, std::nullopt}; IS.FrameBufferResource.Size = P.Bindings.RTargetBufferPtr->size(); IS.FrameBufferResource.BufferPtr = P.Bindings.RTargetBufferPtr; IS.FrameBufferResource.ImageLayout = @@ -804,8 +804,8 @@ class VKDevice : public offloadtest::Device { std::errc::invalid_argument, "No Vertex buffer specified for graphics pipeline."); const Resource VertexBuffer = { - ResourceKind::StructuredBuffer, "VertexBuffer", {}, {}, - P.Bindings.VertexBufferPtr, false}; + ResourceKind::StructuredBuffer, "VertexBuffer", {}, {}, + P.Bindings.VertexBufferPtr, false, std::nullopt}; auto ExVHostBuf = createBuffer(IS, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, VertexBuffer.size(), diff --git a/test/Feature/HLSLLib/PartiallyMappedResources.test b/test/Feature/HLSLLib/PartiallyMappedResources.test index 26b8ff333..cd90df01a 100644 --- a/test/Feature/HLSLLib/PartiallyMappedResources.test +++ b/test/Feature/HLSLLib/PartiallyMappedResources.test @@ -10,10 +10,10 @@ StructuredBuffer Y : register(t1); RWStructuredBuffer Out : register(u2); RWStructuredBuffer CAFM : register(u3); - [numthreads(1,1,1)] void main() { - // 32 S structs inside X or Y occupy 64KB of data. (32 * 512 ints * 4 bytes per int) + // 32 S structs inside X or Y occupy 64KB of data. + // (32 * 512 ints * 4 bytes per int) // So, any index into the buffer >= [32] will access a new "tile" int idx = 0; @@ -54,12 +54,12 @@ Buffers: Format: Int32 Stride: 2048 # S is 512 ints, 512*4 = 2048. FillSize: 131072 - FillValue: 1 + FillValue: 9001 - Name: Y Format: Int32 Stride: 2048 FillSize: 131072 - FillValue: 2 + FillValue: 9002 - Name: Out Format: Int32 Stride: 4 @@ -69,7 +69,7 @@ Buffers: Stride: 4 # first 4 values are the actual data retrieved. 
For non-resident loads, 0 is expected. # last 4 values are the status. 1 is expected for resident memory, 0 for non-resident - Data: [1, 0, 0, 0, 1, 0, 0, 0] + Data: [9001, 0, 0, 0, 1, 0, 0, 0] - Name: CAFM Format: Int32 Stride: 4 @@ -124,7 +124,12 @@ DescriptorSets: Binding: 3 #--- end -# XFAIL: Clang +# Unimplemented https://github.com/llvm/llvm-project/issues/138910 +# AND https://github.com/llvm/llvm-project/issues/99204 +# XFAIL: Clang + +# Unimplemented https://github.com/llvm/llvm-project/issues/138910 +# AND https://github.com/llvm/llvm-project/issues/99204 # XFAIL: Vulkan # RUN: split-file %s %t From d3cbf91a14a9f283daedc55c653eb7ed50f6235b Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Mon, 20 Oct 2025 19:02:55 -0700 Subject: [PATCH 09/20] fix build errors --- lib/API/DX/Device.cpp | 12 ++++++------ lib/Support/Pipeline.cpp | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp index 08f1311a4..a030090ae 100644 --- a/lib/API/DX/Device.cpp +++ b/lib/API/DX/Device.cpp @@ -564,7 +564,7 @@ class DXDevice : public offloadtest::Device { return Err; // Tile mapping setup (optional if NumTiles > 0) - UINT NumTiles = static_cast(*R.TilesMapped); + const UINT NumTiles = static_cast(*R.TilesMapped); ComPtr Heap; // optional, only created if NumTiles > 0 if (NumTiles > 0) { @@ -609,7 +609,7 @@ class DXDevice : public offloadtest::Device { // Upload data initialization void *ResDataPtr = nullptr; - D3D12_RANGE Range = {0, 0}; // no reads expected + const D3D12_RANGE Range = {0, 0}; // no reads expected if (SUCCEEDED(UploadBuffer->Map(0, &Range, &ResDataPtr))) { memcpy(ResDataPtr, ResData.get(), R.size()); // Zero remaining bytes if the buffer is padded @@ -779,7 +779,7 @@ class DXDevice : public offloadtest::Device { return Err; // Tile mapping setup (optional if NumTiles > 0) - UINT NumTiles = static_cast(*R.TilesMapped); + const UINT NumTiles = static_cast(*R.TilesMapped); ComPtr Heap; // optional, 
only created if NumTiles > 0 if (NumTiles > 0) { @@ -824,7 +824,7 @@ class DXDevice : public offloadtest::Device { // Upload data initialization void *ResDataPtr = nullptr; - D3D12_RANGE Range = {0, 0}; // no reads expected + const D3D12_RANGE Range = {0, 0}; // no reads expected if (SUCCEEDED(UploadBuffer->Map(0, &Range, &ResDataPtr))) { memcpy(ResDataPtr, ResData.get(), R.size()); // Zero remaining bytes if the buffer is padded @@ -1016,7 +1016,7 @@ class DXDevice : public offloadtest::Device { return Err; // Tile mapping setup (optional if NumTiles > 0) - UINT NumTiles = static_cast(*R.TilesMapped); + const UINT NumTiles = static_cast(*R.TilesMapped); ComPtr Heap; // optional, only created if NumTiles > 0 if (NumTiles > 0) { @@ -1061,7 +1061,7 @@ class DXDevice : public offloadtest::Device { // Upload data initialization void *ResDataPtr = nullptr; - D3D12_RANGE Range = {0, 0}; // no reads expected + const D3D12_RANGE Range = {0, 0}; // no reads expected if (SUCCEEDED(UploadBuffer->Map(0, &Range, &ResDataPtr))) { memcpy(ResDataPtr, ResData.get(), R.size()); // Zero remaining bytes if the buffer is padded diff --git a/lib/Support/Pipeline.cpp b/lib/Support/Pipeline.cpp index 1178e3b74..633456a76 100644 --- a/lib/Support/Pipeline.cpp +++ b/lib/Support/Pipeline.cpp @@ -291,7 +291,7 @@ void MappingTraits::mapping(IO &I, I.mapRequired("Name", R.Name); I.mapRequired("Kind", R.Kind); I.mapOptional("HasCounter", R.HasCounter, 0); - I.mapOptional("TilesMapped", R.TilesMapped, -1); + I.mapOptional("TilesMapped", R.TilesMapped); I.mapRequired("DirectXBinding", R.DXBinding); I.mapOptional("VulkanBinding", R.VKBinding); } From 8d9cbbb3a44a92e84fdcb159db411e9836e7a453 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Tue, 21 Oct 2025 13:33:10 -0700 Subject: [PATCH 10/20] attempt to unify SRV --- lib/API/DX/Device.cpp | 103 ++++++++++-------------------------------- 1 file changed, 24 insertions(+), 79 deletions(-) diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp 
index a030090ae..1db228bd5 100644 --- a/lib/API/DX/Device.cpp +++ b/lib/API/DX/Device.cpp @@ -149,9 +149,12 @@ static D3D12_RESOURCE_DESC getResourceDescription(const Resource &R) { const uint32_t Width = R.isTexture() ? B.OutputProps.Width : getUAVBufferSize(R); const uint32_t Height = R.isTexture() ? B.OutputProps.Height : 1; - const D3D12_TEXTURE_LAYOUT Layout = R.isTexture() - ? D3D12_TEXTURE_LAYOUT_UNKNOWN - : D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + D3D12_TEXTURE_LAYOUT Layout; + if (R.isTexture() && getDXKind(R.Kind) == SRV) + Layout = D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE; + else + Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + const D3D12_RESOURCE_FLAGS Flags = R.isReadWrite() ? D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS : D3D12_RESOURCE_FLAG_NONE; @@ -523,25 +526,25 @@ class DXDevice : public offloadtest::Device { addUploadEndBarrier(IS, Destination, R.isReadWrite()); } - llvm::Expected createReservedSRV(Resource &R, - InvocationState &IS) { + llvm::Expected createSRV(Resource &R, InvocationState &IS) { ResourceBundle Bundle; - const uint32_t BufferSize = R.size(); const D3D12_RESOURCE_DESC ResDesc = getResourceDescription(R); - const D3D12_RESOURCE_DESC UploadResDesc = - CD3DX12_RESOURCE_DESC::Buffer(BufferSize); + CD3DX12_RESOURCE_DESC::Buffer(R.size()); + const D3D12_HEAP_PROPERTIES UploadHeapProps = + CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD); uint32_t RegOffset = 0; + for (const auto &ResData : R.BufferPtr->Data) { - llvm::outs() << "Creating SRV: { Size = " << BufferSize - << ", Register = t" << R.DXBinding.Register + RegOffset + llvm::outs() << "Creating SRV: { Size = " << R.size() << ", Register = t" + << R.DXBinding.Register + RegOffset << ", Space = " << R.DXBinding.Space; + if (R.TilesMapped) llvm::outs() << ", TilesMapped = " << *R.TilesMapped; llvm::outs() << " }\n"; - // Reserved SRV resource ComPtr Buffer; if (auto Err = HR::toError(Device->CreateReservedResource( @@ -550,11 +553,8 @@ class DXDevice : public offloadtest::Device { "Failed 
to create reserved resource (buffer).")) return Err; - // Committed Upload Buffer (CPU visible) + // Committed upload buffer ComPtr UploadBuffer; - const D3D12_HEAP_PROPERTIES UploadHeapProps = - CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD); - if (auto Err = HR::toError( Device->CreateCommittedResource( &UploadHeapProps, D3D12_HEAP_FLAG_NONE, &UploadResDesc, @@ -564,7 +564,15 @@ class DXDevice : public offloadtest::Device { return Err; // Tile mapping setup (optional if NumTiles > 0) - const UINT NumTiles = static_cast(*R.TilesMapped); + UINT NumTiles = 0; + if (R.TilesMapped.has_value()) { + NumTiles = static_cast(*R.TilesMapped); + } else { + // Map the entire buffer by computing how many 64KB tiles cover it + NumTiles = static_cast( + (ResDesc.Width + D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT - 1) / + D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT); + } ComPtr Heap; // optional, only created if NumTiles > 0 if (NumTiles > 0) { @@ -612,11 +620,6 @@ class DXDevice : public offloadtest::Device { const D3D12_RANGE Range = {0, 0}; // no reads expected if (SUCCEEDED(UploadBuffer->Map(0, &Range, &ResDataPtr))) { memcpy(ResDataPtr, ResData.get(), R.size()); - // Zero remaining bytes if the buffer is padded - if (R.size() < BufferSize) { - memset(static_cast(ResDataPtr) + R.size(), 0, - BufferSize - R.size()); - } UploadBuffer->Unmap(0, nullptr); } else { return llvm::createStringError(std::errc::io_error, @@ -630,67 +633,9 @@ class DXDevice : public offloadtest::Device { Bundle.emplace_back(UploadBuffer, Buffer, nullptr, Heap); RegOffset++; } - return Bundle; } - llvm::Expected createCommittedSRV(Resource &R, - InvocationState &IS) { - ResourceBundle Bundle; - - const D3D12_HEAP_PROPERTIES HeapProp = - CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT); - const D3D12_RESOURCE_DESC ResDesc = getResourceDescription(R); - const D3D12_HEAP_PROPERTIES UploadHeapProp = - CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD); - const D3D12_RESOURCE_DESC UploadResDesc = - 
CD3DX12_RESOURCE_DESC::Buffer(R.size()); - - uint32_t RegOffset = 0; - for (const auto &ResData : R.BufferPtr->Data) { - llvm::outs() << "Creating SRV: { Size = " << R.size() << ", Register = t" - << R.DXBinding.Register + RegOffset - << ", Space = " << R.DXBinding.Space << " }\n"; - - ComPtr Buffer; - if (auto Err = HR::toError( - Device->CreateCommittedResource( - &HeapProp, D3D12_HEAP_FLAG_NONE, &ResDesc, - D3D12_RESOURCE_STATE_COMMON, nullptr, IID_PPV_ARGS(&Buffer)), - "Failed to create committed resource (buffer).")) - return Err; - - ComPtr UploadBuffer; - if (auto Err = HR::toError( - Device->CreateCommittedResource( - &UploadHeapProp, D3D12_HEAP_FLAG_NONE, &UploadResDesc, - D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, - IID_PPV_ARGS(&UploadBuffer)), - "Failed to create committed resource (upload buffer).")) - return Err; - - // Initialize the SRV data - void *ResDataPtr = nullptr; - if (auto Err = HR::toError(UploadBuffer->Map(0, nullptr, &ResDataPtr), - "Failed to acquire UAV data pointer.")) - return Err; - memcpy(ResDataPtr, ResData.get(), R.size()); - UploadBuffer->Unmap(0, nullptr); - - addResourceUploadCommands(R, IS, Buffer, UploadBuffer); - - Bundle.emplace_back(UploadBuffer, Buffer, nullptr); - RegOffset++; - } - return Bundle; - } - - llvm::Expected createSRV(Resource &R, InvocationState &IS) { - if (R.TilesMapped) - return createReservedSRV(R, IS); - return createCommittedSRV(R, IS); - } - // returns the next available HeapIdx uint32_t bindSRV(Resource &R, InvocationState &IS, uint32_t HeapIdx, ResourceBundle ResBundle) { From fb4a631041c8b23dcc5230fb9cb4515dcf2d42b7 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Wed, 22 Oct 2025 15:03:17 -0700 Subject: [PATCH 11/20] fix texturelayout flag and another flag --- lib/API/DX/Device.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp index 1db228bd5..71224f862 100644 --- a/lib/API/DX/Device.cpp +++ b/lib/API/DX/Device.cpp @@ 
-150,8 +150,10 @@ static D3D12_RESOURCE_DESC getResourceDescription(const Resource &R) { R.isTexture() ? B.OutputProps.Width : getUAVBufferSize(R); const uint32_t Height = R.isTexture() ? B.OutputProps.Height : 1; D3D12_TEXTURE_LAYOUT Layout; - if (R.isTexture() && getDXKind(R.Kind) == SRV) - Layout = D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE; + if (R.isTexture()) + Layout = getDXKind(R.Kind) == SRV + ? D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE + : D3D12_TEXTURE_LAYOUT_UNKNOWN; else Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; @@ -589,7 +591,7 @@ class DXDevice : public offloadtest::Device { HeapDesc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; HeapDesc.SizeInBytes = static_cast(NumTiles) * D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; - HeapDesc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; + HeapDesc.Flags = D3D12_HEAP_FLAG_ALLOW_ALL_BUFFERS_AND_TEXTURES; if (auto Err = HR::toError(Device->CreateHeap(&HeapDesc, IID_PPV_ARGS(&Heap)), From 1c9752fd4eb83f1cd7bc475ba8230bb75cbb95f3 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Wed, 22 Oct 2025 15:06:56 -0700 Subject: [PATCH 12/20] add metal XFAIL --- test/Feature/HLSLLib/PartiallyMappedResources.test | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/Feature/HLSLLib/PartiallyMappedResources.test b/test/Feature/HLSLLib/PartiallyMappedResources.test index cd90df01a..956117412 100644 --- a/test/Feature/HLSLLib/PartiallyMappedResources.test +++ b/test/Feature/HLSLLib/PartiallyMappedResources.test @@ -132,6 +132,10 @@ DescriptorSets: # AND https://github.com/llvm/llvm-project/issues/99204 # XFAIL: Vulkan +# Bug https://github.com/llvm/offload-test-suite/issues/182 +# Metal API seems to have problems with reserved resources +# XFAIL: Metal + # RUN: split-file %s %t # RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl # RUN: %offloader %t/pipeline.yaml %t.o From 3e78f39985bf52f8d0e386d402a3090420f4dcc1 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Fri, 24 Oct 2025 10:39:28 -0700 Subject: [PATCH 
13/20] improve, but keep UAV the same --- lib/API/DX/Device.cpp | 205 +++++++++--------------------------------- 1 file changed, 41 insertions(+), 164 deletions(-) diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp index 71224f862..423f50233 100644 --- a/lib/API/DX/Device.cpp +++ b/lib/API/DX/Device.cpp @@ -151,7 +151,7 @@ static D3D12_RESOURCE_DESC getResourceDescription(const Resource &R) { const uint32_t Height = R.isTexture() ? B.OutputProps.Height : 1; D3D12_TEXTURE_LAYOUT Layout; if (R.isTexture()) - Layout = getDXKind(R.Kind) == SRV + Layout = getDXKind(R.Kind) == SRV || getDXKind(R.Kind) == UAV ? D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE : D3D12_TEXTURE_LAYOUT_UNKNOWN; else @@ -528,6 +528,18 @@ class DXDevice : public offloadtest::Device { addUploadEndBarrier(IS, Destination, R.isReadWrite()); } + UINT getNumTiles(std::optional NumTiles, UINT64 Width) { + UINT Ret; + if (NumTiles.has_value()) + Ret = static_cast(*NumTiles); + else + // Map the entire buffer by computing how many 64KB tiles cover it + Ret = static_cast( + (Width + D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES - 1) / + D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES); + return Ret; + } + llvm::Expected createSRV(Resource &R, InvocationState &IS) { ResourceBundle Bundle; const D3D12_RESOURCE_DESC ResDesc = getResourceDescription(R); @@ -566,15 +578,7 @@ class DXDevice : public offloadtest::Device { return Err; // Tile mapping setup (optional if NumTiles > 0) - UINT NumTiles = 0; - if (R.TilesMapped.has_value()) { - NumTiles = static_cast(*R.TilesMapped); - } else { - // Map the entire buffer by computing how many 64KB tiles cover it - NumTiles = static_cast( - (ResDesc.Width + D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT - 1) / - D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT); - } + const UINT NumTiles = getNumTiles(R.TilesMapped, ResDesc.Width); ComPtr Heap; // optional, only created if NumTiles > 0 if (NumTiles > 0) { @@ -660,144 +664,7 @@ class DXDevice : public offloadtest::Device { return HeapIdx; 
} - llvm::Expected createReservedUAV(Resource &R, - InvocationState &IS) { - ResourceBundle Bundle; - const uint32_t BufferSize = getUAVBufferSize(R); - const D3D12_RESOURCE_DESC ResDesc = getResourceDescription(R); - - const D3D12_HEAP_PROPERTIES ReadBackHeapProp = - CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK); - const D3D12_RESOURCE_DESC ReadBackResDesc = { - D3D12_RESOURCE_DIMENSION_BUFFER, - 0, - BufferSize, - 1, - 1, - 1, - DXGI_FORMAT_UNKNOWN, - {1, 0}, - D3D12_TEXTURE_LAYOUT_ROW_MAJOR, - D3D12_RESOURCE_FLAG_NONE}; - - const D3D12_RESOURCE_DESC UploadResDesc = - CD3DX12_RESOURCE_DESC::Buffer(BufferSize); - - uint32_t RegOffset = 0; - for (const auto &ResData : R.BufferPtr->Data) { - llvm::outs() << "Creating UAV: { Size = " << BufferSize - << ", Register = u" << R.DXBinding.Register + RegOffset - << ", Space = " << R.DXBinding.Space - << ", HasCounter = " << R.HasCounter; - if (R.TilesMapped) - llvm::outs() << ", TilesMapped = " << *R.TilesMapped; - llvm::outs() << " }\n"; - - // Reserved UAV resource - ComPtr Buffer; - if (auto Err = - HR::toError(Device->CreateReservedResource( - &ResDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, - IID_PPV_ARGS(&Buffer)), - "Failed to create reserved resource (buffer).")) - return Err; - - // Committed Upload Buffer (CPU visible) - ComPtr UploadBuffer; - const D3D12_HEAP_PROPERTIES UploadHeapProps = - CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD); - - if (auto Err = HR::toError( - Device->CreateCommittedResource( - &UploadHeapProps, D3D12_HEAP_FLAG_NONE, &UploadResDesc, - D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, - IID_PPV_ARGS(&UploadBuffer)), - "Failed to create committed resource (upload buffer).")) - return Err; - - // Readback buffer (committed) - ComPtr ReadBackBuffer; - if (auto Err = HR::toError( - Device->CreateCommittedResource( - &ReadBackHeapProp, D3D12_HEAP_FLAG_NONE, &ReadBackResDesc, - D3D12_RESOURCE_STATE_COPY_DEST, nullptr, - IID_PPV_ARGS(&ReadBackBuffer)), - "Failed to create committed resource 
(readback buffer).")) - return Err; - - // Tile mapping setup (optional if NumTiles > 0) - const UINT NumTiles = static_cast(*R.TilesMapped); - ComPtr Heap; // optional, only created if NumTiles > 0 - - if (NumTiles > 0) { - std::vector StartCoords(NumTiles); - std::vector RegionSizes(NumTiles); - std::vector RangeFlags( - NumTiles, D3D12_TILE_RANGE_FLAG_NONE); - std::vector HeapRangeStartOffsets(NumTiles); - std::vector RangeTileCounts(NumTiles, 1); - - // Create a heap large enough for the mapped tiles - D3D12_HEAP_DESC HeapDesc = {}; - HeapDesc.Properties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT); - HeapDesc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; - HeapDesc.SizeInBytes = static_cast(NumTiles) * - D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; - HeapDesc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; - - if (auto Err = - HR::toError(Device->CreateHeap(&HeapDesc, IID_PPV_ARGS(&Heap)), - "Failed to create heap for tiled UAV resource.")) - return Err; - - // Fill tile coordinates and region sizes - for (UINT I = 0; I < NumTiles; ++I) { - StartCoords[I] = {I, 0, 0, 0}; - RegionSizes[I].NumTiles = 1; - RegionSizes[I].UseBox = FALSE; - HeapRangeStartOffsets[I] = I; - } - - // Retrieve a command queue from InvocationState - ID3D12CommandQueue *CommandQueue = IS.Queue.Get(); - - // Map the first NumTiles tiles in the Buffer - CommandQueue->UpdateTileMappings( - Buffer.Get(), NumTiles, StartCoords.data(), RegionSizes.data(), - Heap.Get(), NumTiles, RangeFlags.data(), - HeapRangeStartOffsets.data(), RangeTileCounts.data(), - D3D12_TILE_MAPPING_FLAG_NONE); - } - - // Upload data initialization - void *ResDataPtr = nullptr; - const D3D12_RANGE Range = {0, 0}; // no reads expected - if (SUCCEEDED(UploadBuffer->Map(0, &Range, &ResDataPtr))) { - memcpy(ResDataPtr, ResData.get(), R.size()); - // Zero remaining bytes if the buffer is padded - if (R.size() < BufferSize) { - memset(static_cast(ResDataPtr) + R.size(), 0, - BufferSize - R.size()); - } - 
UploadBuffer->Unmap(0, nullptr); - } else { - return llvm::createStringError(std::errc::io_error, - "Failed to map upload buffer."); - } - - // Add GPU upload commands - addResourceUploadCommands(R, IS, Buffer, UploadBuffer); - - // Store heap in Bundle so it lives until caller releases the Bundle - Bundle.emplace_back(UploadBuffer, Buffer, ReadBackBuffer, Heap); - RegOffset++; - } - - return Bundle; - } - - llvm::Expected createCommittedUAV(Resource &R, - InvocationState &IS) { + llvm::Expected createUAV(Resource &R, InvocationState &IS) { ResourceBundle Bundle; const uint32_t BufferSize = getUAVBufferSize(R); @@ -872,13 +739,7 @@ class DXDevice : public offloadtest::Device { } return Bundle; } - - llvm::Expected createUAV(Resource &R, InvocationState &IS) { - if (R.TilesMapped) - return createReservedUAV(R, IS); - return createCommittedUAV(R, IS); - } - + // returns the next available HeapIdx uint32_t bindUAV(Resource &R, InvocationState &IS, uint32_t HeapIdx, ResourceBundle ResBundle) { @@ -1221,6 +1082,9 @@ class DXDevice : public offloadtest::Device { } void addReadbackBeginBarrier(InvocationState &IS, ComPtr R) { + const D3D12_RESOURCE_BARRIER b = CD3DX12_RESOURCE_BARRIER::UAV(R.Get()); + IS.CmdList->ResourceBarrier(1, &b); + const D3D12_RESOURCE_BARRIER Barrier = CD3DX12_RESOURCE_BARRIER::Transition( R.Get(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE); @@ -1703,6 +1567,18 @@ class DXDevice : public offloadtest::Device { return llvm::Error::success(); } + llvm::Error waitThenReturnErr(llvm::Error Err, InvocationState &IS) { + // Wait on the GPU before returning the error + llvm::Error WaitErr = waitForSignal(IS); + if (WaitErr) + // joinErrors returns an Error by value (move-only). Just return it + // directly. + return llvm::joinErrors(std::move(WaitErr), std::move(Err)); + + // No waiting error, just return the moved original. 
+ return Err; + } + llvm::Error executeProgram(Pipeline &P) override { llvm::sys::AddSignalHandler( [](void *Cookie) { @@ -1746,7 +1622,8 @@ class DXDevice : public offloadtest::Device { return Err; llvm::outs() << "Buffers created.\n"; if (auto Err = createEvent(State)) - return Err; + return waitThenReturnErr(std::move(Err), State); + llvm::outs() << "Event prepared.\n"; if (P.isCompute()) { @@ -1756,33 +1633,33 @@ class DXDevice : public offloadtest::Device { std::errc::invalid_argument, "Compute pipeline must have exactly one compute shader."); if (auto Err = createComputePSO(P.Shaders[0].Shader->getBuffer(), State)) - return Err; + return waitThenReturnErr(std::move(Err), State); llvm::outs() << "PSO created.\n"; if (auto Err = createComputeCommands(P, State)) - return Err; + return waitThenReturnErr(std::move(Err), State); llvm::outs() << "Compute command list created.\n"; } else { // Create render target, readback and vertex buffer and PSO. if (auto Err = createRenderTarget(P, State)) - return Err; + return waitThenReturnErr(std::move(Err), State); llvm::outs() << "Render target created.\n"; if (auto Err = createVertexBuffer(P, State)) - return Err; + return waitThenReturnErr(std::move(Err), State); llvm::outs() << "Vertex buffer created.\n"; if (auto Err = createGraphicsPSO(P, State)) - return Err; + return waitThenReturnErr(std::move(Err), State); llvm::outs() << "Graphics PSO created.\n"; if (auto Err = createGraphicsCommands(P, State)) - return Err; + return waitThenReturnErr(std::move(Err), State); llvm::outs() << "Graphics command list created complete.\n"; } if (auto Err = executeCommandList(State)) - return Err; + return waitThenReturnErr(std::move(Err), State); llvm::outs() << "Compute commands executed.\n"; if (auto Err = readBack(P, State)) - return Err; + return waitThenReturnErr(std::move(Err), State); llvm::outs() << "Read data back.\n"; return llvm::Error::success(); From fb867274badb2601e626fa3acb656c7d5a14e2d3 Mon Sep 17 00:00:00 2001 From: 
Joshua Batista Date: Fri, 24 Oct 2025 10:53:48 -0700 Subject: [PATCH 14/20] remove unwanted changes --- lib/API/DX/Device.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp index 423f50233..8f2b5fbdb 100644 --- a/lib/API/DX/Device.cpp +++ b/lib/API/DX/Device.cpp @@ -151,7 +151,7 @@ static D3D12_RESOURCE_DESC getResourceDescription(const Resource &R) { const uint32_t Height = R.isTexture() ? B.OutputProps.Height : 1; D3D12_TEXTURE_LAYOUT Layout; if (R.isTexture()) - Layout = getDXKind(R.Kind) == SRV || getDXKind(R.Kind) == UAV + Layout = getDXKind(R.Kind) == SRV ? D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE : D3D12_TEXTURE_LAYOUT_UNKNOWN; else @@ -739,7 +739,6 @@ class DXDevice : public offloadtest::Device { } return Bundle; } - // returns the next available HeapIdx uint32_t bindUAV(Resource &R, InvocationState &IS, uint32_t HeapIdx, ResourceBundle ResBundle) { @@ -1082,9 +1081,6 @@ class DXDevice : public offloadtest::Device { } void addReadbackBeginBarrier(InvocationState &IS, ComPtr R) { - const D3D12_RESOURCE_BARRIER b = CD3DX12_RESOURCE_BARRIER::UAV(R.Get()); - IS.CmdList->ResourceBarrier(1, &b); - const D3D12_RESOURCE_BARRIER Barrier = CD3DX12_RESOURCE_BARRIER::Transition( R.Get(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE); From b9b202bcd40420576acffe7aae399f8d0bb1d522 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Fri, 24 Oct 2025 10:56:20 -0700 Subject: [PATCH 15/20] address confusing comment Justin pointed out --- lib/API/DX/Device.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp index 8f2b5fbdb..572f96329 100644 --- a/lib/API/DX/Device.cpp +++ b/lib/API/DX/Device.cpp @@ -577,7 +577,7 @@ class DXDevice : public offloadtest::Device { "Failed to create committed resource (upload buffer).")) return Err; - // Tile mapping setup (optional if NumTiles > 0) + // Tile mapping setup (only skipped 
when TilesMapped is set to 0) const UINT NumTiles = getNumTiles(R.TilesMapped, ResDesc.Width); ComPtr Heap; // optional, only created if NumTiles > 0 From 361225b8e5c9467d0567bbf4e937c5c97027c85f Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Fri, 24 Oct 2025 11:08:22 -0700 Subject: [PATCH 16/20] clang-format --- lib/API/DX/Device.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp index 572f96329..ade435a3b 100644 --- a/lib/API/DX/Device.cpp +++ b/lib/API/DX/Device.cpp @@ -528,7 +528,7 @@ class DXDevice : public offloadtest::Device { addUploadEndBarrier(IS, Destination, R.isReadWrite()); } - UINT getNumTiles(std::optional NumTiles, UINT64 Width) { + UINT getNumTiles(std::optional NumTiles, UINT64 Width) { UINT Ret; if (NumTiles.has_value()) Ret = static_cast(*NumTiles); @@ -1619,7 +1619,7 @@ class DXDevice : public offloadtest::Device { llvm::outs() << "Buffers created.\n"; if (auto Err = createEvent(State)) return waitThenReturnErr(std::move(Err), State); - + llvm::outs() << "Event prepared.\n"; if (P.isCompute()) { @@ -1632,13 +1632,13 @@ class DXDevice : public offloadtest::Device { return waitThenReturnErr(std::move(Err), State); llvm::outs() << "PSO created.\n"; if (auto Err = createComputeCommands(P, State)) - return waitThenReturnErr(std::move(Err), State); + return waitThenReturnErr(std::move(Err), State); llvm::outs() << "Compute command list created.\n"; } else { // Create render target, readback and vertex buffer and PSO. 
if (auto Err = createRenderTarget(P, State)) - return waitThenReturnErr(std::move(Err), State); + return waitThenReturnErr(std::move(Err), State); llvm::outs() << "Render target created.\n"; if (auto Err = createVertexBuffer(P, State)) return waitThenReturnErr(std::move(Err), State); From eb89f867a684837f09c0457d83ff095f654eb281 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Fri, 24 Oct 2025 12:51:45 -0700 Subject: [PATCH 17/20] address damyan, xfail intel --- lib/API/DX/Device.cpp | 66 ++++++------------- .../HLSLLib/PartiallyMappedResources.test | 3 + 2 files changed, 24 insertions(+), 45 deletions(-) diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp index ade435a3b..02b54f646 100644 --- a/lib/API/DX/Device.cpp +++ b/lib/API/DX/Device.cpp @@ -582,13 +582,6 @@ class DXDevice : public offloadtest::Device { ComPtr Heap; // optional, only created if NumTiles > 0 if (NumTiles > 0) { - std::vector StartCoords(NumTiles); - std::vector RegionSizes(NumTiles); - std::vector RangeFlags( - NumTiles, D3D12_TILE_RANGE_FLAG_NONE); - std::vector HeapRangeStartOffsets(NumTiles); - std::vector RangeTileCounts(NumTiles, 1); - // Create a Heap large enough for the mapped tiles D3D12_HEAP_DESC HeapDesc = {}; HeapDesc.Properties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT); @@ -602,29 +595,26 @@ class DXDevice : public offloadtest::Device { "Failed to create heap for tiled SRV resource.")) return Err; - // Fill tile coordinates and region sizes - for (UINT I = 0; I < NumTiles; ++I) { - StartCoords[I] = {I, 0, 0, 0}; - RegionSizes[I].NumTiles = 1; - RegionSizes[I].UseBox = FALSE; - HeapRangeStartOffsets[I] = I; - } + // Define one contiguous mapping region + D3D12_TILED_RESOURCE_COORDINATE startCoord = {0, 0, 0, 0}; + D3D12_TILE_REGION_SIZE regionSize = {}; + regionSize.NumTiles = NumTiles; + regionSize.UseBox = FALSE; - // Retrieve a command queue from InvocationState - ID3D12CommandQueue *CommandQueue = IS.Queue.Get(); + D3D12_TILE_RANGE_FLAGS rangeFlag = 
D3D12_TILE_RANGE_FLAG_NONE; + UINT heapRangeStartOffset = 0; + UINT rangeTileCount = NumTiles; - // Map the first NumTiles tiles in the Buffer + ID3D12CommandQueue *CommandQueue = IS.Queue.Get(); CommandQueue->UpdateTileMappings( - Buffer.Get(), NumTiles, StartCoords.data(), RegionSizes.data(), - Heap.Get(), NumTiles, RangeFlags.data(), - HeapRangeStartOffsets.data(), RangeTileCounts.data(), + Buffer.Get(), 1, &startCoord, &regionSize, // One region + Heap.Get(), 1, &rangeFlag, &heapRangeStartOffset, &rangeTileCount, D3D12_TILE_MAPPING_FLAG_NONE); } // Upload data initialization void *ResDataPtr = nullptr; - const D3D12_RANGE Range = {0, 0}; // no reads expected - if (SUCCEEDED(UploadBuffer->Map(0, &Range, &ResDataPtr))) { + if (SUCCEEDED(UploadBuffer->Map(0, NULL, &ResDataPtr))) { memcpy(ResDataPtr, ResData.get(), R.size()); UploadBuffer->Unmap(0, nullptr); } else { @@ -632,10 +622,8 @@ class DXDevice : public offloadtest::Device { "Failed to map SRV upload buffer."); } - // Add GPU upload commands addResourceUploadCommands(R, IS, Buffer, UploadBuffer); - // Store heap in Bundle so it lives until caller releases the Bundle Bundle.emplace_back(UploadBuffer, Buffer, nullptr, Heap); RegOffset++; } @@ -1563,18 +1551,6 @@ class DXDevice : public offloadtest::Device { return llvm::Error::success(); } - llvm::Error waitThenReturnErr(llvm::Error Err, InvocationState &IS) { - // Wait on the GPU before returning the error - llvm::Error WaitErr = waitForSignal(IS); - if (WaitErr) - // joinErrors returns an Error by value (move-only). Just return it - // directly. - return llvm::joinErrors(std::move(WaitErr), std::move(Err)); - - // No waiting error, just return the moved original.
- return Err; - } - llvm::Error executeProgram(Pipeline &P) override { llvm::sys::AddSignalHandler( [](void *Cookie) { @@ -1618,7 +1594,7 @@ class DXDevice : public offloadtest::Device { return Err; llvm::outs() << "Buffers created.\n"; if (auto Err = createEvent(State)) - return waitThenReturnErr(std::move(Err), State); + return Err; llvm::outs() << "Event prepared.\n"; @@ -1629,33 +1605,33 @@ class DXDevice : public offloadtest::Device { std::errc::invalid_argument, "Compute pipeline must have exactly one compute shader."); if (auto Err = createComputePSO(P.Shaders[0].Shader->getBuffer(), State)) - return waitThenReturnErr(std::move(Err), State); + return Err; llvm::outs() << "PSO created.\n"; if (auto Err = createComputeCommands(P, State)) - return waitThenReturnErr(std::move(Err), State); + return Err; llvm::outs() << "Compute command list created.\n"; } else { // Create render target, readback and vertex buffer and PSO. if (auto Err = createRenderTarget(P, State)) - return waitThenReturnErr(std::move(Err), State); + return Err; llvm::outs() << "Render target created.\n"; if (auto Err = createVertexBuffer(P, State)) - return waitThenReturnErr(std::move(Err), State); + return Err; llvm::outs() << "Vertex buffer created.\n"; if (auto Err = createGraphicsPSO(P, State)) - return waitThenReturnErr(std::move(Err), State); + return Err; llvm::outs() << "Graphics PSO created.\n"; if (auto Err = createGraphicsCommands(P, State)) - return waitThenReturnErr(std::move(Err), State); + return Err; llvm::outs() << "Graphics command list created complete.\n"; } if (auto Err = executeCommandList(State)) - return waitThenReturnErr(std::move(Err), State); + return Err; llvm::outs() << "Compute commands executed.\n"; if (auto Err = readBack(P, State)) - return waitThenReturnErr(std::move(Err), State); + return Err; llvm::outs() << "Read data back.\n"; return llvm::Error::success(); diff --git a/test/Feature/HLSLLib/PartiallyMappedResources.test 
b/test/Feature/HLSLLib/PartiallyMappedResources.test index 956117412..017538ec0 100644 --- a/test/Feature/HLSLLib/PartiallyMappedResources.test +++ b/test/Feature/HLSLLib/PartiallyMappedResources.test @@ -136,6 +136,9 @@ DescriptorSets: # Metal API seems to have problems with reserved resources # XFAIL: Metal +# Bug https://github.com/llvm/offload-test-suite/issues/485 +# XFAIL: Intel + # RUN: split-file %s %t # RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl # RUN: %offloader %t/pipeline.yaml %t.o From c59e9e296c4539ce519839ebab3a1c3674028f8d Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Fri, 24 Oct 2025 13:37:49 -0700 Subject: [PATCH 18/20] remove reserved CBV function, and capitalize var names --- lib/API/DX/Device.cpp | 144 +++--------------------------------------- 1 file changed, 8 insertions(+), 136 deletions(-) diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp index 02b54f646..7f0d738de 100644 --- a/lib/API/DX/Device.cpp +++ b/lib/API/DX/Device.cpp @@ -596,19 +596,19 @@ class DXDevice : public offloadtest::Device { return Err; // Define one contiguous mapping region - D3D12_TILED_RESOURCE_COORDINATE startCoord = {0, 0, 0, 0}; - D3D12_TILE_REGION_SIZE regionSize = {}; + D3D12_TILED_RESOURCE_COORDINATE StartCoord = {0, 0, 0, 0}; + D3D12_TILE_REGION_SIZE RegionSize = {}; regionSize.NumTiles = NumTiles; regionSize.UseBox = FALSE; - D3D12_TILE_RANGE_FLAGS rangeFlag = D3D12_TILE_RANGE_FLAG_NONE; - UINT heapRangeStartOffset = 0; - UINT rangeTileCount = NumTiles; + D3D12_TILE_RANGE_FLAGS RangeFlag = D3D12_TILE_RANGE_FLAG_NONE; + UINT HeapRangeStartOffset = 0; + UINT RangeTileCount = NumTiles; ID3D12CommandQueue *CommandQueue = IS.Queue.Get(); CommandQueue->UpdateTileMappings( - Buffer.Get(), 1, &startCoord, &regionSize, // One region - Heap.Get(), 1, &rangeFlag, &heapRangeStartOffset, &rangeTileCount, + Buffer.Get(), 1, &StartCoord, &RegionSize, // One region + Heap.Get(), 1, &RangeFlag, &HeapRangeStartOffset, &RangeTileCount, 
D3D12_TILE_MAPPING_FLAG_NONE); } @@ -760,129 +760,7 @@ class DXDevice : public offloadtest::Device { return (Sz + 255u) & 0xFFFFFFFFFFFFFF00; } - llvm::Expected createReservedCBV(Resource &R, - InvocationState &IS) { - ResourceBundle Bundle; - - const size_t BufferSize = getCBVSize(R.size()); - const D3D12_RESOURCE_DESC ResDesc = { - D3D12_RESOURCE_DIMENSION_BUFFER, - 0, - BufferSize, - 1, - 1, - 1, - DXGI_FORMAT_UNKNOWN, - {1, 0}, - D3D12_TEXTURE_LAYOUT_ROW_MAJOR, - D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS}; - - const D3D12_RESOURCE_DESC UploadResDesc = - CD3DX12_RESOURCE_DESC::Buffer(BufferSize); - - uint32_t RegOffset = 0; - for (const auto &ResData : R.BufferPtr->Data) { - llvm::outs() << "Creating CBV: { Size = " << BufferSize - << ", Register = b" << R.DXBinding.Register + RegOffset - << ", Space = " << R.DXBinding.Space; - if (R.TilesMapped) - llvm::outs() << ", TilesMapped = " << *R.TilesMapped; - llvm::outs() << " }\n"; - - // Reserved CBV resource - ComPtr Buffer; - if (auto Err = - HR::toError(Device->CreateReservedResource( - &ResDesc, D3D12_RESOURCE_STATE_COMMON, nullptr, - IID_PPV_ARGS(&Buffer)), - "Failed to create reserved resource (buffer).")) - return Err; - - // Committed Upload Buffer (CPU visible) - ComPtr UploadBuffer; - const D3D12_HEAP_PROPERTIES UploadHeapProps = - CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD); - if (auto Err = HR::toError( - Device->CreateCommittedResource( - &UploadHeapProps, D3D12_HEAP_FLAG_NONE, &UploadResDesc, - D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, - IID_PPV_ARGS(&UploadBuffer)), - "Failed to create committed resource (upload buffer).")) - return Err; - - // Tile mapping setup (optional if NumTiles > 0) - const UINT NumTiles = static_cast(*R.TilesMapped); - ComPtr Heap; // optional, only created if NumTiles > 0 - - if (NumTiles > 0) { - std::vector StartCoords(NumTiles); - std::vector RegionSizes(NumTiles); - std::vector RangeFlags( - NumTiles, D3D12_TILE_RANGE_FLAG_NONE); - std::vector 
HeapRangeStartOffsets(NumTiles); - std::vector RangeTileCounts(NumTiles, 1); - - // Create a heap large enough for the mapped tiles - D3D12_HEAP_DESC HeapDesc = {}; - HeapDesc.Properties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT); - HeapDesc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; - HeapDesc.SizeInBytes = static_cast(NumTiles) * - D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; - HeapDesc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; - - if (auto Err = - HR::toError(Device->CreateHeap(&HeapDesc, IID_PPV_ARGS(&Heap)), - "Failed to create heap for tiled CBV resource.")) - return Err; - - // Fill tile coordinates and region sizes - for (UINT I = 0; I < NumTiles; ++I) { - StartCoords[I] = {I, 0, 0, 0}; - RegionSizes[I].NumTiles = 1; - RegionSizes[I].UseBox = FALSE; - HeapRangeStartOffsets[I] = I; - } - - // Retrieve a command queue from InvocationState - ID3D12CommandQueue *CommandQueue = IS.Queue.Get(); - - // Map the first NumTiles tiles in the Buffer - CommandQueue->UpdateTileMappings( - Buffer.Get(), NumTiles, StartCoords.data(), RegionSizes.data(), - Heap.Get(), NumTiles, RangeFlags.data(), - HeapRangeStartOffsets.data(), RangeTileCounts.data(), - D3D12_TILE_MAPPING_FLAG_NONE); - } - - // Upload data initialization - void *ResDataPtr = nullptr; - const D3D12_RANGE Range = {0, 0}; // no reads expected - if (SUCCEEDED(UploadBuffer->Map(0, &Range, &ResDataPtr))) { - memcpy(ResDataPtr, ResData.get(), R.size()); - // Zero remaining bytes if the buffer is padded - if (R.size() < BufferSize) { - memset(static_cast(ResDataPtr) + R.size(), 0, - BufferSize - R.size()); - } - UploadBuffer->Unmap(0, nullptr); - } else { - return llvm::createStringError(std::errc::io_error, - "Failed to map CBV upload buffer."); - } - - // Add GPU upload commands - addResourceUploadCommands(R, IS, Buffer, UploadBuffer); - - // Store resource bundle (heap optional) - Bundle.emplace_back(UploadBuffer, Buffer, nullptr, Heap); - RegOffset++; - } - - return Bundle; - } - - 
llvm::Expected<ResourceBundle> createCommittedCBV(Resource &R, - InvocationState &IS) { + llvm::Expected<ResourceBundle> createCBV(Resource &R, InvocationState &IS) { ResourceBundle Bundle; const size_t CBVSize = getCBVSize(R.size()); @@ -949,12 +827,6 @@ class DXDevice : public offloadtest::Device { return Bundle; } - llvm::Expected<ResourceBundle> createCBV(Resource &R, InvocationState &IS) { - if (R.TilesMapped) - return createReservedCBV(R, IS); - return createCommittedCBV(R, IS); - } - // returns the next available HeapIdx uint32_t bindCBV(Resource &R, InvocationState &IS, uint32_t HeapIdx, ResourceBundle ResBundle) { From 6a7878d3b93a16a9eea5f2dde1c4efbab2d819c0 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Fri, 24 Oct 2025 14:08:52 -0700 Subject: [PATCH 19/20] add some missing consts --- lib/API/DX/Device.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp index 7f0d738de..db79addd4 100644 --- a/lib/API/DX/Device.cpp +++ b/lib/API/DX/Device.cpp @@ -596,14 +596,14 @@ class DXDevice : public offloadtest::Device { return Err; // Define one contiguous mapping region - D3D12_TILED_RESOURCE_COORDINATE StartCoord = {0, 0, 0, 0}; + const D3D12_TILED_RESOURCE_COORDINATE StartCoord = {0, 0, 0, 0}; D3D12_TILE_REGION_SIZE RegionSize = {}; - regionSize.NumTiles = NumTiles; - regionSize.UseBox = FALSE; + RegionSize.NumTiles = NumTiles; + RegionSize.UseBox = FALSE; - D3D12_TILE_RANGE_FLAGS RangeFlag = D3D12_TILE_RANGE_FLAG_NONE; - UINT HeapRangeStartOffset = 0; - UINT RangeTileCount = NumTiles; + const D3D12_TILE_RANGE_FLAGS RangeFlag = D3D12_TILE_RANGE_FLAG_NONE; + const UINT HeapRangeStartOffset = 0; + const UINT RangeTileCount = NumTiles; ID3D12CommandQueue *CommandQueue = IS.Queue.Get(); CommandQueue->UpdateTileMappings( From 06bf89ceb58d6932dca2cbad8bc22f55cf05b388 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Fri, 24 Oct 2025 17:32:31 -0700 Subject: [PATCH 20/20] self review --- lib/API/DX/Device.cpp | 7 ++----- 1 file 
changed, 2 insertions(+), 5 deletions(-) diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp index db79addd4..6ef0f2ffd 100644 --- a/lib/API/DX/Device.cpp +++ b/lib/API/DX/Device.cpp @@ -727,6 +727,7 @@ class DXDevice : public offloadtest::Device { } return Bundle; } + // returns the next available HeapIdx uint32_t bindUAV(Resource &R, InvocationState &IS, uint32_t HeapIdx, ResourceBundle ResBundle) { @@ -741,10 +742,7 @@ class DXDevice : public offloadtest::Device { for (const ResourceSet &RS : ResBundle) { llvm::outs() << "UAV: HeapIdx = " << HeapIdx << " EltSize = " << EltSize << " NumElts = " << NumElts - << " HasCounter = " << R.HasCounter; - if (R.TilesMapped) - llvm::outs() << ", TilesMapped = " << *R.TilesMapped; - llvm::outs() << " }\n"; + << " HasCounter = " << R.HasCounter << "\n"; D3D12_CPU_DESCRIPTOR_HANDLE UAVHandle = UAVHandleHeapStart; UAVHandle.ptr += HeapIdx * DescHandleIncSize; @@ -1467,7 +1465,6 @@ class DXDevice : public offloadtest::Device { llvm::outs() << "Buffers created.\n"; if (auto Err = createEvent(State)) return Err; - llvm::outs() << "Event prepared.\n"; if (P.isCompute()) {