Skip to content

Commit 3e78f39

Browse files
committed
improve, but keep UAV the same
1 parent 1c9752f commit 3e78f39

File tree

1 file changed

+41
-164
lines changed

1 file changed

+41
-164
lines changed

lib/API/DX/Device.cpp

Lines changed: 41 additions & 164 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ static D3D12_RESOURCE_DESC getResourceDescription(const Resource &R) {
151151
const uint32_t Height = R.isTexture() ? B.OutputProps.Height : 1;
152152
D3D12_TEXTURE_LAYOUT Layout;
153153
if (R.isTexture())
154-
Layout = getDXKind(R.Kind) == SRV
154+
Layout = getDXKind(R.Kind) == SRV || getDXKind(R.Kind) == UAV
155155
? D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE
156156
: D3D12_TEXTURE_LAYOUT_UNKNOWN;
157157
else
@@ -528,6 +528,18 @@ class DXDevice : public offloadtest::Device {
528528
addUploadEndBarrier(IS, Destination, R.isReadWrite());
529529
}
530530

531+
// Picks how many tiles of a reserved resource should be mapped.
//
// NumTiles - explicit tile count, if the caller supplied one.
// Width    - resource width in bytes; consulted only when NumTiles is empty.
//
// Returns the explicit count converted to UINT (no validation is applied),
// or, when none was given, the number of 64KB tiles needed to cover Width
// bytes, rounded up.
UINT getNumTiles(std::optional<int> NumTiles, UINT64 Width) {
  // An explicit request always wins.
  if (NumTiles)
    return static_cast<UINT>(*NumTiles);

  // Otherwise cover the entire buffer: ceil(Width / tile size).
  const UINT64 TileBytes = D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES;
  return static_cast<UINT>((Width + TileBytes - 1) / TileBytes);
}
542+
531543
llvm::Expected<ResourceBundle> createSRV(Resource &R, InvocationState &IS) {
532544
ResourceBundle Bundle;
533545
const D3D12_RESOURCE_DESC ResDesc = getResourceDescription(R);
@@ -566,15 +578,7 @@ class DXDevice : public offloadtest::Device {
566578
return Err;
567579

568580
// Tile mapping setup (optional if NumTiles > 0)
569-
UINT NumTiles = 0;
570-
if (R.TilesMapped.has_value()) {
571-
NumTiles = static_cast<UINT>(*R.TilesMapped);
572-
} else {
573-
// Map the entire buffer by computing how many 64KB tiles cover it
574-
NumTiles = static_cast<UINT>(
575-
(ResDesc.Width + D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT - 1) /
576-
D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT);
577-
}
581+
const UINT NumTiles = getNumTiles(R.TilesMapped, ResDesc.Width);
578582
ComPtr<ID3D12Heap> Heap; // optional, only created if NumTiles > 0
579583

580584
if (NumTiles > 0) {
@@ -660,144 +664,7 @@ class DXDevice : public offloadtest::Device {
660664
return HeapIdx;
661665
}
662666

663-
// Creates one reserved (tiled) UAV buffer for every entry in
// R.BufferPtr->Data, each paired with a committed upload buffer, a committed
// readback buffer and, when at least one tile is mapped, the heap backing
// those tiles.
//
// R  - resource to create. R.TilesMapped gives the number of leading tiles of
//      each reserved buffer that get mapped to heap memory; an absent value
//      is treated as 0 (nothing mapped).
// IS - invocation state; its Queue performs the tile mapping and it is
//      forwarded to addResourceUploadCommands.
//
// Returns the bundle of created resources. Returns an error when TilesMapped
// is negative, when any D3D12 object cannot be created, or when the upload
// buffer cannot be mapped for CPU writes.
llvm::Expected<ResourceBundle> createReservedUAV(Resource &R,
                                                 InvocationState &IS) {
  // Resolve the tile count once, up front. Dereferencing an empty optional is
  // undefined behaviour, and a negative count cast to UINT would turn into a
  // huge allocation request, so both cases are handled before any GPU object
  // is created.
  const auto RequestedTiles = R.TilesMapped.value_or(0);
  if (RequestedTiles < 0)
    return llvm::createStringError(std::errc::invalid_argument,
                                   "TilesMapped must not be negative.");
  const UINT NumTiles = static_cast<UINT>(RequestedTiles);

  ResourceBundle Bundle;
  const uint32_t BufferSize = getUAVBufferSize(R);
  const D3D12_RESOURCE_DESC ResDesc = getResourceDescription(R);

  const D3D12_HEAP_PROPERTIES ReadBackHeapProp =
      CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK);
  const D3D12_RESOURCE_DESC ReadBackResDesc = {
      D3D12_RESOURCE_DIMENSION_BUFFER,
      0,
      BufferSize,
      1,
      1,
      1,
      DXGI_FORMAT_UNKNOWN,
      {1, 0},
      D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
      D3D12_RESOURCE_FLAG_NONE};

  // The upload descriptions do not change between iterations, so they are
  // built once outside the loop.
  const D3D12_HEAP_PROPERTIES UploadHeapProps =
      CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD);
  const D3D12_RESOURCE_DESC UploadResDesc =
      CD3DX12_RESOURCE_DESC::Buffer(BufferSize);

  uint32_t RegOffset = 0;
  for (const auto &ResData : R.BufferPtr->Data) {
    llvm::outs() << "Creating UAV: { Size = " << BufferSize
                 << ", Register = u" << R.DXBinding.Register + RegOffset
                 << ", Space = " << R.DXBinding.Space
                 << ", HasCounter = " << R.HasCounter;
    if (R.TilesMapped)
      llvm::outs() << ", TilesMapped = " << *R.TilesMapped;
    llvm::outs() << " }\n";

    // Reserved UAV resource
    ComPtr<ID3D12Resource> Buffer;
    if (auto Err =
            HR::toError(Device->CreateReservedResource(
                            &ResDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr,
                            IID_PPV_ARGS(&Buffer)),
                        "Failed to create reserved resource (buffer)."))
      return Err;

    // Committed upload buffer (CPU visible)
    ComPtr<ID3D12Resource> UploadBuffer;
    if (auto Err = HR::toError(
            Device->CreateCommittedResource(
                &UploadHeapProps, D3D12_HEAP_FLAG_NONE, &UploadResDesc,
                D3D12_RESOURCE_STATE_GENERIC_READ, nullptr,
                IID_PPV_ARGS(&UploadBuffer)),
            "Failed to create committed resource (upload buffer)."))
      return Err;

    // Committed readback buffer
    ComPtr<ID3D12Resource> ReadBackBuffer;
    if (auto Err = HR::toError(
            Device->CreateCommittedResource(
                &ReadBackHeapProp, D3D12_HEAP_FLAG_NONE, &ReadBackResDesc,
                D3D12_RESOURCE_STATE_COPY_DEST, nullptr,
                IID_PPV_ARGS(&ReadBackBuffer)),
            "Failed to create committed resource (readback buffer)."))
      return Err;

    // Tile mapping setup; the heap stays null when no tiles are mapped.
    ComPtr<ID3D12Heap> Heap;

    if (NumTiles > 0) {
      std::vector<D3D12_TILED_RESOURCE_COORDINATE> StartCoords(NumTiles);
      std::vector<D3D12_TILE_REGION_SIZE> RegionSizes(NumTiles);
      std::vector<D3D12_TILE_RANGE_FLAGS> RangeFlags(
          NumTiles, D3D12_TILE_RANGE_FLAG_NONE);
      std::vector<UINT> HeapRangeStartOffsets(NumTiles);
      std::vector<UINT> RangeTileCounts(NumTiles, 1);

      // Create a heap large enough for the mapped tiles
      D3D12_HEAP_DESC HeapDesc = {};
      HeapDesc.Properties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT);
      HeapDesc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
      HeapDesc.SizeInBytes = static_cast<UINT64>(NumTiles) *
                             D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
      HeapDesc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS;

      if (auto Err =
              HR::toError(Device->CreateHeap(&HeapDesc, IID_PPV_ARGS(&Heap)),
                          "Failed to create heap for tiled UAV resource."))
        return Err;

      // One single-tile region per tile: resource tile I maps to heap tile I.
      for (UINT I = 0; I < NumTiles; ++I) {
        StartCoords[I] = {I, 0, 0, 0};
        RegionSizes[I].NumTiles = 1;
        RegionSizes[I].UseBox = FALSE;
        HeapRangeStartOffsets[I] = I;
      }

      // Retrieve a command queue from InvocationState
      ID3D12CommandQueue *CommandQueue = IS.Queue.Get();

      // Map the first NumTiles tiles in the Buffer
      CommandQueue->UpdateTileMappings(
          Buffer.Get(), NumTiles, StartCoords.data(), RegionSizes.data(),
          Heap.Get(), NumTiles, RangeFlags.data(),
          HeapRangeStartOffsets.data(), RangeTileCounts.data(),
          D3D12_TILE_MAPPING_FLAG_NONE);
    }

    // Upload data initialization
    void *ResDataPtr = nullptr;
    const D3D12_RANGE Range = {0, 0}; // no reads expected
    if (SUCCEEDED(UploadBuffer->Map(0, &Range, &ResDataPtr))) {
      memcpy(ResDataPtr, ResData.get(), R.size());
      // Zero remaining bytes if the buffer is padded
      if (R.size() < BufferSize) {
        memset(static_cast<char *>(ResDataPtr) + R.size(), 0,
               BufferSize - R.size());
      }
      UploadBuffer->Unmap(0, nullptr);
    } else {
      return llvm::createStringError(std::errc::io_error,
                                     "Failed to map upload buffer.");
    }

    // Add GPU upload commands
    addResourceUploadCommands(R, IS, Buffer, UploadBuffer);

    // Store heap in Bundle so it lives until caller releases the Bundle
    Bundle.emplace_back(UploadBuffer, Buffer, ReadBackBuffer, Heap);
    ++RegOffset;
  }

  return Bundle;
}
798-
799-
llvm::Expected<ResourceBundle> createCommittedUAV(Resource &R,
800-
InvocationState &IS) {
667+
llvm::Expected<ResourceBundle> createUAV(Resource &R, InvocationState &IS) {
801668
ResourceBundle Bundle;
802669
const uint32_t BufferSize = getUAVBufferSize(R);
803670

@@ -872,13 +739,7 @@ class DXDevice : public offloadtest::Device {
872739
}
873740
return Bundle;
874741
}
875-
876-
// Creates the UAV resources for R, choosing the backing strategy from
// R.TilesMapped: a reserved (tiled) resource when a tile count is present,
// a committed resource otherwise.
llvm::Expected<ResourceBundle> createUAV(Resource &R, InvocationState &IS) {
  return R.TilesMapped ? createReservedUAV(R, IS) : createCommittedUAV(R, IS);
}
881-
742+
882743
// returns the next available HeapIdx
883744
uint32_t bindUAV(Resource &R, InvocationState &IS, uint32_t HeapIdx,
884745
ResourceBundle ResBundle) {
@@ -1221,6 +1082,9 @@ class DXDevice : public offloadtest::Device {
12211082
}
12221083

12231084
void addReadbackBeginBarrier(InvocationState &IS, ComPtr<ID3D12Resource> R) {
1085+
const D3D12_RESOURCE_BARRIER b = CD3DX12_RESOURCE_BARRIER::UAV(R.Get());
1086+
IS.CmdList->ResourceBarrier(1, &b);
1087+
12241088
const D3D12_RESOURCE_BARRIER Barrier = CD3DX12_RESOURCE_BARRIER::Transition(
12251089
R.Get(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
12261090
D3D12_RESOURCE_STATE_COPY_SOURCE);
@@ -1703,6 +1567,18 @@ class DXDevice : public offloadtest::Device {
17031567
return llvm::Error::success();
17041568
}
17051569

1570+
// Waits on the GPU (via waitForSignal) before propagating a failure.
//
// Err - the error that caused the caller to bail out; ownership is taken.
// IS  - invocation state handed to waitForSignal.
//
// Returns Err unchanged when the wait succeeds. When the wait fails too, both
// errors are returned joined together, with the wait error first.
llvm::Error waitThenReturnErr(llvm::Error Err, InvocationState &IS) {
  llvm::Error WaitErr = waitForSignal(IS);

  // Common case: the wait succeeded, so only the original error is reported.
  if (!WaitErr)
    return Err;

  // llvm::Error is move-only, so both errors are moved into the joined result.
  return llvm::joinErrors(std::move(WaitErr), std::move(Err));
}
1581+
17061582
llvm::Error executeProgram(Pipeline &P) override {
17071583
llvm::sys::AddSignalHandler(
17081584
[](void *Cookie) {
@@ -1746,7 +1622,8 @@ class DXDevice : public offloadtest::Device {
17461622
return Err;
17471623
llvm::outs() << "Buffers created.\n";
17481624
if (auto Err = createEvent(State))
1749-
return Err;
1625+
return waitThenReturnErr(std::move(Err), State);
1626+
17501627
llvm::outs() << "Event prepared.\n";
17511628

17521629
if (P.isCompute()) {
@@ -1756,33 +1633,33 @@ class DXDevice : public offloadtest::Device {
17561633
std::errc::invalid_argument,
17571634
"Compute pipeline must have exactly one compute shader.");
17581635
if (auto Err = createComputePSO(P.Shaders[0].Shader->getBuffer(), State))
1759-
return Err;
1636+
return waitThenReturnErr(std::move(Err), State);
17601637
llvm::outs() << "PSO created.\n";
17611638
if (auto Err = createComputeCommands(P, State))
1762-
return Err;
1639+
return waitThenReturnErr(std::move(Err), State);
17631640
llvm::outs() << "Compute command list created.\n";
17641641

17651642
} else {
17661643
// Create render target, readback and vertex buffer and PSO.
17671644
if (auto Err = createRenderTarget(P, State))
1768-
return Err;
1645+
return waitThenReturnErr(std::move(Err), State);
17691646
llvm::outs() << "Render target created.\n";
17701647
if (auto Err = createVertexBuffer(P, State))
1771-
return Err;
1648+
return waitThenReturnErr(std::move(Err), State);
17721649
llvm::outs() << "Vertex buffer created.\n";
17731650
if (auto Err = createGraphicsPSO(P, State))
1774-
return Err;
1651+
return waitThenReturnErr(std::move(Err), State);
17751652
llvm::outs() << "Graphics PSO created.\n";
17761653
if (auto Err = createGraphicsCommands(P, State))
1777-
return Err;
1654+
return waitThenReturnErr(std::move(Err), State);
17781655
llvm::outs() << "Graphics command list created complete.\n";
17791656
}
17801657

17811658
if (auto Err = executeCommandList(State))
1782-
return Err;
1659+
return waitThenReturnErr(std::move(Err), State);
17831660
llvm::outs() << "Compute commands executed.\n";
17841661
if (auto Err = readBack(P, State))
1785-
return Err;
1662+
return waitThenReturnErr(std::move(Err), State);
17861663
llvm::outs() << "Read data back.\n";
17871664

17881665
return llvm::Error::success();

0 commit comments

Comments
 (0)