@@ -151,7 +151,7 @@ static D3D12_RESOURCE_DESC getResourceDescription(const Resource &R) {
151151 const uint32_t Height = R.isTexture () ? B.OutputProps .Height : 1 ;
152152 D3D12_TEXTURE_LAYOUT Layout;
153153 if (R.isTexture ())
154- Layout = getDXKind (R.Kind ) == SRV
154+ Layout = getDXKind (R.Kind ) == SRV || getDXKind (R. Kind ) == UAV
155155 ? D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE
156156 : D3D12_TEXTURE_LAYOUT_UNKNOWN;
157157 else
@@ -528,6 +528,18 @@ class DXDevice : public offloadtest::Device {
528528 addUploadEndBarrier (IS, Destination, R.isReadWrite ());
529529 }
530530
531+ UINT getNumTiles (std::optional<int > NumTiles, UINT64 Width) {
532+ UINT Ret;
533+ if (NumTiles.has_value ())
534+ Ret = static_cast <UINT>(*NumTiles);
535+ else
536+ // Map the entire buffer by computing how many 64KB tiles cover it
537+ Ret = static_cast <UINT>(
538+ (Width + D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES - 1 ) /
539+ D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES);
540+ return Ret;
541+ }
542+
531543 llvm::Expected<ResourceBundle> createSRV (Resource &R, InvocationState &IS) {
532544 ResourceBundle Bundle;
533545 const D3D12_RESOURCE_DESC ResDesc = getResourceDescription (R);
@@ -566,15 +578,7 @@ class DXDevice : public offloadtest::Device {
566578 return Err;
567579
568580 // Tile mapping setup (optional if NumTiles > 0)
569- UINT NumTiles = 0 ;
570- if (R.TilesMapped .has_value ()) {
571- NumTiles = static_cast <UINT>(*R.TilesMapped );
572- } else {
573- // Map the entire buffer by computing how many 64KB tiles cover it
574- NumTiles = static_cast <UINT>(
575- (ResDesc.Width + D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT - 1 ) /
576- D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT);
577- }
581+ const UINT NumTiles = getNumTiles (R.TilesMapped , ResDesc.Width );
578582 ComPtr<ID3D12Heap> Heap; // optional, only created if NumTiles > 0
579583
580584 if (NumTiles > 0 ) {
@@ -660,144 +664,7 @@ class DXDevice : public offloadtest::Device {
660664 return HeapIdx;
661665 }
662666
663- llvm::Expected<ResourceBundle> createReservedUAV (Resource &R,
664- InvocationState &IS) {
665- ResourceBundle Bundle;
666- const uint32_t BufferSize = getUAVBufferSize (R);
667- const D3D12_RESOURCE_DESC ResDesc = getResourceDescription (R);
668-
669- const D3D12_HEAP_PROPERTIES ReadBackHeapProp =
670- CD3DX12_HEAP_PROPERTIES (D3D12_HEAP_TYPE_READBACK);
671- const D3D12_RESOURCE_DESC ReadBackResDesc = {
672- D3D12_RESOURCE_DIMENSION_BUFFER,
673- 0 ,
674- BufferSize,
675- 1 ,
676- 1 ,
677- 1 ,
678- DXGI_FORMAT_UNKNOWN,
679- {1 , 0 },
680- D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
681- D3D12_RESOURCE_FLAG_NONE};
682-
683- const D3D12_RESOURCE_DESC UploadResDesc =
684- CD3DX12_RESOURCE_DESC::Buffer (BufferSize);
685-
686- uint32_t RegOffset = 0 ;
687- for (const auto &ResData : R.BufferPtr ->Data ) {
688- llvm::outs () << " Creating UAV: { Size = " << BufferSize
689- << " , Register = u" << R.DXBinding .Register + RegOffset
690- << " , Space = " << R.DXBinding .Space
691- << " , HasCounter = " << R.HasCounter ;
692- if (R.TilesMapped )
693- llvm::outs () << " , TilesMapped = " << *R.TilesMapped ;
694- llvm::outs () << " }\n " ;
695-
696- // Reserved UAV resource
697- ComPtr<ID3D12Resource> Buffer;
698- if (auto Err =
699- HR::toError (Device->CreateReservedResource (
700- &ResDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr ,
701- IID_PPV_ARGS (&Buffer)),
702- " Failed to create reserved resource (buffer)." ))
703- return Err;
704-
705- // Committed Upload Buffer (CPU visible)
706- ComPtr<ID3D12Resource> UploadBuffer;
707- const D3D12_HEAP_PROPERTIES UploadHeapProps =
708- CD3DX12_HEAP_PROPERTIES (D3D12_HEAP_TYPE_UPLOAD);
709-
710- if (auto Err = HR::toError (
711- Device->CreateCommittedResource (
712- &UploadHeapProps, D3D12_HEAP_FLAG_NONE, &UploadResDesc,
713- D3D12_RESOURCE_STATE_GENERIC_READ, nullptr ,
714- IID_PPV_ARGS (&UploadBuffer)),
715- " Failed to create committed resource (upload buffer)." ))
716- return Err;
717-
718- // Readback buffer (committed)
719- ComPtr<ID3D12Resource> ReadBackBuffer;
720- if (auto Err = HR::toError (
721- Device->CreateCommittedResource (
722- &ReadBackHeapProp, D3D12_HEAP_FLAG_NONE, &ReadBackResDesc,
723- D3D12_RESOURCE_STATE_COPY_DEST, nullptr ,
724- IID_PPV_ARGS (&ReadBackBuffer)),
725- " Failed to create committed resource (readback buffer)." ))
726- return Err;
727-
728- // Tile mapping setup (optional if NumTiles > 0)
729- const UINT NumTiles = static_cast <UINT>(*R.TilesMapped );
730- ComPtr<ID3D12Heap> Heap; // optional, only created if NumTiles > 0
731-
732- if (NumTiles > 0 ) {
733- std::vector<D3D12_TILED_RESOURCE_COORDINATE> StartCoords (NumTiles);
734- std::vector<D3D12_TILE_REGION_SIZE> RegionSizes (NumTiles);
735- std::vector<D3D12_TILE_RANGE_FLAGS> RangeFlags (
736- NumTiles, D3D12_TILE_RANGE_FLAG_NONE);
737- std::vector<UINT> HeapRangeStartOffsets (NumTiles);
738- std::vector<UINT> RangeTileCounts (NumTiles, 1 );
739-
740- // Create a heap large enough for the mapped tiles
741- D3D12_HEAP_DESC HeapDesc = {};
742- HeapDesc.Properties = CD3DX12_HEAP_PROPERTIES (D3D12_HEAP_TYPE_DEFAULT);
743- HeapDesc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
744- HeapDesc.SizeInBytes = static_cast <UINT64>(NumTiles) *
745- D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
746- HeapDesc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS;
747-
748- if (auto Err =
749- HR::toError (Device->CreateHeap (&HeapDesc, IID_PPV_ARGS (&Heap)),
750- " Failed to create heap for tiled UAV resource." ))
751- return Err;
752-
753- // Fill tile coordinates and region sizes
754- for (UINT I = 0 ; I < NumTiles; ++I) {
755- StartCoords[I] = {I, 0 , 0 , 0 };
756- RegionSizes[I].NumTiles = 1 ;
757- RegionSizes[I].UseBox = FALSE ;
758- HeapRangeStartOffsets[I] = I;
759- }
760-
761- // Retrieve a command queue from InvocationState
762- ID3D12CommandQueue *CommandQueue = IS.Queue .Get ();
763-
764- // Map the first NumTiles tiles in the Buffer
765- CommandQueue->UpdateTileMappings (
766- Buffer.Get (), NumTiles, StartCoords.data (), RegionSizes.data (),
767- Heap.Get (), NumTiles, RangeFlags.data (),
768- HeapRangeStartOffsets.data (), RangeTileCounts.data (),
769- D3D12_TILE_MAPPING_FLAG_NONE);
770- }
771-
772- // Upload data initialization
773- void *ResDataPtr = nullptr ;
774- const D3D12_RANGE Range = {0 , 0 }; // no reads expected
775- if (SUCCEEDED (UploadBuffer->Map (0 , &Range, &ResDataPtr))) {
776- memcpy (ResDataPtr, ResData.get (), R.size ());
777- // Zero remaining bytes if the buffer is padded
778- if (R.size () < BufferSize) {
779- memset (static_cast <char *>(ResDataPtr) + R.size (), 0 ,
780- BufferSize - R.size ());
781- }
782- UploadBuffer->Unmap (0 , nullptr );
783- } else {
784- return llvm::createStringError (std::errc::io_error,
785- " Failed to map upload buffer." );
786- }
787-
788- // Add GPU upload commands
789- addResourceUploadCommands (R, IS, Buffer, UploadBuffer);
790-
791- // Store heap in Bundle so it lives until caller releases the Bundle
792- Bundle.emplace_back (UploadBuffer, Buffer, ReadBackBuffer, Heap);
793- RegOffset++;
794- }
795-
796- return Bundle;
797- }
798-
799- llvm::Expected<ResourceBundle> createCommittedUAV (Resource &R,
800- InvocationState &IS) {
667+ llvm::Expected<ResourceBundle> createUAV (Resource &R, InvocationState &IS) {
801668 ResourceBundle Bundle;
802669 const uint32_t BufferSize = getUAVBufferSize (R);
803670
@@ -872,13 +739,7 @@ class DXDevice : public offloadtest::Device {
872739 }
873740 return Bundle;
874741 }
875-
876- llvm::Expected<ResourceBundle> createUAV (Resource &R, InvocationState &IS) {
877- if (R.TilesMapped )
878- return createReservedUAV (R, IS);
879- return createCommittedUAV (R, IS);
880- }
881-
742+
882743 // returns the next available HeapIdx
883744 uint32_t bindUAV (Resource &R, InvocationState &IS, uint32_t HeapIdx,
884745 ResourceBundle ResBundle) {
@@ -1221,6 +1082,9 @@ class DXDevice : public offloadtest::Device {
12211082 }
12221083
12231084 void addReadbackBeginBarrier (InvocationState &IS, ComPtr<ID3D12Resource> R) {
1085+ const D3D12_RESOURCE_BARRIER b = CD3DX12_RESOURCE_BARRIER::UAV (R.Get ());
1086+ IS.CmdList ->ResourceBarrier (1 , &b);
1087+
12241088 const D3D12_RESOURCE_BARRIER Barrier = CD3DX12_RESOURCE_BARRIER::Transition (
12251089 R.Get (), D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
12261090 D3D12_RESOURCE_STATE_COPY_SOURCE);
@@ -1703,6 +1567,18 @@ class DXDevice : public offloadtest::Device {
17031567 return llvm::Error::success ();
17041568 }
17051569
1570+ llvm::Error waitThenReturnErr (llvm::Error Err, InvocationState &IS) {
1571+ // Wait on the GPU before returning the error
1572+ llvm::Error WaitErr = waitForSignal (IS);
1573+ if (WaitErr)
1574+ // joinErrors returns an Error by value (move-only). Just return it
1575+ // directly.
1576+ return llvm::joinErrors (std::move (WaitErr), std::move (Err));
1577+
1578+ // No waiting error, just return the moved original.
1579+ return Err;
1580+ }
1581+
17061582 llvm::Error executeProgram (Pipeline &P) override {
17071583 llvm::sys::AddSignalHandler (
17081584 [](void *Cookie) {
@@ -1746,7 +1622,8 @@ class DXDevice : public offloadtest::Device {
17461622 return Err;
17471623 llvm::outs () << " Buffers created.\n " ;
17481624 if (auto Err = createEvent (State))
1749- return Err;
1625+ return waitThenReturnErr (std::move (Err), State);
1626+
17501627 llvm::outs () << " Event prepared.\n " ;
17511628
17521629 if (P.isCompute ()) {
@@ -1756,33 +1633,33 @@ class DXDevice : public offloadtest::Device {
17561633 std::errc::invalid_argument,
17571634 " Compute pipeline must have exactly one compute shader." );
17581635 if (auto Err = createComputePSO (P.Shaders [0 ].Shader ->getBuffer (), State))
1759- return Err;
1636+ return waitThenReturnErr ( std::move ( Err), State) ;
17601637 llvm::outs () << " PSO created.\n " ;
17611638 if (auto Err = createComputeCommands (P, State))
1762- return Err;
1639+ return waitThenReturnErr ( std::move ( Err), State) ;
17631640 llvm::outs () << " Compute command list created.\n " ;
17641641
17651642 } else {
17661643 // Create render target, readback and vertex buffer and PSO.
17671644 if (auto Err = createRenderTarget (P, State))
1768- return Err;
1645+ return waitThenReturnErr ( std::move ( Err), State) ;
17691646 llvm::outs () << " Render target created.\n " ;
17701647 if (auto Err = createVertexBuffer (P, State))
1771- return Err;
1648+ return waitThenReturnErr ( std::move ( Err), State) ;
17721649 llvm::outs () << " Vertex buffer created.\n " ;
17731650 if (auto Err = createGraphicsPSO (P, State))
1774- return Err;
1651+ return waitThenReturnErr ( std::move ( Err), State) ;
17751652 llvm::outs () << " Graphics PSO created.\n " ;
17761653 if (auto Err = createGraphicsCommands (P, State))
1777- return Err;
1654+ return waitThenReturnErr ( std::move ( Err), State) ;
17781655 llvm::outs () << " Graphics command list created complete.\n " ;
17791656 }
17801657
17811658 if (auto Err = executeCommandList (State))
1782- return Err;
1659+ return waitThenReturnErr ( std::move ( Err), State) ;
17831660 llvm::outs () << " Compute commands executed.\n " ;
17841661 if (auto Err = readBack (P, State))
1785- return Err;
1662+ return waitThenReturnErr ( std::move ( Err), State) ;
17861663 llvm::outs () << " Read data back.\n " ;
17871664
17881665 return llvm::Error::success ();
0 commit comments