-
Notifications
You must be signed in to change notification settings - Fork 90
/
Copy pathpalPipeline.h
888 lines (792 loc) · 43.2 KB
/
palPipeline.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
/*
***********************************************************************************************************************
*
* Copyright (c) 2014-2024 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palPipeline.h
* @brief Defines the Platform Abstraction Library (PAL) IPipeline interface and related types.
***********************************************************************************************************************
*/
#pragma once
#include "pal.h"
#include "palGpuMemoryBindable.h"
#include "palDestroyable.h"
#include "palImage.h"
#include "palShaderLibrary.h"
#include "palSpan.h"
#include <utility>
// Forward declarations of Util types so they can be named here without requiring their full definitions.
namespace Util
{
namespace Abi
{
union ApiHwShaderMapping;
enum class HardwareStage : uint32;
} // namespace Abi
namespace HsaAbi
{
struct KernelArgument;
} // namespace HsaAbi
} // namespace Util
namespace Pal
{
// Forward declarations of Pal types that are only referred to by name below.
struct GpuMemSubAllocInfo;
// The underlying type of PrimitiveTopology depends on the client interface version: uint8 for version 848 and newer,
// uint32 for older versions.
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 848
enum class PrimitiveTopology : uint8;
#else
enum class PrimitiveTopology : uint32;
#endif
/// Specifies a shader type (i.e., what stage of the pipeline this shader was written for).
///
/// Enumerators are numbered consecutively starting at zero, so Count equals the number of shader types.
enum class ShaderType : uint32
{
Compute = 0,
Task,
Vertex,
Hull,
Domain,
Geometry,
Mesh,
Pixel,
Count ///< Number of shader types listed above.
};
/// Number of shader program types supported by PAL (the value of ShaderType::Count).
constexpr uint32 NumShaderTypes = static_cast<uint32>(ShaderType::Count);
/// Maximum number of viewports.
constexpr uint32 MaxViewports = 16;
/// Maximum number of stream-output declaration entries supported by any PAL device.
constexpr uint32 MaxStreamOutEntries = 512;
/// Specifies a general primitive category without differentiating between a strip or list and without specifying
/// whether the primitive will include adjacency info or not.
enum class PrimitiveType : uint32
{
Point = 0x0,
Line = 0x1,
Triangle = 0x2,
Rect = 0x3,
Quad = 0x4,
Patch = 0x5,
Count ///< Number of primitive categories listed above.
};
/// Specifies the target range of Z values after viewport transform.
enum class DepthRange : uint32
{
ZeroToOne = 0x0, ///< Z values range from 0 to 1.
NegativeOneToOne = 0x1, ///< Z values range from -1 to 1.
};
/// Specifies whether the v/t texture coordinates of a point sprite map 0 to 1 from top to bottom or bottom to top.
/// NOTE(review): presumably UpperLeft is the top-to-bottom mapping and LowerLeft the bottom-to-top one -- confirm.
enum class PointOrigin : uint32
{
UpperLeft = 0x0,
LowerLeft = 0x1,
Count ///< Number of point origin modes listed above.
};
/// Specifies a primitive's shade mode.
enum class ShadeMode : uint32
{
Gouraud = 0x0, ///< Gouraud shading mode, pixel shader input is interpolated from the vertices
Flat = 0x1, ///< Flat shading mode, pixel shader input comes from the provoking vertex
Count ///< Number of shade modes listed above.
};
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 869
/// Specifies the pixel shader shading rate. Only declared for client interface versions below 869.
enum class PsShadingRate : uint32
{
Default = 0x0, ///< Let PS specify the shading rate
SampleRate = 0x1, ///< Forced per-sample shading rate
PixelRate = 0x2 ///< Forced per-pixel shading rate
};
#endif
/// Defines a logical operation applied between the color coming from the pixel shader and the current value in the
/// target image.
///
/// The sixteen operations are explicitly numbered 0x0 through 0xF.
enum class LogicOp : uint32
{
Copy = 0x0,
Clear = 0x1,
And = 0x2,
AndReverse = 0x3,
AndInverted = 0x4,
Noop = 0x5,
Xor = 0x6,
Or = 0x7,
Nor = 0x8,
Equiv = 0x9,
Invert = 0xA,
OrReverse = 0xB,
CopyInverted = 0xC,
OrInverted = 0xD,
Nand = 0xE,
Set = 0xF,
};
/// Shader Engine Dispatch Interleave Size
///
/// This determines how many Threads or Threadgroups are sent to one SE before switching to the next SE.
/// Work is always distributed in Threadgroups though.
///
/// The 1D values are specified in Threads and the Threadgroups are walked in a 1D typewriter fashion.
///
/// Disable means that every Threadgroup is issued to the next SE.
///
/// The set of enumerators (and their numeric values) depends on the client interface version.
enum class DispatchInterleaveSize : uint32
{
#if (PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 823)
// Interface 823 and newer: enumerators are implicitly numbered from zero and the 1D sizes are named in threads.
Default,
Disable,
_1D_64_Threads,
_1D_128_Threads,
_1D_256_Threads,
_1D_512_Threads,
Count,
#else
// Older interfaces: explicitly numbered enumerators; there is no 64-thread option.
Default = 0x0,
Disable = 0x1,
_128 = 0x2,
_256 = 0x3,
_512 = 0x4,
Count
#endif
};
/// Specifies whether to override the binning setting for a pipeline. The Default value follows the PBB global setting.
/// The Enable or Disable values override the PBB global setting for the pipeline and set binning accordingly.
enum class BinningOverride : uint32
{
Default = 0x0, ///< Follow the PBB global setting.
Disable = 0x1, ///< Override the PBB global setting and disable binning for this pipeline.
Enable = 0x2, ///< Override the PBB global setting and enable binning for this pipeline.
Count ///< Number of binning override modes listed above.
};
/// Per-pipeline override of the ldsPsGroupSize setting (see GraphicsPipelineCreateInfo::ldsPsGroupSizeOverride).
/// GPU behavior is controlled by LDS_GROUP_SIZE.
/// NOTE(review): SingleWave/DoubleWaves presumably select one or two waves per group -- confirm.
enum class LdsPsGroupSizeOverride : uint32
{
Default = 0x0,
SingleWave = 0x1,
DoubleWaves = 0x2
};
/// Tri-state enum which controls enabling or disabling a feature or behavior, or letting PAL select a sensible default.
/// The underlying type is signed because Default is represented as -1.
enum class OverrideMode : int32
{
Default = -1, ///< PAL selects the default behavior, which could be either enabled or disabled.
Disabled = 0, ///< Force to disabled. Equivalent to setting the value to false.
Enabled = 1, ///< Force to enabled. Equivalent to setting the value to true.
};
/// Enumerates the depth clamping modes a pipeline can use.
enum class DepthClampMode : uint32
{
Viewport = 0x0, ///< Clamps to the viewport min/max depth bounds
_None = 0x1, ///< Disables depth clamping
#if PAL_BUILD_SUPPORT_DEPTHCLAMPMODE_ZERO_TO_ONE
ZeroToOne = 0x2, ///< Clamps between 0.0 and 1.0.
#endif
// Unfortunately for Linux clients, X.h includes a "#define None 0" macro. Clients have their choice of either
// undefining None before including this header or using _None when dealing with PAL.
// The None alias below is therefore only declared when no None macro is defined.
#ifndef None
None = _None, ///< Disables depth clamping
#endif
};
/// Common flags controlling creation of both compute and graphics pipelines.
union PipelineCreateFlags
{
struct
{
uint32 clientInternal : 1; ///< Internal pipeline not created by the application.
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 837
uint32 supportDynamicDispatch : 1; ///< Pipeline will be used with @ref ICmdBuffer::CmdDynamicDispatch.
                                   ///  Only present for client interface versions below 837.
#endif
uint32 reserved1 : 1; ///< Reserved.
uint32 reserved : 29; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed as 32-bit uint.
};
/// Constant defining the maximum number of view instances that is supported.
constexpr uint32 MaxViewInstanceCount = 6;
/// Specifies graphics pipeline view instancing state. The per-instance arrays hold MaxViewInstanceCount entries.
/// NOTE(review): presumably only the first viewInstanceCount entries of each array are meaningful -- confirm.
struct ViewInstancingDescriptor
{
uint32 viewInstanceCount; ///< The view instance count of the graphics pipeline.
uint32 viewId[MaxViewInstanceCount]; ///< The view instance ids.
uint32 renderTargetArrayIdx[MaxViewInstanceCount]; ///< The instance render target array index, can be
/// used in hardware accelerated stereo rendering.
uint16 viewportArrayIdx[MaxViewInstanceCount]; ///< The instance viewport array index, can be
/// used in hardware accelerated stereo rendering.
bool enableMasking; ///< Indicates whether instance masking is enabled.
};
/// Specifies the input parameters for the MSAA coverage out feature. MSAA coverage out is used in conjunction with a
/// single sampled color image. This feature exports a mask indicating which samples would have been used if the
/// image had been multi-sampled. The mask is exported to the specified channel of the MRT pointing to the rendered
/// image. That is, the MRT must be an active bound render target. This MSAA mask data can then be post-processed.
struct MsaaCoverageOutDescriptor
{
union
{
struct
{
uint32 enable : 1; ///< Set to true to enable render target channel output
uint32 numSamples : 4; ///< Number of samples to export
uint32 mrt : 3; ///< Which MRT to export to.
uint32 channel : 2; ///< Which channel to export to (x = 0, y = 1, z = 2, w = 3)
uint32 reserved : 22; ///< Remaining bits of the 32-bit flags word.
};
uint32 u32All; ///< All flags combined as a single uint32.
} flags; ///< MSAA coverage out parameters.
};
/// Specifies properties about an indirect function belonging to a compute @ref IPipeline object. Part of the input
/// structure to IDevice::CreateComputePipeline().
struct ComputePipelineIndirectFuncInfo
{
const char* pSymbolName; ///< ELF Symbol name for the associated function. Must not be null.
gpusize gpuVirtAddr; ///< [out] GPU virtual address of the function. This is computed by PAL during
/// pipeline creation.
};
/// Specifies properties for creation of a compute @ref IPipeline object. Input structure to
/// IDevice::CreateComputePipeline().
struct ComputePipelineCreateInfo
{
PipelineCreateFlags flags; ///< Flags controlling pipeline creation.
const void* pPipelineBinary; ///< Pointer to Pipeline ELF binary implementing the Pipeline ABI
/// interface. The Pipeline ELF contains pre-compiled shaders,
/// register values, and additional metadata.
size_t pipelineBinarySize; ///< Size of Pipeline ELF binary in bytes.
uint32 maxFunctionCallDepth; ///< Maximum depth for indirect function calls. Not used for a new
/// path ray-tracing pipeline as the compiler has pre-calculated
/// stack requirements.
bool disablePartialDispatchPreemption; ///< Prevents scenarios where a subset of the dispatched thread groups are
/// preempted and the remaining thread groups run to completion. This
/// can occur when thread group granularity preemption is available and
/// instruction level (CWSR) is not. This setting is useful for allowing
/// dispatches with interdependent thread groups.
DispatchInterleaveSize interleaveSize; ///< Controls how many thread groups are sent to one SE before switching to
/// the next one.
/// PAL expects a fixed 3D thread group size for each compute pipeline but the HSA ABI supports dynamic group sizes.
/// If this pipeline's ELF binary metadata doesn't specify a fixed thread group size, this should be used to force
/// a particular thread group size. If this extent is set to all zeros PAL will use the metadata's group size.
/// This field is not supported on PAL ABI ELFs, it should be set to all zeros.
Extent3d threadsPerGroup;
const char* pKernelName; ///< When creating a pipeline from an HSA ELF binary that contains multiple kernels,
/// names the kernel to create the pipeline from. Null means the ELF binary
/// contains only one kernel.
};
/// Specifies information about the viewport behavior of an assembled graphics pipeline. Part of the input
/// structure @ref GraphicsPipelineCreateInfo.
struct ViewportInfo
{
bool depthClipNearEnable; ///< Enable clipping based on the near Z coordinate.
bool depthClipFarEnable; ///< Enable clipping based on the far Z coordinate.
DepthRange depthRange; ///< Specifies Z dimensions of screen space (i.e., post viewport transform:
/// 0 to 1 or -1 to 1).
};
/// Specifies which edge rules are used for rasterization.
enum class EdgeRuleMode : uint32
{
D3dCompliant = 0x0, ///< Use rasterization edge-rules which comply with the D3D spec.
OpenGlDefault = 0x1, ///< Use rasterization edge-rules compatible with the default OpenGL driver.
};
/// Specifies the rasterizer state in the properties for creation of a graphics pipeline. Part of the input structure
/// @ref GraphicsPipelineCreateInfo.
struct RasterizerState
{
PointOrigin pointCoordOrigin; ///< Controls texture coordinate orientation for point sprites.
bool expandLineWidth; ///< If true, line primitives will have their width expanded by 1/cos(a)
/// where a is the minimum angle from horizontal or vertical.
/// This can be used in conjunction with PS patching for a client to
/// implement line antialiasing.
ShadeMode shadeMode; ///< Specifies shading mode, Gouraud or Flat
bool rasterizeLastLinePixel; ///< Specifies whether to draw last pixel in a line.
bool outOfOrderPrimsEnable; ///< Enables out-of-order primitive rasterization. PAL silently
/// ignores this if it is unsupported in hardware.
bool perpLineEndCapsEnable; ///< Forces the use of perpendicular line end caps as opposed to
/// axis-aligned line end caps during line rasterization.
BinningOverride binningOverride; ///< Binning setting for this pipeline.
DepthClampMode depthClampMode; ///< Depth clamping behavior
union
{
struct
{
uint8 clipDistMaskValid : 1; ///< Whether or not @ref clipDistMask, below, is valid.
uint8 cullDistMaskValid : 1; ///< Whether or not @ref cullDistMask, below, is valid.
uint8 reserved : 6; ///< Remaining bits of the 8-bit flags byte.
};
uint8 u8All; ///< All the flags as a single value.
} flags;
uint8 cullDistMask; ///< Mask of which cullDistance exports to leave enabled.
uint8 clipDistMask; ///< Mask of which clipDistance exports to leave enabled.
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 869
PsShadingRate forcedShadingRate; ///< Forced PS shading rate. Only present for interface versions below 869.
#endif
bool dx10DiamondTestDisable; ///< Disable DX10 diamond test during line rasterization.
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 804
EdgeRuleMode edgeRule; ///< Rasterization edge-rule mode. Only present for interface versions 804 and newer.
#endif
};
/// Specifies per-MRT color target info in the color target state.
struct ColorTargetInfo
{
SwizzledFormat swizzledFormat; ///< Color target format and channel swizzle. Set the format to invalid
/// if no color target will be bound at this slot.
uint8 channelWriteMask; ///< Color target write mask. Bit 0 controls the red channel, bit 1 is
/// green, bit 2 is blue, and bit 3 is alpha.
bool forceAlphaToOne; ///< Treat alpha as one regardless of the shader output. Ignored unless
/// supportAlphaToOne is set in DeviceProperties.
};
/// Specifies the color target state in the properties for creation of a graphics pipeline. Part of the input
/// structure @ref GraphicsPipelineCreateInfo.
struct ColorTargetState
{
bool alphaToCoverageEnable; ///< Enable alpha to coverage.
bool dualSourceBlendEnable; ///< Blend state bound at draw time will use a dual source blend mode.
LogicOp logicOp; ///< Logic operation to perform.
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 904
bool uavExportSingleDraw; ///< When UAV export is enabled, acts as a hint that only a single draw
/// is done on a color target with this or subsequent pipelines before
/// a barrier. Improves performance by allowing pipelines to overlap.
/// Only present for client interface versions below 904.
#endif
ColorTargetInfo target[MaxColorTargets]; ///< Per-MRT color target info.
};
/// Specifies properties for creation of a graphics @ref IPipeline object. Input structure to
/// IDevice::CreateGraphicsPipeline().
struct GraphicsPipelineCreateInfo
{
PipelineCreateFlags flags; ///< Flags controlling pipeline creation.
const void* pPipelineBinary; ///< Pointer to Pipeline ELF binary implementing the Pipeline ABI
/// interface. The Pipeline ELF contains pre-compiled shaders,
/// register values, and additional metadata.
size_t pipelineBinarySize; ///< Size of Pipeline ELF binary in bytes.
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 816
const IShaderLibrary** ppShaderLibraries; ///< An array of graphics @ref IShaderLibrary objects. pPipelineBinary
/// and ppShaderLibraries can't be valid at the same time.
size_t numShaderLibraries; ///< Number of graphics shader library objects in ppShaderLibraries.
#endif
bool useLateAllocVsLimit; ///< If set, use the specified lateAllocVsLimit instead of PAL internally
/// determining the limit.
uint32 lateAllocVsLimit; ///< The number of VS waves that can be in flight without having param
/// cache and position buffer space. If useLateAllocVsLimit flag is set,
/// PAL will use this limit instead of the PAL-specified limit.
bool useLateAllocGsLimit; ///< If set, use the specified lateAllocGsLimit instead of PAL internally
/// determining the limit.
uint32 lateAllocGsLimit; ///< Controls GS LateAlloc val (for pos/prim allocations NOT param cache)
/// on NGG pipelines. Can be no more than 127.
struct
{
struct
{
PrimitiveType primitiveType; ///< Basic primitive category: points, line, triangles, patches.
bool topologyIsPolygon; ///< Indicates that triangle primitives are combined to represent more
/// complex polygons. Only valid for triangle primitive types.
uint32 patchControlPoints; ///< Number of control points per patch. Only required if primitiveType
/// is PrimitiveType::Patch.
} topologyInfo; ///< Various information about the primitive topology that will be used with this pipeline.
/// All of this info must be consistent with the full topology specified by
/// ICmdBuffer::SetPrimitiveTopology() when drawing with this pipeline bound.
/// Number of vertex buffer slots which are accessed by this pipeline. Behavior is undefined if the pipeline
/// tries to access a vertex buffer slot outside the range [0, vertexBufferCount). It is generally advisable
/// to make this the minimum value possible because that reduces the number of vertex buffer slots PAL has to
/// maintain for this pipeline when recording command buffers.
uint32 vertexBufferCount;
} iaState; ///< Input assembler state.
RasterizerState rsState; ///< Rasterizer state.
ColorTargetState cbState; ///< Color target state.
ViewInstancingDescriptor viewInstancingDesc; ///< Descriptor describes view instancing state
/// of the graphics pipeline
MsaaCoverageOutDescriptor coverageOutDesc; ///< Descriptor describes input parameters for MSAA coverage out.
ViewportInfo viewportInfo; ///< Viewport info.
DispatchInterleaveSize taskInterleaveSize; ///< Ignored for pipelines without a task shader. For pipelines with
/// a task shader, controls how many thread groups are sent to one
/// SE before switching to the next one.
LdsPsGroupSizeOverride ldsPsGroupSizeOverride; ///< Whether to override ldsPsGroupSize setting for pipeline.
};
/// The graphics pipeline view instancing information. This is used to determine if hardware accelerated stereo
/// rendering can be enabled for a graphics pipeline.
struct GraphicPipelineViewInstancingInfo
{
union
{
struct
{
uint32 shaderUseViewId : 1; ///< If any shader in pipeline uses view id.
uint32 gsExportRendertargetArrayIndex : 1; ///< If gs exports render target array index,
/// must be 0 if there is no gs.
uint32 gsExportViewportArrayIndex : 1; ///< If gs exports viewport array index,
/// must be 0 if there is no gs.
uint32 reserved : 29; ///< Reserved for future use.
};
uint32 apiShaderFlags; ///< All of the flags above combined as a single uint32.
};
const ViewInstancingDescriptor* pViewInstancingDesc; ///< View Instancing descriptor
};
/// Reports properties of a compiled pipeline. This includes hashes for the pipeline and shaders that the client can
/// use to correlate PAL pipeline/shader dumps with corresponding API-level pipelines/shaders.
struct PipelineInfo
{
PipelineHash internalPipelineHash; ///< 128-bit identifier extracted from this pipeline's ELF binary, composed of
/// the state the compiler decided was appropriate to identify the compiled
/// shaders. The lower 64 bits are "stable"; the upper 64 bits are "unique".
struct
{
ShaderHash hash; ///< Unique 128-bit identifier for this shader. 0 indicates there is no shader bound for
/// the corresponding shader stage.
} shader[NumShaderTypes]; ///< Array of per-shader pipeline properties, one entry per ShaderType.
union
{
struct
{
uint32 hsaAbi : 1; ///< This pipeline uses the HSA ABI (i.e. bind arguments not user-data)
uint32 usesCps : 1; ///< This pipeline uses continuations passing shaders (CPS). In an archive pipeline,
/// this bit is set iff it is set in any constituent pipeline.
uint32 cpsGlobal : 1; ///< If using continuations passing shaders (CPS), stack is in global rather than
/// scratch. In an archive pipeline, this bit is set iff it is set in any
/// constituent pipeline.
uint32 reserved : 29; ///< Reserved for future use.
};
uint32 u32All; ///< All flags combined as a single uint32.
} flags; ///< Pipeline properties.
struct
{
union
{
struct
{
uint32 perSampleShading : 1; ///< Shader instructions want per-sample execution.
uint32 usesSampleMask : 1; ///< Shader is using sample mask.
uint32 enablePops : 1; ///< Primitive order pixel shader is enabled.
uint32 reserved : 29; ///< Reserved for future use.
};
uint32 u32All; ///< All flags combined as a single uint32.
} flags; ///< Pixel shader flags.
} ps; ///< Pixel shader properties.
uint64 resourceMappingHash; ///< 64-bit hash of the resource mapping used when compiling the pipeline,
/// if available (0 otherwise).
};
/// Describes a 3D arrangement of threads or thread groups used as part of a compute shader dispatch.
///
/// The structure sits halfway between Extent3d and Offset3d: depending on context it may stand for an extent or for
/// an offset. Its meaning is tied to 3D thread or thread group grids rather than to a generic "extent" or "offset",
/// and whether it counts threads or thread groups is likewise context specific.
struct DispatchDims
{
    uint32 x; ///< Threads or thread groups in the X dimension.
    uint32 y; ///< Threads or thread groups in the Y dimension.
    uint32 z; ///< Threads or thread groups in the Z dimension.

    /// Computes the volume of this 3D arrangement of threads or thread groups.
    ///
    /// @returns The product of the three dimensions, i.e. the total number of threads or thread groups this struct
    ///          represents.
    uint32 Flatten() const
    {
        const uint32 xyArea = x * y;
        return xyArea * z;
    }
};

// Some code casts a DispatchDims directly to an array of three uint32s, so its size must be exactly three uint32s.
static_assert((sizeof(uint32) * 3) == sizeof(DispatchDims), "DispatchDims not castable to uint32*");
/// Adds two DispatchDims dimension by dimension.
///
/// @param [in] l  The left-hand argument.
/// @param [in] r  The right-hand argument.
///
/// @returns A new DispatchDims whose x, y and z are the sums of the matching dimensions of 'l' and 'r'.
inline DispatchDims operator+(DispatchDims l, DispatchDims r)
{
    DispatchDims sum = {};
    sum.x = l.x + r.x;
    sum.y = l.y + r.y;
    sum.z = l.z + r.z;
    return sum;
}
/// Adds one DispatchDims into another, dimension by dimension.
///
/// @param [in,out] l  The left-hand argument; overwritten with the result.
/// @param [in]     r  The right-hand argument.
///
/// @returns A reference to 'l' after it has been updated to the sum of 'l' and 'r'.
inline DispatchDims& operator+=(DispatchDims& l, DispatchDims r)
{
    l = l + r;
    return l;
}
/// Multiplies two DispatchDims dimension by dimension.
///
/// @param [in] l  The left-hand argument.
/// @param [in] r  The right-hand argument.
///
/// @returns A new DispatchDims whose x, y and z are the products of the matching dimensions of 'l' and 'r'.
inline DispatchDims operator*(DispatchDims l, DispatchDims r)
{
    DispatchDims product = {};
    product.x = l.x * r.x;
    product.y = l.y * r.y;
    product.z = l.z * r.z;
    return product;
}
/// Multiplies one DispatchDims into another, dimension by dimension.
///
/// @param [in,out] l  The left-hand argument; overwritten with the result.
/// @param [in]     r  The right-hand argument.
///
/// @returns A reference to 'l' after it has been updated to the product of 'l' and 'r'.
inline DispatchDims& operator*=(DispatchDims& l, DispatchDims r)
{
    l = l * r;
    return l;
}
/// Used to represent API level shader stages as a bit mask. Each flag is the single bit whose position is the numeric
/// value of the corresponding @ref ShaderType enumerator.
enum ShaderStageFlagBits : uint32
{
ApiShaderStageCompute = (1u << static_cast<uint32>(ShaderType::Compute)),
ApiShaderStageTask = (1u << static_cast<uint32>(ShaderType::Task)),
ApiShaderStageVertex = (1u << static_cast<uint32>(ShaderType::Vertex)),
ApiShaderStageHull = (1u << static_cast<uint32>(ShaderType::Hull)),
ApiShaderStageDomain = (1u << static_cast<uint32>(ShaderType::Domain)),
ApiShaderStageGeometry = (1u << static_cast<uint32>(ShaderType::Geometry)),
ApiShaderStageMesh = (1u << static_cast<uint32>(ShaderType::Mesh)),
ApiShaderStagePixel = (1u << static_cast<uint32>(ShaderType::Pixel)),
};
/// Reports shader stats. Multiple bits set in the shader stage mask indicate that multiple shaders have been combined
/// due to HW support. The same information will be repeated for each of the constituent shaders in this case.
struct ShaderStats
{
uint32 shaderStageMask; ///< Indicates the stages of the pipeline this shader is
/// used for. If multiple bits are set, it implies
/// shaders were merged. See @ref ShaderStageFlagBits.
CommonShaderStats common; ///< The shader compilation parameters for this shader.
/// Maximum number of VGPRs the compiler was allowed to use for this shader. This limit will be the minimum
/// of any architectural restriction and any client-requested limit intended to increase the number of waves in
/// flight.
uint32 numAvailableVgprs;
/// Maximum number of SGPRs the compiler was allowed to use for this shader. This limit will be the minimum
/// of any architectural restriction and any client-requested limit intended to increase the number of waves in
/// flight.
uint32 numAvailableSgprs;
size_t isaSizeInBytes; ///< Size of the shader ISA disassembly for this shader.
ShaderHash palShaderHash; ///< Internal hash of the shader compilation data used by PAL.
union
{
struct
{
uint32 writesUAV : 1; ///< This shader performs writes to UAVs.
uint32 writesDepth : 1; ///< Indicates explicit depth writes performed by the shader stage.
uint32 streamOut : 1; ///< The shader performs stream out of shader generated data.
uint32 reserved : 29; ///< Reserved for future use.
};
uint32 u32All; ///< All flags combined as a single uint32.
} shaderOperations; ///< Flags depicting shader operations.
struct
{
DispatchDims numThreadsPerGroup; ///< Number of compute threads per thread group in X, Y, and Z dimensions.
} cs; ///< Parameters specific to compute shader only.
union
{
struct
{
uint8 copyShaderPresent : 1; ///< Indicates that the copy shader data is valid.
uint8 reserved : 7; ///< Reserved for future use.
};
uint8 u8All; ///< All the flags as a single value.
} flags; ///< Flags related to this shader data.
CommonShaderStats copyShader; ///< This data is valid only when the copyShaderPresent flag above is set.
};
/**
***********************************************************************************************************************
* @interface IPipeline
* @brief Monolithic object containing all shaders and a large amount of "shader adjacent" state. Separate concrete
* implementations will support compute or graphics pipelines.
*
* @see IDevice::CreateComputePipeline()
* @see IDevice::CreateGraphicsPipeline()
* @see IDevice::LoadPipeline()
***********************************************************************************************************************
*/
class IPipeline : public IDestroyable
{
public:
/// Returns PAL-computed properties of this pipeline and its corresponding shaders.
///
/// @returns Property structure describing this pipeline.
virtual const PipelineInfo& GetInfo() const = 0;
/// Returns a list of GPU memory allocations used by this pipeline.
///
/// @param [in,out] pNumEntries Input value specifies the available size in pAllocInfoList; output value
/// reports the number of GPU memory allocations.
/// @param [out] pAllocInfoList If pAllocInfoList=nullptr, then pNumEntries is ignored on input. On output it
/// will reflect the number of allocations that make up this pipeline. If
/// pAllocInfoList!=nullptr, then on input pNumEntries is assumed to be the number
/// of entries in the pAllocInfoList array. On output, pNumEntries reflects the
/// number of entries in pAllocInfoList that are valid.
/// @returns Success if the allocation info was successfully written to the buffer.
/// + ErrorInvalidValue if the caller provides a buffer size that is different from the size needed.
/// + ErrorInvalidPointer if pNumEntries is nullptr.
virtual Result QueryAllocationInfo(
size_t* pNumEntries,
GpuMemSubAllocInfo* const pAllocInfoList) const = 0;
/// Obtains the binary code object for this pipeline.
///
/// @param [in, out] pSize Represents the size of the shader ISA code.
///
/// @param [out] pBuffer If non-null, the pipeline ELF is written in the buffer. If null, the size required
/// for the pipeline ELF is given out in the location pSize.
///
/// @returns Success if the pipeline binary was fetched successfully.
/// +ErrorUnavailable if the pipeline binary was not fetched successfully.
virtual Result GetCodeObject(
uint32* pSize,
void* pBuffer) const = 0;
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 816
/// Obtains the pointer of code object with ELF format according to the shader type. Returned ELF object is not
/// guaranteed to be unique with different shader type, because a single code object can contain multiple shaders.
///
/// @param [in] shaderType The shader stage for which the code object are requested.
/// @param [out] pSize The size of the ELF binary.
///
/// @returns The pointer of ELF binary which contains requested shader stage.
virtual const void* GetCodeObjectWithShaderType(
ShaderType shaderType,
size_t* pSize) const = 0;
#endif
/// Obtains the shader pre and post compilation stats/params for the specified shader stage.
///
/// @param [in]  shaderType          The shader stage for which the stats are requested.
///
/// @param [out] pShaderStats        Pointer to the ShaderStats structure which will be filled with the shader stats
///                                  for the shader stage mentioned in shaderType. This cannot be nullptr.
/// @param [in]  getDisassemblySize  If set to true, performs disassembly on the shader binary code and reports the
///                                  size of the disassembly string in ShaderStats::isaSizeInBytes. Else reports 0.
///
/// @returns Success if the stats were successfully obtained for this shader, including the shader disassembly size.
///          + ErrorUnavailable if a wrong shader stage for this pipeline was specified, or if some internal error
///            occurred.
virtual Result GetShaderStats(
ShaderType shaderType,
ShaderStats* pShaderStats,
bool getDisassemblySize) const = 0;
/// Obtains the compiled shader ISA code for the shader stage specified.
///
/// @param [in]      shaderType  The shader stage for which the shader ISA code is requested.
///
/// @param [in, out] pSize       Represents the size of the shader ISA code. When pBuffer is null, the size required
///                              for the shader ISA is written to this location.
///
/// @param [out]     pBuffer     If non-null, the shader ISA code is written in the buffer. If null, the size
///                              required for the shader ISA is given out in the location pSize.
///
/// @returns Success if the shader ISA code was fetched successfully.
///          + ErrorUnavailable if the shader ISA code was not fetched successfully.
virtual Result GetShaderCode(
ShaderType shaderType,
size_t* pSize,
void* pBuffer) const = 0;
/// Obtains the generated performance data for the hardware stage specified.
///
/// @param [in]      hardwareStage  The hardware stage of the shader for which the performance data is requested.
/// @param [in, out] pSize          Represents the size of the performance data. When pBuffer is null, the size
///                                 required for the performance data is written to this location.
/// @param [out]     pBuffer        If non-null, the performance data is written in the buffer. If null, the size
///                                 required for the performance data is given out in the location pSize.
///
/// @returns Success if the performance data was fetched successfully.
///          + ErrorUnavailable if the performance data was not fetched successfully.
virtual Result GetPerformanceData(
Util::Abi::HardwareStage hardwareStage,
size_t* pSize,
void* pBuffer) = 0;
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 837
/// Creates a new dynamic launch descriptor for this pipeline, or updates an existing one. Launch descriptors can
/// only be consumed by @ref ICmdBuffer::CmdDispatchDynamic(). Each one acts as a GPU-side "handle" to a pipeline
/// together with the set of shader libraries it is linked with. The launch descriptor size is reported through
/// @ref DeviceProperties; a reported size of 0 indicates that this feature is not supported.
///
/// Currently only supported on compute pipelines.
///
/// @param [in, out] pCpuAddr  Launch descriptor to create or update. Must not be null.
/// @param [in]      resolve   The launch descriptor contains state from a previous link operation, which needs to
///                            be updated during this operation.
///
/// @returns Success if the operation was successful. Other error codes may include:
///          + ErrorUnavailable if called on a graphics pipeline or a pipeline that does not support dynamic
///            launch. @ref PipelineCreateFlags
///          + ErrorInvalidPointer if pCpuAddr is null.
///
/// @note The default implementation provided here ignores its arguments and returns Result::Unsupported.
virtual Result CreateLaunchDescriptor(void* pCpuAddr, bool resolve)
{
    return Result::Unsupported;
}
#endif
/// Notifies PAL that this pipeline may make indirect function calls to any function contained within any of the
/// specified @ref IShaderLibrary objects. This gives PAL a chance to perform any late linking steps required for
/// valid execution of the possible function calls (this could include adjusting hardware resources such as GPRs
/// or LDS space for the pipeline).
///
/// This may be called multiple times on the same pipeline object. Subsequent calls do not invalidate the result
/// of previous calls.
///
/// This must be called prior to binding this pipeline to a command buffer which will make function calls into any
/// shader function contained within any of the specified libraries. Failure to comply is an error and will result
/// in undefined behavior.
///
/// Currently only supported on compute pipelines.
///
/// @param [in] ppLibraryList  List of @ref IShaderLibrary objects to link with.
/// @param [in] libraryCount   Number of valid library objects in the ppLibraryList array.
///
/// @returns Success if the operation is successful. Other return codes may include:
///          + ErrorUnavailable if called on a graphics pipeline.
///          + ErrorBadPipelineData if any of the libraries in ppLibraryList are not compatible with this pipeline.
///            Reasons for incompatibility include (but are not limited to) different user-data mappings, different
///            wavefront sizes, and other reasons.
virtual Result LinkWithLibraries(
const IShaderLibrary*const* ppLibraryList,
uint32 libraryCount) = 0;
/// Sets the stack size for indirect function calls made by this pipeline. This may be smaller than or equal to the
/// stack size already determined during pipeline creation or during an earlier call to LinkWithLibraries() because
/// the client has access to more information about which functions contained in those libraries (or in the pipeline
/// itself) are actually going to be called.
///
/// Note that a future call to LinkWithLibraries() will invalidate this value, so this method should be called again
/// after any such call.
///
/// @param [in] stackSizeInBytes  Client-specified stack size, in bytes.
virtual void SetStackSizeInBytes(
uint32 stackSizeInBytes) = 0;
/// Retrieves the stack sizes managed by the compiler, including the frontend stack and the backend stack.
///
/// @param [out] pSizes  To be filled with both the frontend stack size and the backend stack size, in bytes.
///
/// @returns Success.
virtual Result GetStackSizes(
CompilerStackSizes* pSizes) const = 0;
/// Returns the mapping from API shader types to hardware stages for this pipeline.
///
/// @returns The appropriate mapping for this pipeline.
virtual Util::Abi::ApiHwShaderMapping ApiHwShaderMapping() const = 0;
/// Given the zero-based position of a kernel argument, returns a pointer to that argument's metadata.
///
/// @note Only compute pipelines using the HSA ABI have kernel arguments.
///
/// @param [in] index  The zero-based position of the kernel argument to query.
///
/// @returns A pointer to the kernel argument's metadata, or null if this pipeline doesn't have this argument.
virtual const Util::HsaAbi::KernelArgument* GetKernelArgument(uint32 index) const = 0;
/// Retrieves the arbitrary client data pointer currently associated with this object.
/// Clients can use this pointer to associate arbitrary data with a particular PAL object.
///
/// @returns Pointer to client data.
void* GetClientData() const
{
    return m_pClientData;
}
/// Stores a new value for the arbitrary client data pointer associated with this object.
/// Clients can use this pointer to associate arbitrary data with a particular PAL object.
///
/// @param [in] pClientData  A pointer to arbitrary client data.
void SetClientData(void* pClientData) { m_pClientData = pClientData; }
/// Gets the array of underlying pipelines that this pipeline contains. For a normal non-multi-pipeline,
/// this returns a single-entry array pointing to the same IPipeline. For a multi-pipeline compiled in
/// dynamic launch mode, this returns an empty array. The contents of the returned array remain valid
/// until the IPipeline is destroyed.
///
/// @returns The array of underlying pipelines.
virtual Util::Span<const IPipeline* const> GetPipelines() const = 0;
/// Gets the array of underlying shader libraries that this pipeline contains. For a normal non-multi-pipeline,
/// this returns the empty array. The contents of the returned array remain valid until the IPipeline is
/// destroyed.
///
/// @note The default implementation provided here returns an empty span.
///
/// @returns The array of underlying shader libraries.
virtual Util::Span<const IShaderLibrary* const> GetLibraries() const
{
    return {};
}
protected:
/// @internal Constructor. Prevents use of the new operator on this interface. Clients must create objects by
/// explicitly calling the proper create method. The client data pointer starts out as null.
IPipeline()
    :
    m_pClientData(nullptr)
{
}
/// @internal Destructor. Prevents use of the delete operator on this interface. Clients must destroy objects by
/// explicitly calling IDestroyable::Destroy() and are responsible for freeing the system memory allocated for the
/// object on their own.
virtual ~IPipeline() = default;
private:
/// @internal Client data pointer. This can have an arbitrary value; it is returned by GetClientData() and set via
/// SetClientData().
/// For non-top-layer objects, this will point to the layer above the current object.
void* m_pClientData;
/// @internal Copy construction and copy assignment are explicitly deleted: IPipeline objects cannot be copied.
IPipeline(const IPipeline&) = delete;
IPipeline& operator=(const IPipeline&) = delete;
};
} // Pal