/*
***********************************************************************************************************************
*
* Copyright (c) 2014-2024 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palGpuMemory.h
* @brief Defines the Platform Abstraction Library (PAL) IGpuMemory interface and related types.
***********************************************************************************************************************
*/
#pragma once
#include "pal.h"
#include "palDestroyable.h"
namespace Pal
{
// Forward declarations.
class IGpuMemory;
class IDevice;
class IImage;
enum class VaRange : uint32;
/// Specifies Base Level priority per GPU memory allocation as a hint to the memory manager in the event it needs to
/// select allocations to page out of their preferred heaps.
enum class GpuMemPriority : uint32
{
Unused = 0x0, ///< Indicates that the allocation is not currently being used at all, and should be the first
/// choice to be paged out.
VeryLow = 0x1, ///< Lowest priority to keep in its preferred heap.
Low = 0x2, ///< Low priority to keep in its preferred heap.
Normal = 0x3, ///< Normal priority to keep in its preferred heap.
High = 0x4, ///< High priority to keep in its preferred heap (e.g., render targets).
VeryHigh = 0x5, ///< Highest priority to keep in its preferred heap. Last choice to be paged out (e.g., page
/// tables, displayable allocations).
Count
};
/// Specifies a finer granularity to the base Level priority per GPU memory allocation as a hint to the memory manager
/// in the event it needs to select allocations to page out of their preferred heaps.
enum class GpuMemPriorityOffset : uint32
{
Offset0 = 0x0, ///< Same priority as Base Level.
Offset1 = 0x1, ///< Next priority from Base Level.
Offset2 = 0x2, ///< Next priority from Base Level.
Offset3 = 0x3, ///< Next priority from Base Level.
Offset4 = 0x4, ///< Next priority from Base Level.
Offset5 = 0x5, ///< Next priority from Base Level.
Offset6 = 0x6, ///< Next priority from Base Level.
Offset7 = 0x7, ///< Highest priority from Base Level.
Count
};
/// Specifies the access mode for unmapped pages in a virtual GPU memory allocation.
enum class VirtualGpuMemAccessMode : uint32
{
Undefined = 0x0, ///< Used in situations where no special access mode is needed.
NoAccess = 0x1, ///< All accesses of unmapped pages will trigger a GPU page fault.
ReadZero = 0x2, ///< Reads of unmapped pages return zero, and writes are discarded.
Count
};
/// Controls the behavior of this allocation with respect to the MALL.
enum class GpuMemMallPolicy : uint32
{
Default = 0x0, ///< MALL policy is decided by the driver.
Never = 0x1, ///< This allocation is never put through the MALL.
Always = 0x2, ///< This allocation is always put through the MALL.
};
/// Bitmask of cases where RPM view memory accesses will bypass the MALL.
enum RpmViewsBypassMall : uint32
{
RpmViewsBypassMallOff = 0x0, ///< Disable MALL bypass
RpmViewsBypassMallOnRead = 0x1, ///< Skip MALL for read access of views created in RPM
RpmViewsBypassMallOnWrite = 0x2, ///< Skip MALL for write access of views created in RPM
RpmViewsBypassMallOnCbDbWrite = 0x4, ///< Control the RPM CB/DB behavior
};
/// Used for specifying a subregion of the allocation as having a different mall policy from the rest of the
/// allocation.
struct GpuMemMallRange
{
uint32 startPage; ///< Starting 4k page that will obey the specified mallPolicy.
uint32 numPages; ///< Number of 4k pages that will obey the specified mallPolicy.
};
/// Specifies flags for @ref IGpuMemory creation.
union GpuMemoryCreateFlags
{
struct
{
uint64 virtualAlloc : 1; ///< Create a _virtual_ as opposed to _real_ GPU memory allocation.
/// Only VA space will be allocated, and pages must be mapped via
/// IQueue::RemapVirtualMemoryPages().
uint64 shareable : 1; ///< Memory can be shared between devices in the same process that
/// report the sharedMemory flag from
/// IDevice::GetMultiGpuCompatibility().
uint64 interprocess : 1; ///< Memory will be visible to other processes
/// (they may choose to open it).
uint64 presentable : 1; ///< Memory can be bound to an image that will be used by presents.
uint64 flippable : 1; ///< Memory can be bound to an image that will be used by flip
/// presents.
uint64 stereo : 1; ///< Memory will be used for stereo (DXGI or AQBS stereo).
uint64 globallyCoherent : 1; ///< Memory needs to be globally coherent,
/// indicating the driver must manage both
/// CPU caches and GPU caches that are not flushed on
/// command buffer boundaries.
uint64 xdmaBuffer : 1; ///< GPU memory will be used for an XDMA cache buffer for
/// transferring data
/// between GPUs in a multi-GPU configuration.
uint64 turboSyncSurface : 1; ///< The memory will be used for TurboSync private swapchain primary.
uint64 typedBuffer : 1; ///< GPU memory will be permanently considered a single
/// typed buffer pseudo-object
/// with the properties given in typedBufferInfo.
uint64 globalGpuVa : 1; ///< The GPU virtual address must be visible to all devices.
uint64 useReservedGpuVa : 1; ///< Use GPU virtual address previously reserved by another
/// memory object. It is invalid when using the shadow descriptor
/// table VA range.
uint64 autoPriority : 1; ///< Allow the platform to automatically determine the priority of
/// this GPU memory allocation. Flag is only valid if the device
/// reports that it supports this feature, and will result in an
/// error otherwise.
uint64 busAddressable : 1; ///< Create Bus Addressable memory. Allows the memory to be used by
/// another device on the PCIe bus by exposing a write-only bus address.
uint64 sdiExternal : 1; ///< Create External Physical memory from an already allocated memory
/// on remote device. Similar to virtual allocations (no physical
/// backing) but have an immutable page mapping. The client must
/// specify surfaceBusAddr and markerBusAddr either at creation time
/// in GpuMemoryCreateInfo or by calling SetSdiRemoteBusAddress
/// once before using the GPU memory. The page mappings for an
/// allocation with this flag set must be initialized by including a
/// reference to it in the ppExternPhysMem list for the first
/// submission that references it.
uint64 sharedViaNtHandle : 1; ///< Memory will be shared using an NT handle.
uint64 peerWritable : 1; ///< The memory can be open as peer memory and be writable.
uint64 tmzProtected : 1; ///< The memory is protected using TMZ (Trusted Memory Zone) or HSFB
/// (Hybrid Secure Framebuffer). It is not CPU accessible,
/// and GPU access is restricted by the hardware such that data
/// cannot be copied from protected memory into unprotected memory.
uint64 placeholder0 : 1; ///< Placeholder.
uint64 externalOpened : 1; ///< Specifies the GPU memory object is opened.
uint64 restrictedContent : 1; ///< Specifies the GPU memory is protected content.
uint64 restrictedAccess : 1; ///< Specifies the GPU memory is a restricted shared access resource.
uint64 crossAdapter : 1; ///< Specifies the GPU memory is a shared cross-adapter resource.
uint64 cpuInvisible : 1; ///< By default, PAL makes every allocation CPU visible if all of its
/// preferred heaps are CPU visible. This flag can be used to override
/// this behavior when the client knows the memory will never be mapped
/// for CPU access. If this flag is set, calls to IGpuMemory::Map()
/// on this object will fail.
uint64 gl2Uncached : 1; ///< Specifies the GPU memory is uncached in the GPU L2 cache, but the
/// memory may still be cached by other levels of the cache hierarchy,
/// such as L0, RB caches, L1, and L3.
uint64 mallRangeActive : 1; ///< If set, then this allocation will be partially allocated in the
/// MALL. If this is set, then the mallPolicy enumeration must be set
/// to either "always" or "never".
uint64 explicitSync : 1; ///< If set, shared memory will skip syncs in the kernel and all
/// drivers that use this memory must handle syncs explicitly.
uint64 privPrimary : 1; ///< This is a private primary surface gpu memory.
uint64 privateScreen : 1; ///< GPU memory will be used for a private screen image.
uint64 kmdShareUmdSysMem : 1; ///< UMD will allocate/free a memory buffer to be shared with KMD.
uint64 deferCpuVaReservation : 1; ///< KMD will allocate with the "CpuVisibleOnDemand" alloc flag.
/// Ignored for non-CPU-visible allocations.
uint64 placeholder1 : 1;
uint64 startVaHintFlag : 1; ///< Set to 1 when passing a startVaHint address, requesting that
/// baseVirtAddr be placed at startVaHint for this allocation.
#if PAL_AMDGPU_BUILD
uint64 initializeToZero : 1; ///< If set, PAL will request that the host OS zero-initializes
/// the allocation upon creation. Currently, only GpuHeapLocal and
/// GpuHeapInvisible are supported.
uint64 discardable : 1; ///< If set, this gpu memory object can be discarded under memory
/// pressure without keeping the content.
#else
uint64 placeholder2 : 2;
#endif
uint64 reserved : 29; ///< Reserved for future use.
};
uint64 u64All; ///< Flags packed as 64-bit uint.
};
/// Specifies properties of a typed buffer pseudo-object. When this is specified in GpuMemoryCreateInfo along with the
/// typedBuffer flag, the GPU memory object has been permanently cast as a single typed buffer. A typed buffer is very
/// similar to a linear 3D image: it has a format, extent, and row/depth pitch values.
///
/// Note that the typed buffer concept is used in other parts of the PAL interface and some of those instances may not
/// require a permanent typed buffer association. In such cases multiple typed buffers can be "bound" to one GPU memory
/// object at arbitrary offsets without any need to set the typedBuffer flag or fill out a TypedBufferCreateInfo.
struct TypedBufferCreateInfo
{
SwizzledFormat swizzledFormat; ///< Pixel format and channel swizzle.
Extent3d extent; ///< Dimensions in pixels WxHxD.
uint32 rowPitch; ///< Offset in bytes between the same X position on two consecutive lines.
uint32 depthPitch; ///< Offset in bytes between the same X,Y position of two consecutive slices.
bool depthIsSubres; ///< True if the depth slices should be treated as an array of 2D subresources.
};
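// Illustrative sketch (not part of the PAL interface): filling out TypedBufferCreateInfo for a buffer of
// 1024 32-bit elements. The format, extent, and pitch values are arbitrary placeholders that a client
// would derive from its own resource description; the typedBuffer create flag must also be set (see
// GpuMemoryCreateInfo below).
//
//     TypedBufferCreateInfo typedInfo = { };
//     typedInfo.swizzledFormat.format    = ChNumFormat::X32_Uint;  // one 32-bit channel per element
//     typedInfo.swizzledFormat.swizzle.r = ChannelSwizzle::X;
//     typedInfo.swizzledFormat.swizzle.g = ChannelSwizzle::Zero;
//     typedInfo.swizzledFormat.swizzle.b = ChannelSwizzle::Zero;
//     typedInfo.swizzledFormat.swizzle.a = ChannelSwizzle::One;
//     typedInfo.extent        = { 1024, 1, 1 };                    // WxHxD in pixels
//     typedInfo.rowPitch      = 1024 * sizeof(uint32);             // bytes between consecutive lines
//     typedInfo.depthPitch    = 1024 * sizeof(uint32);             // bytes between consecutive slices
//     typedInfo.depthIsSubres = false;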
/// Specifies properties for @ref IGpuMemory creation. Input structure to IDevice::CreateGpuMemory().
///
/// See the @ref IGpuMemory for additional restrictions on the size, alignment, vaRange, and descrVirtAddr fields.
struct GpuMemoryCreateInfo
{
GpuMemoryCreateFlags flags; ///< GPU memory flags.
gpusize size; ///< Amount of GPU memory to allocate in bytes.
gpusize alignment; ///< Byte alignment of the allocation's GPU VA. If zero, an
/// alignment matching the allocation granularity will be used.
VaRange vaRange; ///< Virtual address range for the GPU memory allocation.
union
{
const IGpuMemory* pReservedGpuVaOwner; ///< Must be zero unless "useReservedGpuVa" is true. It points to
/// the memory object which previously reserved the GPU VA range
/// to be used by the new memory object.
gpusize descrVirtAddr; ///< Must be zero unless vaRange is ShadowDescriptorTable, in
/// which case it must specify the GPU VA of the corresponding
/// DescriptorTable. It doesn't need to be the base VA of the
/// DescriptorTable allocation but must be aligned to
/// "alignment".
gpusize replayVirtAddr; ///< Must be zero unless vaRange is CaptureReplay, in which case
/// it must specify the GPU VA of the corresponding memory object.
gpusize startVaHint; ///< Start VA hint the client would like used as baseVirtAddr. If the
/// given hint is not properly aligned, the next higher aligned
/// address is used as the hint instead. If the hint is available and
/// within the VA range (vaRange must be VaRange::Default), then
/// baseVirtAddr is set to the hint. If the hint is unavailable, the
/// next available address between startVaHint and the end of the VA
/// range is used. If neither case succeeds, baseVirtAddr is assigned
/// normally.
};
GpuMemPriority priority; ///< Hint to the OS paging process on how important it is to keep
/// this allocation in its preferred heap.
GpuMemPriorityOffset priorityOffset; ///< Offset from the base level priority. A higher offset means
/// higher priority within same base Level. Currently supported
/// on Windows only.
GpuMemMallPolicy mallPolicy; ///< Used to control whether or not this allocation will be
/// accessed via the MALL (memory access last level). Only valid
/// if "supportsMall" is set in DeviceProperties.
GpuMemMallRange mallRange; ///< These parameters are only meaningful if flags.mallRangeActive
/// is set. Any pages outside of this range will use the opposite
/// MALL policy from what is specified in "mallPolicy".
GpuHeapAccess heapAccess; ///< Describes how the allocation will be accessed. If set to
/// something other than
/// @ref GpuHeapAccess::GpuHeapAccessExplicit, then PAL decides
/// the appropriate heap to allocate memory from based on this
/// member, and @ref heaps is ignored. Otherwise heap selection
/// respects the selection in @ref heaps.
uint32 heapCount; ///< Number of entries in heaps[]. Must be 0 for virtual
/// allocations.
GpuHeap heaps[GpuHeapCount]; ///< List of allowed memory heaps, in order of preference. It will
/// be ignored if @ref heapAccess is set to something other than
/// @ref GpuHeapAccess::GpuHeapAccessExplicit.
IImage* pImage; ///< The pointer to an Image object the memory object will be
/// bound to. It must be specified in special cases where a
/// memory object is permanently linked to an Image such as
/// presentable images or shared resources on Windows.
TypedBufferCreateInfo typedBufferInfo; ///< If typedBuffer is set this GPU memory will be permanently
/// considered a typed buffer.
VirtualGpuMemAccessMode virtualAccessMode; ///< Access mode for virtual GPU memory's unmapped pages.
/// This parameter is ignored on some platforms.
gpusize surfaceBusAddr; ///< Surface bus address of Bus Addressable Memory.
/// Only valid when GpuMemoryCreateFlags::sdiExternal is set.
gpusize markerBusAddr; ///< Marker bus address of Bus Addressable Memory. The client can
/// (1) write to the marker, and (2) have the GPU wait until a value is
/// written to the marker before issuing the next command.
/// Only valid when GpuMemoryCreateFlags::sdiExternal is set.
};
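// Illustrative sketch (assumptions: a valid IDevice* pDevice and client-provided placement memory; see
// palDevice.h for the exact IDevice::GetGpuMemorySize()/CreateGpuMemory() signatures): the typical
// two-call pattern for creating a real GPU allocation that prefers local memory.
//
//     GpuMemoryCreateInfo createInfo = { };
//     createInfo.size      = 64 * 1024;               // 64 KiB
//     createInfo.alignment = 0;                       // 0 = use the allocation granularity
//     createInfo.vaRange   = VaRange::Default;
//     createInfo.priority  = GpuMemPriority::Normal;
//     createInfo.heapCount = 2;
//     createInfo.heaps[0]  = GpuHeapLocal;            // preferred: CPU-visible local memory
//     createInfo.heaps[1]  = GpuHeapGartUswc;         // fallback: write-combined system memory
//
//     Result result = Result::Success;
//     const size_t objectSize = pDevice->GetGpuMemorySize(createInfo, &result);
//
//     IGpuMemory* pGpuMemory = nullptr;
//     if (result == Result::Success)
//     {
//         void*const pPlacementAddr = malloc(objectSize);  // client-owned storage for the PAL object
//         result = pDevice->CreateGpuMemory(createInfo, pPlacementAddr, &pGpuMemory);
//     }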
/// Specifies properties for @ref IGpuMemory creation. Input structure to IDevice::CreatePinnedGpuMemory().
///
/// See the @ref IGpuMemory for additional restrictions on the size and vaRange fields.
struct PinnedGpuMemoryCreateInfo
{
const void* pSysMem; ///< Pointer to the system memory that should be pinned for GPU access. Must be
/// aligned to realMemAllocGranularity in DeviceProperties.
size_t size; ///< Amount of system memory to pin for GPU access.
VaRange vaRange; ///< Virtual address range for the GPU memory allocation.
gpusize alignment; ///< Byte alignment of the allocation's GPU VA. If zero, an alignment matching the
/// Platform's allocation granularity will be used.
GpuMemMallPolicy mallPolicy; ///< Used to control whether or not this allocation will be
/// accessed via the MALL (memory access last level). Only valid
/// if "supportsMall" is set in DeviceProperties.
GpuMemMallRange mallRange; ///< These parameters are only meaningful if flags.mallRangeActive
/// is set. Any pages outside of this range will use the opposite
/// MALL policy from what is specified in "mallPolicy".
};
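// Illustrative sketch (assumed pattern, analogous to CreateGpuMemory; pSysBuffer, sysBufferSize, and
// pPlacementAddr are hypothetical client-owned values): pinning already-allocated system memory for
// GPU access via IDevice::CreatePinnedGpuMemory(). pSysMem must be aligned to realMemAllocGranularity
// reported in DeviceProperties.
//
//     PinnedGpuMemoryCreateInfo pinnedInfo = { };
//     pinnedInfo.pSysMem = pSysBuffer;        // suitably aligned system memory
//     pinnedInfo.size    = sysBufferSize;     // bytes to pin
//     pinnedInfo.vaRange = VaRange::Default;
//
//     Result result = Result::Success;
//     const size_t objectSize = pDevice->GetPinnedGpuMemorySize(pinnedInfo, &result);
//
//     IGpuMemory* pPinnedMem = nullptr;
//     if (result == Result::Success)
//     {
//         // pPlacementAddr must point to at least objectSize bytes of client-owned storage.
//         result = pDevice->CreatePinnedGpuMemory(pinnedInfo, pPlacementAddr, &pPinnedMem);
//     }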
/// Specifies properties for @ref IGpuMemory creation. Input structure to IDevice::CreateSvmGpuMemory().
///
/// See the @ref IGpuMemory for additional restrictions on the size and alignment.
struct SvmGpuMemoryCreateInfo
{
GpuMemoryCreateFlags flags; ///< GPU memory flags.
gpusize size; ///< Amount of SVM memory to allocate in bytes.
/// The total amount of SVM memory can't exceed the value set in
/// maxSvmSize when the platform is created.
gpusize alignment; ///< Byte alignment of the allocation's SVM VA. If zero, an
/// alignment matching the allocation granularity will be used.
const IGpuMemory* pReservedGpuVaOwner; ///< Must be zero unless "useReservedGpuVa" is true. It points to
/// the memory object which previously reserved the GPU VA range
/// to be used by the new memory object.
bool isUsedForKernel; ///< Memory will be used to store a kernel for execution on the GPU.
GpuMemMallPolicy mallPolicy; ///< Used to control whether or not this allocation will be
/// accessed via the MALL (memory access last level). Only valid
/// if "supportsMall" is set in DeviceProperties.
GpuMemMallRange mallRange; ///< These parameters are only meaningful if flags.mallRangeActive
/// is set. Any pages outside of this range will use the opposite
/// MALL policy from what is specified in "mallPolicy".
};
/// Specifies parameters for opening a shared GPU memory object on another device.
struct GpuMemoryOpenInfo
{
IGpuMemory* pSharedMem; ///< Shared GPU memory object from another device to open.
};
/// Specifies parameters for opening a GPU memory object on another device for peer-to-peer memory transfers.
struct PeerGpuMemoryOpenInfo
{
IGpuMemory* pOriginalMem; ///< GPU memory object from another device to open for peer-to-peer memory transfers.
};
/// Specifies parameters for opening another non-PAL device's GPU memory for access from this device. Input structure to
/// IDevice::OpenExternalSharedGpuMemory().
struct ExternalGpuMemoryOpenInfo
{
ExternalResourceOpenInfo resourceInfo; ///< Information describing the external gpuMemory.
TypedBufferCreateInfo typedBufferInfo; ///< Information describing the typed buffer.
GpuMemMallPolicy mallPolicy; ///< Used to control whether or not this allocation will be accessed via
/// the MALL (memory access last level). Only valid if "supportsMall" is
/// set in DeviceProperties.
GpuMemMallRange mallRange; ///< These parameters are only meaningful if flags.mallRangeActive is set.
/// Any pages outside of this range will use the opposite MALL policy
/// from what is specified in "mallPolicy".
union
{
struct
{
uint32 typedBuffer : 1; ///< GPU memory will be permanently considered a single typed buffer pseudo-object
/// with the properties given in typedBufferInfo.
uint32 gl2Uncached : 1; ///< Specifies the GPU memory is uncached in the GPU L2 cache.
uint32 mallRangeActive : 1; ///< If set, then this allocation will be partially allocated in the MALL.
/// If this is set, then the mallPolicy enumeration must be set to either
/// "always" or "never".
uint32 reserved : 29; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed as 32-bit uint.
} flags; ///< External Gpu memory open info flags.
};
/// The fundamental information that describes a GPU memory object, stored directly in each IGpuMemory.
/// It can be accessed without a virtual call via IGpuMemory::Desc().
struct GpuMemoryDesc
{
gpusize gpuVirtAddr; ///< GPU virtual address of the GPU memory allocation.
gpusize size; ///< Size of the GPU memory allocation, in bytes.
gpusize clientSize; ///< Size of the client requested GPU memory allocation, in bytes.
gpusize alignment; ///< Required GPU virtual address alignment, in bytes.
uint32 heapCount; ///< Number of entries in heaps[]. Must be 0 for virtual allocations.
GpuHeap heaps[GpuHeapCount]; ///< List of preferred memory heaps, in order of preference.
gpusize surfaceBusAddr; ///< Bus Address of SDI memory surface and marker. These will not be initialized
gpusize markerBusAddr; /// until the memory is made resident. The client must call
/// InitBusAddressableGpuMemory() to query and update these before they are valid.
union
{
struct
{
uint32 isVirtual : 1; ///< GPU memory is not backed by physical memory and must be remapped before the
/// GPU can safely access it. Will also be set for sdiExternal allocations. See
/// GpuMemoryCreateFlags::sdiExternal
uint32 isPeer : 1; ///< GPU memory object was created with @ref IDevice::OpenPeerGpuMemory.
uint32 isShared : 1; ///< GPU memory object was created either with
/// @ref IDevice::OpenExternalSharedGpuMemory or OpenSharedGpuMemory.
/// This IGpuMemory references memory created either by another process or another
/// device with the exception of peer access.
uint32 isExternal: 1; ///< GPU memory object was created with @ref IDevice::OpenExternalSharedGpuMemory.
/// This IGpuMemory references memory that was created either by another process
/// or by a device that doesn't support sharedMemory with this object's device
/// (i.e., MDA sharing on Windows).
uint32 isSvmAlloc : 1; ///< GPU memory is allocated in system memory.
/// Valid only when IOMMUv2 is supported
uint32 isExecutable : 1; ///< GPU memory is used for execution. Valid only when IOMMUv2 is supported
uint32 isExternPhys : 1; ///< GPU memory is External Physical memory
uint32 placeholder0 : 1; ///< Reserved for future memory flag
uint32 placeholder1 : 1; ///< Reserved for future memory flag
uint32 reserved : 23; ///< Reserved for future use
};
uint32 u32All; ///< Flags packed as 32-bit uint.
} flags; ///< GPU memory desc flags.
uint64 uniqueId; ///< Unique ID assigned to each GPU memory object, allowing for client tracking of
/// GPU memory allocations.
};
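// Illustrative sketch: reading the immutable description after creation, e.g. to record the allocation's
// GPU virtual address. Desc() (declared on IGpuMemory below) returns this structure without a virtual call.
//
//     const GpuMemoryDesc& desc = pGpuMemory->Desc();
//     const gpusize gpuVa     = desc.gpuVirtAddr;  // base GPU VA of the allocation
//     const gpusize totalSize = desc.size;         // allocated size in bytes (may exceed the client-requested size)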
/// Defines GPU memory suballocation info. Contains the start address of the whole allocation, plus the offset and
/// size that locate the suballocated region within it.
struct GpuMemSubAllocInfo
{
gpusize address; ///< Start address of the memory, not including the offset.
gpusize offset; ///< Offset from the start address of the memory.
gpusize size; ///< Size of the memory.
};
/// Specifies a GPU memory object and flags with more specific usage details. An array of these structures is specified
/// to PAL residency operations.
///
/// @see IDevice::AddGpuMemoryReferences
/// @see IQueue::Submit
struct GpuMemoryRef
{
union
{
struct
{
uint32 readOnly : 1; ///< The allocation will not be written using this reference.
uint32 reserved : 31; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed as 32-bit uint.
} flags; ///< GPU memory reference flags.
IGpuMemory* pGpuMemory; ///< The GPU memory object referenced by this residency operation.
};
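// Illustrative sketch (assumed usage; see IDevice::AddGpuMemoryReferences() in palDevice.h for the exact
// signature and flag values): making two hypothetical allocations resident, one marked read-only.
//
//     GpuMemoryRef memRefs[2] = { };
//     memRefs[0].pGpuMemory     = pVertexBufferMem;  // read/write reference
//     memRefs[1].pGpuMemory     = pTextureMem;
//     memRefs[1].flags.readOnly = 1;                 // never written through this reference
//
//     // pQueue and flags are left at their defaults in this sketch.
//     Result result = pDevice->AddGpuMemoryReferences(2, &memRefs[0], nullptr, 0);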
/// Specifies a Display Output Post-Processing (DOPP) allocation that will be referenced by a submission along with
/// additional info describing how it will be used.
///
/// @see IQueue::Submit
struct DoppRef
{
union
{
struct
{
uint32 pfpa : 1; ///< Access to this DOPP allocation will be redirected to the primary pending
/// present (i.e., pre-flip primary access). If not set, access will
/// refer to the current onscreen primary.
uint32 lastPfpaCmd : 1; ///< This submission will be the last access of this pfpa allocation
/// for this frame. The pfpa interval will end once this submit
/// completes, allowing the corresponding vidPnSource to flip.
/// This flag is invalid if the pfpa flag is not set.
uint32 reserved : 30; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed as 32-bit uint.
} flags; ///< GPU memory reference flags.
IGpuMemory* pGpuMemory; ///< The GPU memory object referenced by this residency operation.
};
/// Specifies the types of the exporting memory.
enum class ExportHandleType : uint32
{
Default = 0, ///< Let PAL choose the export type
#if PAL_AMDGPU_BUILD
FileDescriptor, ///< Export using a Linux file descriptor
Kms, ///< Export through KMS
#endif
};
/// Specifies parameters for exporting an OS handle that refers to a GPU memory object.
struct GpuMemoryExportInfo
{
ExportHandleType exportType; ///< Type of handle to use for exporting the memory.
};
/**
***********************************************************************************************************************
* @interface IGpuMemory
* @brief Interface representing a GPU-accessible memory allocation.
*
* Depending on creation parameters, this could correspond to:
*
* + A _real_ memory object, corresponding directly to a physical allocation made on this device (whether it resides in
* a local or non-local heap).
* + A _virtual_ memory object, only consisting of virtual address space that can be mapped on a page basis to pages in
* _real_ memory objects via IQueue::RemapVirtualMemoryPages.
* + Pinned memory, a _real_ memory object created by pinning down client system memory.
* + Peer memory, a _real_ memory object corresponding to GPU memory that is likely local to another GPU. Only copy
* operations (peer-to-peer transfers) are allowed with this memory.
* + Opened/shared memory, a _real_ memory object that is fully shared between multiple GPUs, residing in a non-local
* heap.
* + External shared memory, a _real_ memory object that was created by an external process and is fully shared between
* multiple GPUs.
*
* @see IDevice::CreateGpuMemory
* @see IDevice::CreatePinnedGpuMemory
* @see IDevice::OpenSharedGpuMemory
* @see IDevice::OpenPeerGpuMemory
* @see IDevice::OpenExternalSharedGpuMemory
*
*
* All of these kinds of GPU memory are assigned a set of fundamental properties, specified in GpuMemoryDesc, which are
* determined either by the client or by PAL. There are specific rules these properties must follow; those rules are
* documented here to avoid duplication. Violating these rules will cause the device's corresponding "get size"
* functions to return an error code; the create/open functions may not validate their arguments.
*
*
* With the exception of external memory objects being opened, PAL will adjust size and base alignments as necessary
* to meet device requirements. Typically this means going out to OS page boundaries. The client is no longer required
* to query device requirements and align for PAL.
*
* Note that the device alignment requirements apply equally to GPU VAs. However, other kinds of alignment
* restrictions (e.g., IGpuMemoryBindable's requirements) may only apply to one of those two properties. When creating
* GPU memory objects the client must be careful to set the "alignment" field to the alignment of the GPU VA.
*
*
* Additionally, the client can't directly specify a memory object's GPU VA but must specify its VA range, limiting which
* portions of the VA space can be used. Note that non-external shared and peer GPU memory objects will use the
* original memory's VA range. External shared GPU memory always uses the default VA range.
*
* The ShadowDescriptorTable VA range is special because it pairs the shadow GPU memory to an existing descriptor GPU
* memory. The client must specify the GPU VA of the corresponding DescriptorTable memory when creating a shadow GPU
* memory object via descrVirtAddr; it must satisfy the alignment requirements of the shadow GPU memory. Both GPU
* memory objects must be created on the same device. Note that descrVirtAddr can be offset into the descriptor
* allocation such that multiple shadow GPU memory objects correspond to one larger descriptor GPU memory object.
*
*
* The client can further influence the GPU VA of shared and peer GPU memory objects. If the globalGpuVa flag is set
* when the original GPU memory object is created, PAL will assign any shared or peer GPU memory objects that same VA.
* Note that globalGpuVa is only supported if globalGpuVaSupport is set in DeviceProperties.
***********************************************************************************************************************
*/
class IGpuMemory : public IDestroyable
{
public:
/// Sets a new priority for this GPU memory object.
///
/// This call is not available for virtual or pinned memory.
///
/// @param [in] priority New base priority for the GPU memory object.
///
/// @param [in] priorityOffset New priority offset for the GPU memory object. This is a small bias that can be
/// used by the OS to raise the importance of an allocation when there are
/// multiple allocations in the same base priority level. You can think of it as
/// the fractional bits of the priority level.
///
/// @returns Success if the priority was successfully updated. Otherwise, one of the following errors may be
/// returned:
/// + ErrorUnavailable if this is a virtual or pinned GPU memory object.
virtual Result SetPriority(
GpuMemPriority priority,
GpuMemPriorityOffset priorityOffset) = 0;
/// Makes the GPU memory available for CPU access and gives the client a pointer to reference it.
///
/// The allocation should be unmapped by the client once CPU access is complete, although it _is_ legal to keep an
/// allocation mapped while the GPU references the allocation from a command buffer.
///
/// It is legal to map the allocation multiple times concurrently. Mapping is not available for pinned or virtual
/// memory objects. This call is thread safe for calls referencing this memory object.
///
/// @see Unmap.
///
/// @param [out] ppData CPU pointer to the GPU memory object.
///
/// @returns Success if the map succeeded. Otherwise, *ppData will not be valid and one of the following errors may
/// be returned.
/// + ErrorInvalidPointer if ppData is null.
/// + ErrorGpuMemoryMapFailed if the object is busy and cannot be mapped by the OS.
/// + ErrorNotMappable if the memory object cannot be mapped due to some of its heaps not having the CPU
/// visible flag set.
/// + ErrorUnavailable if the memory object is not a real allocation.
virtual Result Map(
void** ppData) = 0;
/// Removes CPU access from a previously mapped GPU memory object.
///
/// This call is thread safe for calls referencing the same memory object.
///
/// @see Map
///
/// @returns Success if the unmap succeeded. Otherwise, one of the following errors may be returned:
/// + ErrorGpuMemoryUnmapFailed if the GPU memory object cannot be unlocked.
/// + ErrorUnavailable if the GPU memory object is not a real allocation.
virtual Result Unmap() = 0;
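// Illustrative sketch (not part of the interface; pGpuMemory, pSrcData, and srcDataSize are hypothetical):
// mapping a CPU-visible real allocation, writing to it from the CPU, then unmapping.
//
//     void*  pData  = nullptr;
//     Result result = pGpuMemory->Map(&pData);
//     if (result == Result::Success)
//     {
//         memcpy(pData, pSrcData, srcDataSize);  // CPU write through the mapped pointer
//         result = pGpuMemory->Unmap();
//     }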
#if PAL_AMDGPU_BUILD
/// Returns an OS-specific handle which can be used to refer to this GPU memory object across processes. This will
/// return a null or invalid handle if the object was not created with the @ref interprocess create flag set.
///
/// @note This function is only available for Linux builds or KMT builds.
///
/// @param [in] exportInfo Information controlling how the handle is exported.
///
/// @returns An OS-specific handle which can be used to access the GPU memory object across processes.
virtual OsExternalHandle ExportExternalHandle(const GpuMemoryExportInfo& exportInfo) const = 0;
#endif
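// Illustrative sketch (amdgpu builds; assumes the allocation was created with the interprocess flag set):
// exporting a file descriptor that another process can use to open this allocation.
//
//     GpuMemoryExportInfo exportInfo = { };
//     exportInfo.exportType = ExportHandleType::FileDescriptor;
//     OsExternalHandle handle = pGpuMemory->ExportExternalHandle(exportInfo);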
/// Returns a structure containing some fundamental information that describes this GPU memory object.
///
/// @returns A reference to this allocation's GpuMemoryDesc.
const GpuMemoryDesc& Desc() const { return m_desc; }
/// Returns the value of the associated arbitrary client data pointer.
/// Can be used to associate arbitrary data with a particular PAL object.
///
/// @returns Pointer to client data.
void* GetClientData() const
{
return m_pClientData;
}
/// Sets the value of the associated arbitrary client data pointer.
/// Can be used to associate arbitrary data with a particular PAL object.
///
/// @param [in] pClientData A pointer to arbitrary client data.
void SetClientData(
void* pClientData)
{
m_pClientData = pClientData;
}
/// Set SDI remote surface bus address and marker bus address.
///
/// This GPU memory object must have been created with the sdiExternal flag set and with the GpuMemoryCreateInfo
/// surfaceBusAddr and markerBusAddr fields both set to zero. This function allows clients to defer setting those
/// addresses until after creation. It must be called exactly once to permanently bind the given SDI addresses to
/// this GPU memory object.
///
/// @warning An sdiExternal GPU memory object is not complete until it is given its SDI addresses! The gpuVirtAddr
/// field in this GPU memory's GpuMemoryDesc will not be valid until this function is called!
///
/// @param [in] surfaceBusAddr Surface bus address of Bus Addressable Memory.
/// @param [in] markerBusAddr Marker bus address of Bus Addressable Memory. The client can write to the marker
/// and have the GPU wait until a value is written to marker before continuing.
///
/// @returns Success if the addresses were set successfully. Otherwise, one of the following errors may be returned:
/// + ErrorUnavailable if the GPU memory object is not external physical memory or it has already been set.
/// + ErrorInvalidValue if one of the input params is 0.
/// + One of the escape-call-failed error codes if the underlying OS escape call failed.
virtual Result SetSdiRemoteBusAddress(gpusize surfaceBusAddr, gpusize markerBusAddr) = 0;
protected:
/// @internal Constructor. Prevent use of new operator on this interface. Client must create objects by explicitly
/// calling the proper create method.
IGpuMemory() : m_pClientData(nullptr) {}
/// @internal Destructor. Prevent use of delete operator on this interface. Client must destroy objects by
/// explicitly calling IDestroyable::Destroy() and is responsible for freeing the system memory allocated for the
/// object on their own.
virtual ~IGpuMemory() { }
GpuMemoryDesc m_desc; ///< Information that describes this GPU memory object.
private:
/// @internal Client data pointer. This can have an arbitrary value and can be returned by calling GetClientData()
/// and set via SetClientData().
/// For non-top-layer objects, this will point to the layer above the current object.
void* m_pClientData;
};
} // Pal