@@ -1700,6 +1700,54 @@ static bool setEnvVar(const char *name, const char *value) {
17001700 return true ;
17011701}
17021702
1703+ static class ZeUSMImportExtension {
1704+ // Pointers to functions that import/release host memory into USM
1705+ ze_result_t (*zexDriverImportExternalPointer)(ze_driver_handle_t hDriver,
1706+ void *, size_t );
1707+ ze_result_t (*zexDriverReleaseImportedPointer)(ze_driver_handle_t , void *);
1708+
1709+ public:
1710+ // Whether user has requested Import/Release, and platform supports it.
1711+ bool Enabled;
1712+
1713+ ZeUSMImportExtension () : Enabled{false } {}
1714+
1715+ void setZeUSMImport (pi_platform Platform) {
1716+ // Whether env var SYCL_USM_HOSTPTR_IMPORT has been set requesting
1717+ // host ptr import during buffer creation.
1718+ const char *USMHostPtrImportStr = std::getenv (" SYCL_USM_HOSTPTR_IMPORT" );
1719+ if (!USMHostPtrImportStr || std::atoi (USMHostPtrImportStr) == 0 )
1720+ return ;
1721+
1722+ // Check if USM hostptr import feature is available.
1723+ ze_driver_handle_t driverHandle = Platform->ZeDriver ;
1724+ if (ZE_CALL_NOCHECK (zeDriverGetExtensionFunctionAddress,
1725+ (driverHandle, " zexDriverImportExternalPointer" ,
1726+ reinterpret_cast <void **>(
1727+ &zexDriverImportExternalPointer))) == 0 ) {
1728+ ZE_CALL_NOCHECK (
1729+ zeDriverGetExtensionFunctionAddress,
1730+ (driverHandle, " zexDriverReleaseImportedPointer" ,
1731+ reinterpret_cast <void **>(&zexDriverReleaseImportedPointer)));
1732+ // Hostptr import/release is turned on because it has been requested
1733+ // by the env var, and this platform supports the APIs.
1734+ Enabled = true ;
1735+ // Hostptr import is only possible if piMemBufferCreate receives a
1736+ // hostptr as an argument. The SYCL runtime passes a host ptr
1737+ // only when SYCL_HOST_UNIFIED_MEMORY is enabled. Therefore we turn it on.
1738+ setEnvVar (" SYCL_HOST_UNIFIED_MEMORY" , " 1" );
1739+ }
1740+ }
1741+ void doZeUSMImport (ze_driver_handle_t driverHandle, void *HostPtr,
1742+ size_t Size) {
1743+ ZE_CALL_NOCHECK (zexDriverImportExternalPointer,
1744+ (driverHandle, HostPtr, Size));
1745+ }
1746+ void doZeUSMRelease (ze_driver_handle_t driverHandle, void *HostPtr) {
1747+ ZE_CALL_NOCHECK (zexDriverReleaseImportedPointer, (driverHandle, HostPtr));
1748+ }
1749+ } ZeUSMImport;
1750+
17031751pi_result _pi_platform::initialize () {
17041752 // Cache driver properties
17051753 ZeStruct<ze_driver_properties_t > ZeDriverProperties;
@@ -1745,6 +1793,10 @@ pi_result _pi_platform::initialize() {
17451793 zeDriverExtensionMap[extension.name ] = extension.version ;
17461794 }
17471795
1796+ // Check if import user ptr into USM feature has been requested.
1797+ // If yes, then set up L0 API pointers if the platform supports it.
1798+ ZeUSMImport.setZeUSMImport (this );
1799+
17481800 return PI_SUCCESS;
17491801}
17501802
@@ -1854,8 +1906,9 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms,
18541906 std::copy_n (PiPlatformsCache->begin (), NumEntries, Platforms);
18551907 }
18561908
1857- if (NumPlatforms)
1909+ if (NumPlatforms) {
18581910 *NumPlatforms = PiPlatformsCache->size ();
1911+ }
18591912
18601913 zePrint (" Using events scope: %s\n " ,
18611914 EventsScope == AllHostVisible ? " all host-visible"
@@ -3360,32 +3413,69 @@ pi_result piMemBufferCreate(pi_context Context, pi_mem_flags Flags, size_t Size,
33603413 else
33613414 Alignment = 1UL ;
33623415
3363- pi_result Result = PI_SUCCESS;
3416+ // If USM Import feature is enabled and hostptr is supplied,
3417+ // import the hostptr if not already imported into USM.
3418+ // Data transfer rate is maximized when both source and destination
3419+ // are USM pointers. Promotion of the host pointer to USM thus
3420+ // optimizes data transfer performance.
3421+ bool HostPtrImported = false ;
3422+ if (ZeUSMImport.Enabled && HostPtr != nullptr &&
3423+ (Flags & PI_MEM_FLAGS_HOST_PTR_USE) != 0 ) {
3424+ // Query memory type of the host pointer
3425+ ze_device_handle_t ZeDeviceHandle;
3426+ ZeStruct<ze_memory_allocation_properties_t > ZeMemoryAllocationProperties;
3427+ ZE_CALL (zeMemGetAllocProperties,
3428+ (Context->ZeContext , HostPtr, &ZeMemoryAllocationProperties,
3429+ &ZeDeviceHandle));
3430+
3431+ // If not shared of any type, we can import the ptr
3432+ if (ZeMemoryAllocationProperties.type == ZE_MEMORY_TYPE_UNKNOWN) {
3433+ // Promote the host ptr to USM host memory
3434+ ze_driver_handle_t driverHandle = Context->Devices [0 ]->Platform ->ZeDriver ;
3435+ ZeUSMImport.doZeUSMImport (driverHandle, HostPtr, Size);
3436+ HostPtrImported = true ;
3437+ }
3438+ }
3439+
3440+ pi_result Result;
33643441 if (DeviceIsIntegrated) {
3365- if (enableBufferPooling ()) {
3366- PI_CALL (piextUSMHostAlloc (&Ptr, Context, nullptr , Size, Alignment));
3367- } else
3368- Result = ZeHostMemAllocHelper (&Ptr, Context, Size);
3442+ if (HostPtrImported) {
3443+ // When HostPtr is imported we use it for the buffer.
3444+ Ptr = HostPtr;
3445+ } else {
3446+ if (enableBufferPooling ()) {
3447+ PI_CALL (piextUSMHostAlloc (&Ptr, Context, nullptr , Size, Alignment));
3448+ } else {
3449+ Result = ZeHostMemAllocHelper (&Ptr, Context, Size);
3450+ }
3451+ }
33693452 } else if (Context->SingleRootDevice ) {
33703453 // If we have a single discrete device or all devices in the context are
33713454 // sub-devices of the same device then we can allocate on device
33723455 if (enableBufferPooling ()) {
33733456 PI_CALL (piextUSMDeviceAlloc (&Ptr, Context, Context->SingleRootDevice ,
33743457 nullptr , Size, Alignment));
3375- } else
3458+ } else {
33763459 Result = ZeDeviceMemAllocHelper (&Ptr, Context, Context->SingleRootDevice ,
33773460 Size);
3461+ }
33783462 } else {
33793463 // Context with several gpu cards. Temporarily use host allocation because
33803464 // it is accessible by all devices. But it is not good in terms of
33813465 // performance.
33823466 // TODO: We need to either allow remote access to device memory using IPC,
33833467 // or do explicit memory transfers from one device to another using host
33843468 // resources as backing buffers to allow those transfers.
3385- if (enableBufferPooling ()) {
3386- PI_CALL (piextUSMHostAlloc (&Ptr, Context, nullptr , Size, Alignment));
3387- } else
3388- Result = ZeHostMemAllocHelper (&Ptr, Context, Size);
3469+ if (HostPtrImported) {
3470+ // When HostPtr is imported we use it for the buffer.
3471+ Ptr = HostPtr;
3472+ } else {
3473+ if (enableBufferPooling ()) {
3474+ PI_CALL (piextUSMHostAlloc (&Ptr, Context, nullptr , Size, Alignment));
3475+ } else {
3476+ Result = ZeHostMemAllocHelper (&Ptr, Context, Size);
3477+ }
3478+ }
33893479 }
33903480
33913481 if (Result != PI_SUCCESS)
@@ -3396,8 +3486,10 @@ pi_result piMemBufferCreate(pi_context Context, pi_mem_flags Flags, size_t Size,
33963486 (Flags & PI_MEM_FLAGS_HOST_PTR_COPY) != 0 ) {
33973487 // Initialize the buffer with user data
33983488 if (DeviceIsIntegrated) {
3399- // Do a host to host copy
3400- memcpy (Ptr, HostPtr, Size);
3489+ // Do a host to host copy.
3490+ // For an imported HostPtr the copy is unneeded.
3491+ if (!HostPtrImported)
3492+ memcpy (Ptr, HostPtr, Size);
34013493 } else if (Context->SingleRootDevice ) {
34023494 // Initialize the buffer synchronously with immediate offload
34033495 ZE_CALL (zeCommandListAppendMemoryCopy,
@@ -3406,7 +3498,9 @@ pi_result piMemBufferCreate(pi_context Context, pi_mem_flags Flags, size_t Size,
34063498 } else {
34073499 // Multiple root devices, do a host to host copy because we use a host
34083500 // allocation for this case.
3409- memcpy (Ptr, HostPtr, Size);
3501+ // For an imported HostPtr the copy is unneeded.
3502+ if (!HostPtrImported)
3503+ memcpy (Ptr, HostPtr, Size);
34103504 }
34113505 } else if (Flags == 0 || (Flags == PI_MEM_FLAGS_ACCESS_RW)) {
34123506 // Nothing more to do.
@@ -3421,7 +3515,7 @@ pi_result piMemBufferCreate(pi_context Context, pi_mem_flags Flags, size_t Size,
34213515 *RetMem = new _pi_buffer (
34223516 Context, pi_cast<char *>(Ptr) /* Level Zero Memory Handle */ ,
34233517 HostPtrOrNull, nullptr , 0 , 0 ,
3424- DeviceIsIntegrated /* allocation in host memory */ );
3518+ DeviceIsIntegrated /* allocation in host memory */ , HostPtrImported );
34253519 } catch (const std::bad_alloc &) {
34263520 return PI_OUT_OF_HOST_MEMORY;
34273521 } catch (...) {
@@ -3491,11 +3585,17 @@ pi_result piMemRelease(pi_mem Mem) {
34913585 } else {
34923586 auto Buf = static_cast <_pi_buffer *>(Mem);
34933587 if (!Buf->isSubBuffer ()) {
3494- if (enableBufferPooling ()) {
3495- PI_CALL (piextUSMFree (Mem->Context , Mem->getZeHandle ()));
3588+ if (Mem->HostPtrImported ) {
3589+ ze_driver_handle_t driverHandle =
3590+ Mem->Context ->Devices [0 ]->Platform ->ZeDriver ;
3591+ ZeUSMImport.doZeUSMRelease (driverHandle, Mem->MapHostPtr );
34963592 } else {
3497- if (auto Res = ZeMemFreeHelper (Mem->Context , Mem->getZeHandle ()))
3498- return Res;
3593+ if (enableBufferPooling ()) {
3594+ PI_CALL (piextUSMFree (Mem->Context , Mem->getZeHandle ()));
3595+ } else {
3596+ if (auto Res = ZeMemFreeHelper (Mem->Context , Mem->getZeHandle ()))
3597+ return Res;
3598+ }
34993599 }
35003600 }
35013601 }
@@ -6020,7 +6120,8 @@ pi_result piEnqueueMemBufferMap(pi_queue Queue, pi_mem Buffer,
60206120
60216121 if (Buffer->MapHostPtr ) {
60226122 *RetMap = Buffer->MapHostPtr + Offset;
6023- if (!(MapFlags & PI_MAP_WRITE_INVALIDATE_REGION))
6123+ if (!Buffer->HostPtrImported &&
6124+ !(MapFlags & PI_MAP_WRITE_INVALIDATE_REGION))
60246125 memcpy (*RetMap, pi_cast<char *>(Buffer->getZeHandle ()) + Offset, Size);
60256126 } else {
60266127 *RetMap = pi_cast<char *>(Buffer->getZeHandle ()) + Offset;
0 commit comments