@@ -9,7 +9,7 @@ SPDX-License-Identifier: MIT
9
9
#ifndef VC_BIF_PRINTF_NOT_CM_COMMON_H
10
10
#define VC_BIF_PRINTF_NOT_CM_COMMON_H
11
11
12
- #include < cm-cl/svm .h>
12
+ #include < cm-cl/atomic .h>
13
13
#include < cm-cl/vector.h>
14
14
#include < opencl_def.h>
15
15
@@ -56,48 +56,53 @@ inline int calcRequiredBufferSize(vector<int, ArgsInfoVector::Size> ArgsInfo) {
56
56
return BufferSize;
57
57
}
58
58
59
- static inline BufferElementTy getInitialBufferOffset (uintptr_t BufferPtr,
60
- int RequiredSize) {
61
- constexpr int MagicNumber = 8 ;
62
- constexpr cl_vector<uint64_t , MagicNumber> AddrOffset{0 , 4 , 8 , 12 ,
63
- 16 , 20 , 24 , 28 };
64
- vector<BufferElementTy, MagicNumber> Result;
65
- vector<BufferElementTy, MagicNumber> Size = 0 ;
66
- Size[0 ] = RequiredSize;
67
- vector<uint64_t , MagicNumber> Offsets (AddrOffset);
68
- vector<uintptr_t , MagicNumber> Addr = BufferPtr + Offsets;
69
-
70
- Result = svm::atomic<atomic::operation::add>(Addr, Size);
71
- return Result[0 ];
59
+ // Return initial buffer offset in BufferElementTy elements (not in bytes).
60
+ static inline BufferElementTy
61
+ getInitialBufferOffset (__global BufferElementTy *BufferPtr,
62
+ BufferElementTy RequiredSize) {
63
+ #if __clang_major__ > 9
64
+ int ByteOffset =
65
+ atomic::execute<atomic::operation::add, memory_order_relaxed,
66
+ memory_scope_all_devices>(BufferPtr, RequiredSize);
67
+ #else // __clang_major__ > 9
68
+ // Helping clang-9 correctly deduce the argument type.
69
+ int ByteOffset =
70
+ atomic::execute<atomic::operation::add, memory_order_relaxed,
71
+ memory_scope_all_devices, __global BufferElementTy>(
72
+ BufferPtr, RequiredSize);
73
+ #endif // __clang_major__ > 9
74
+ return ByteOffset / sizeof (BufferElementTy);
72
75
}
73
76
74
- static inline vector<BufferElementTy, AddressVectorWidth >
75
- castPointerToVector (uintptr_t Ptr) {
76
- vector<uint64_t , 1 > Tmp = Ptr;
77
+ template < typename T >
78
+ static vector<BufferElementTy, AddressVectorWidth> castPointerToVector (T * Ptr) {
79
+ vector<uint64_t , 1 > Tmp = reinterpret_cast < uintptr_t >( Ptr) ;
77
80
return Tmp.format <BufferElementTy>();
78
81
}
79
82
80
83
// A helper function to properly set CurAddressLow and CurAddressHigh
81
84
// elements of \p TransferData vector by the provided \p Ptr.
82
85
static inline void
83
86
setCurAddress (vector<BufferElementTy, TransferDataSize> &TransferData,
84
- uintptr_t Ptr) {
87
+ __global BufferElementTy * Ptr) {
85
88
TransferData.select <AddressVectorWidth, 1 >(
86
89
TransferDataLayout::CurAddressLow) = castPointerToVector (Ptr);
87
90
}
88
91
89
92
// A helper function to properly extract current address from \p TransferData.
90
- static inline uintptr_t
93
+ static inline __global BufferElementTy *
91
94
getCurAddress (vector<BufferElementTy, TransferDataSize> TransferData) {
92
95
vector<BufferElementTy, AddressVectorWidth> Address =
93
96
TransferData.select <AddressVectorWidth, 1 >(
94
97
TransferDataLayout::CurAddressLow);
95
98
// Bit-casting to 64-bit int and then truncating if necessary.
96
- return Address.format <uint64_t >();
99
+ return reinterpret_cast <__global BufferElementTy *>(
100
+ static_cast <uintptr_t >(Address.format <uint64_t >()));
97
101
}
98
102
99
103
static inline vector<BufferElementTy, TransferDataSize>
100
- generateTransferData (uintptr_t InitPtr, BufferElementTy ReturnValue) {
104
+ generateTransferData (__global BufferElementTy *InitPtr,
105
+ BufferElementTy ReturnValue) {
101
106
vector<BufferElementTy, TransferDataSize> TransferData;
102
107
setCurAddress (TransferData, InitPtr);
103
108
TransferData[TransferDataLayout::ReturnValue] = ReturnValue;
@@ -111,21 +116,27 @@ vector<BufferElementTy, TransferDataSize>
111
116
printf_init_impl (vector<int , ArgsInfoVector::Size> ArgsInfo) {
112
117
auto FmtStrSize = ArgsInfo[ArgsInfoVector::FormatStrSize];
113
118
if (FmtStrSize > MaxFormatStrSize)
114
- return generateTransferData (/* BufferPtr */ 0 , /* ReturnValue */ -1 );
119
+ return generateTransferData (/* BufferPtr */ nullptr , /* ReturnValue */ -1 );
115
120
auto BufferSize = calcRequiredBufferSize<StringAnnotationSize>(ArgsInfo);
116
- auto BufferPtr = reinterpret_cast <uintptr_t >(cm::detail::printf_buffer ());
121
+ #if __clang_major__ > 9
122
+ auto *BufferPtr =
123
+ static_cast <__global BufferElementTy *>(cm::detail::printf_buffer ());
124
+ #else // __clang_major__ > 9
125
+ // clang-9 cannot handle this auto.
126
+ __global BufferElementTy *BufferPtr =
127
+ static_cast <__global BufferElementTy *>(cm::detail::printf_buffer ());
128
+ #endif // __clang_major__ > 9
117
129
auto Offset = getInitialBufferOffset (BufferPtr, BufferSize);
118
130
return generateTransferData (BufferPtr + Offset, /* ReturnValue */ 0 );
119
131
}
120
132
121
133
// Writes \p Data to printf buffer via \p CurAddress pointer.
122
134
// Returns promoted pointer.
123
- static inline uintptr_t writeElementToBuffer (uintptr_t CurAddress,
124
- BufferElementTy Data) {
125
- vector<uintptr_t , 1 > CurAddressVec = CurAddress;
126
- vector<BufferElementTy, 1 > DataVec = Data;
127
- svm::scatter (CurAddressVec, DataVec);
128
- return CurAddress + sizeof (Data);
135
+ static inline __global BufferElementTy *
136
+ writeElementToBuffer (__global BufferElementTy *CurAddress,
137
+ BufferElementTy Data) {
138
+ *CurAddress = Data;
139
+ return ++CurAddress;
129
140
}
130
141
131
142
// ArgCode is written into printf buffer before every argument.
@@ -189,7 +200,7 @@ printf_arg_impl(vector<BufferElementTy, TransferDataSize> TransferData,
189
200
// Just skip.
190
201
return TransferData;
191
202
vector<BufferElementTy, ArgInfo::Size> Info = getArgInfo<StringArgSize>(Kind);
192
- uintptr_t CurAddress = getCurAddress (TransferData);
203
+ __global BufferElementTy * CurAddress = getCurAddress (TransferData);
193
204
CurAddress = writeElementToBuffer (CurAddress, Info[ArgInfo::Code]);
194
205
for (int Idx = 0 ; Idx != Info[ArgInfo::NumDWords]; ++Idx)
195
206
CurAddress = writeElementToBuffer (CurAddress, Arg[Idx]);
0 commit comments