Skip to content

Commit 14b8ff0

Browse files
dmitryryintel authored and igcbot committed
printf implementation with high-level interfaces
The original printf implementation used cm::svm interfaces that could only be translated into legacy messages. The printf implementation now uses the cm::atomic::execute interface and pointer dereferencing to generate IR that can be translated into both legacy and LSC messages. The legacy cm::svm interfaces were removed.
1 parent f5a18c1 commit 14b8ff0

File tree

6 files changed

+47
-153
lines changed

6 files changed

+47
-153
lines changed

IGC/VectorCompiler/CMCL/lib/Headers/cm-cl/detail/builtins.h

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -56,17 +56,6 @@ int __cm_cl_printf_format_index(__global const char *str);
5656
// switch to using addrspaces.
5757
int __cm_cl_printf_format_index(__private const char *str);
5858

59-
// SVM memory operations have only 64 bit addressing. One can extend the address
60-
// or use statefull operations for 32 bit addressing.
61-
template <typename T, int width>
62-
void __cm_cl_svm_scatter(int num_blocks, vector_impl<uint64_t, width> address,
63-
vector_impl<T, width> src);
64-
65-
template <typename T, int width>
66-
vector_impl<T, width>
67-
__cm_cl_svm_atomic_add(vector_impl<uint64_t, width> address,
68-
vector_impl<T, width> src);
69-
7059
uint32_t __cm_cl_lzd(uint32_t src);
7160
template <int width>
7261
vector_impl<uint32_t, width> __cm_cl_lzd(vector_impl<uint32_t, width> src);
@@ -239,22 +228,6 @@ inline int printf_format_index(__private const char *str) {
239228
return __cm_cl_printf_format_index(str);
240229
}
241230

242-
template <int num_blocks, typename T, int width>
243-
void svm_scatter(vector_impl<uint64_t, width> address,
244-
vector_impl<T, num_blocks * width> src) {
245-
static_assert(sizeof(T) == 1 || sizeof(T) == 4 || sizeof(T) == 8,
246-
"invalid type");
247-
constexpr auto lowered_num_blocks = encode_num_blocks(num_blocks);
248-
static_assert(lowered_num_blocks >= 0, "invalid number of blocks");
249-
__cm_cl_svm_scatter(lowered_num_blocks, address, src);
250-
}
251-
252-
template <typename T, int width>
253-
vector_impl<T, width> svm_atomic_add(vector_impl<uint64_t, width> address,
254-
vector_impl<T, width> src) {
255-
return __cm_cl_svm_atomic_add(address, src);
256-
}
257-
258231
inline uint32_t lzd(uint32_t src) { return __cm_cl_lzd(src); }
259232

260233
template <int width>

IGC/VectorCompiler/CMCL/lib/Headers/cm-cl/svm.h

Lines changed: 0 additions & 44 deletions
This file was deleted.

IGC/VectorCompiler/CMCL/lib/Support/TranslationDescription.json

Lines changed: 0 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -149,50 +149,6 @@
149149
]
150150
}
151151
},
152-
"SVMScatter": {
153-
"Name": "svm_scatter",
154-
"Operands": [
155-
{"Name": "NumBlocks", "Kind": "Constant"},
156-
{"Name": "Address", "Kind": "Input"},
157-
{"Name": "Source", "Kind": "Input"}
158-
],
159-
"TranslateInto": {
160-
"VC-Intrinsic": "genx_svm_scatter",
161-
"ReturnType": {"GetBuiltinReturnType": []},
162-
"Operands": [
163-
{"Code": [
164-
"*IRB.CreateVectorSplat(getVectorWidth({}), IRB.getTrue())",
165-
{"GetBuiltinOperandType": ["Address"]}
166-
]},
167-
{"GetBuiltinOperand": ["NumBlocks"]},
168-
{"GetBuiltinOperand": ["Address"]},
169-
{"GetBuiltinOperand": ["Source"]}
170-
]
171-
}
172-
},
173-
"SVMAtomicAdd": {
174-
"Name": "svm_atomic_add",
175-
"Operands": [
176-
{"Name": "Address", "Kind": "Input"},
177-
{"Name": "Source", "Kind": "Input"}
178-
],
179-
"TranslateInto": {
180-
"VC-Intrinsic": "genx_svm_atomic_add",
181-
"ReturnType": {"GetBuiltinReturnType": []},
182-
"Operands": [
183-
{"Code": [
184-
"*IRB.CreateVectorSplat(getVectorWidth({}), IRB.getTrue())",
185-
{"GetBuiltinOperandType": ["Address"]}
186-
]},
187-
{"GetBuiltinOperand": ["Address"]},
188-
{"GetBuiltinOperand": ["Source"]},
189-
{"Code": [
190-
"*UndefValue::get(&{})",
191-
{"GetBuiltinOperandType": ["Source"]}
192-
]}
193-
]
194-
}
195-
},
196152
"LZD": {
197153
"Name": "lzd",
198154
"Operands": [

IGC/VectorCompiler/lib/BiF/printf_not_cm_common.h

Lines changed: 41 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ SPDX-License-Identifier: MIT
99
#ifndef VC_BIF_PRINTF_NOT_CM_COMMON_H
1010
#define VC_BIF_PRINTF_NOT_CM_COMMON_H
1111

12-
#include <cm-cl/svm.h>
12+
#include <cm-cl/atomic.h>
1313
#include <cm-cl/vector.h>
1414
#include <opencl_def.h>
1515

@@ -56,48 +56,53 @@ inline int calcRequiredBufferSize(vector<int, ArgsInfoVector::Size> ArgsInfo) {
5656
return BufferSize;
5757
}
5858

59-
static inline BufferElementTy getInitialBufferOffset(uintptr_t BufferPtr,
60-
int RequiredSize) {
61-
constexpr int MagicNumber = 8;
62-
constexpr cl_vector<uint64_t, MagicNumber> AddrOffset{0, 4, 8, 12,
63-
16, 20, 24, 28};
64-
vector<BufferElementTy, MagicNumber> Result;
65-
vector<BufferElementTy, MagicNumber> Size = 0;
66-
Size[0] = RequiredSize;
67-
vector<uint64_t, MagicNumber> Offsets(AddrOffset);
68-
vector<uintptr_t, MagicNumber> Addr = BufferPtr + Offsets;
69-
70-
Result = svm::atomic<atomic::operation::add>(Addr, Size);
71-
return Result[0];
59+
// Return initial buffer offset in BufferElementTy elements (not in bytes).
60+
static inline BufferElementTy
61+
getInitialBufferOffset(__global BufferElementTy *BufferPtr,
62+
BufferElementTy RequiredSize) {
63+
#if __clang_major__ > 9
64+
int ByteOffset =
65+
atomic::execute<atomic::operation::add, memory_order_relaxed,
66+
memory_scope_all_devices>(BufferPtr, RequiredSize);
67+
#else // __clang_major__ > 9
68+
// Helping clang-9 correctly deduce the argument type.
69+
int ByteOffset =
70+
atomic::execute<atomic::operation::add, memory_order_relaxed,
71+
memory_scope_all_devices, __global BufferElementTy>(
72+
BufferPtr, RequiredSize);
73+
#endif // __clang_major__ > 9
74+
return ByteOffset / sizeof(BufferElementTy);
7275
}
7376

74-
static inline vector<BufferElementTy, AddressVectorWidth>
75-
castPointerToVector(uintptr_t Ptr) {
76-
vector<uint64_t, 1> Tmp = Ptr;
77+
template <typename T>
78+
static vector<BufferElementTy, AddressVectorWidth> castPointerToVector(T *Ptr) {
79+
vector<uint64_t, 1> Tmp = reinterpret_cast<uintptr_t>(Ptr);
7780
return Tmp.format<BufferElementTy>();
7881
}
7982

8083
// A helper function to properly set CurAddressLow and CurAddressHigh
8184
// elements of \p TransferData vector by the provided \p Ptr.
8285
static inline void
8386
setCurAddress(vector<BufferElementTy, TransferDataSize> &TransferData,
84-
uintptr_t Ptr) {
87+
__global BufferElementTy *Ptr) {
8588
TransferData.select<AddressVectorWidth, 1>(
8689
TransferDataLayout::CurAddressLow) = castPointerToVector(Ptr);
8790
}
8891

8992
// A helper function to properly extract current address from \p TransferData.
90-
static inline uintptr_t
93+
static inline __global BufferElementTy *
9194
getCurAddress(vector<BufferElementTy, TransferDataSize> TransferData) {
9295
vector<BufferElementTy, AddressVectorWidth> Address =
9396
TransferData.select<AddressVectorWidth, 1>(
9497
TransferDataLayout::CurAddressLow);
9598
// Bit-casting to 64-bit int and then truncating if necessary.
96-
return Address.format<uint64_t>();
99+
return reinterpret_cast<__global BufferElementTy *>(
100+
static_cast<uintptr_t>(Address.format<uint64_t>()));
97101
}
98102

99103
static inline vector<BufferElementTy, TransferDataSize>
100-
generateTransferData(uintptr_t InitPtr, BufferElementTy ReturnValue) {
104+
generateTransferData(__global BufferElementTy *InitPtr,
105+
BufferElementTy ReturnValue) {
101106
vector<BufferElementTy, TransferDataSize> TransferData;
102107
setCurAddress(TransferData, InitPtr);
103108
TransferData[TransferDataLayout::ReturnValue] = ReturnValue;
@@ -111,21 +116,27 @@ vector<BufferElementTy, TransferDataSize>
111116
printf_init_impl(vector<int, ArgsInfoVector::Size> ArgsInfo) {
112117
auto FmtStrSize = ArgsInfo[ArgsInfoVector::FormatStrSize];
113118
if (FmtStrSize > MaxFormatStrSize)
114-
return generateTransferData(/* BufferPtr */ 0, /* ReturnValue */ -1);
119+
return generateTransferData(/* BufferPtr */ nullptr, /* ReturnValue */ -1);
115120
auto BufferSize = calcRequiredBufferSize<StringAnnotationSize>(ArgsInfo);
116-
auto BufferPtr = reinterpret_cast<uintptr_t>(cm::detail::printf_buffer());
121+
#if __clang_major__ > 9
122+
auto *BufferPtr =
123+
static_cast<__global BufferElementTy *>(cm::detail::printf_buffer());
124+
#else // __clang_major__ > 9
125+
// clang-9 cannot handle this auto.
126+
__global BufferElementTy *BufferPtr =
127+
static_cast<__global BufferElementTy *>(cm::detail::printf_buffer());
128+
#endif // __clang_major__ > 9
117129
auto Offset = getInitialBufferOffset(BufferPtr, BufferSize);
118130
return generateTransferData(BufferPtr + Offset, /* ReturnValue */ 0);
119131
}
120132

121133
// Writes \p Data to printf buffer via \p CurAddress pointer.
122134
// Returns promoted pointer.
123-
static inline uintptr_t writeElementToBuffer(uintptr_t CurAddress,
124-
BufferElementTy Data) {
125-
vector<uintptr_t, 1> CurAddressVec = CurAddress;
126-
vector<BufferElementTy, 1> DataVec = Data;
127-
svm::scatter(CurAddressVec, DataVec);
128-
return CurAddress + sizeof(Data);
135+
static inline __global BufferElementTy *
136+
writeElementToBuffer(__global BufferElementTy *CurAddress,
137+
BufferElementTy Data) {
138+
*CurAddress = Data;
139+
return ++CurAddress;
129140
}
130141

131142
// ArgCode is written into printf buffer before every argument.
@@ -189,7 +200,7 @@ printf_arg_impl(vector<BufferElementTy, TransferDataSize> TransferData,
189200
// Just skip.
190201
return TransferData;
191202
vector<BufferElementTy, ArgInfo::Size> Info = getArgInfo<StringArgSize>(Kind);
192-
uintptr_t CurAddress = getCurAddress(TransferData);
203+
__global BufferElementTy *CurAddress = getCurAddress(TransferData);
193204
CurAddress = writeElementToBuffer(CurAddress, Info[ArgInfo::Code]);
194205
for (int Idx = 0; Idx != Info[ArgInfo::NumDWords]; ++Idx)
195206
CurAddress = writeElementToBuffer(CurAddress, Arg[Idx]);

IGC/VectorCompiler/lib/BiF/printf_ocl_genx.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ printf_fmt_impl(vector<BufferElementTy, TransferDataSize> TransferData,
2828
if (TransferData[TransferDataLayout::ReturnValue])
2929
// Just skip.
3030
return TransferData;
31-
uintptr_t CurAddress = getCurAddress(TransferData);
31+
__global BufferElementTy *CurAddress = getCurAddress(TransferData);
3232
BufferElementTy Index = detail::printf_format_index(FormatString);
3333
CurAddress = writeElementToBuffer(CurAddress, Index);
3434
setCurAddress(TransferData, CurAddress);
@@ -49,7 +49,7 @@ printf_arg_str_impl(vector<BufferElementTy, TransferDataSize> TransferData,
4949
if (TransferData[TransferDataLayout::ReturnValue])
5050
// Just skip.
5151
return TransferData;
52-
uintptr_t CurAddress = getCurAddress(TransferData);
52+
__global BufferElementTy *CurAddress = getCurAddress(TransferData);
5353
BufferElementTy Index = detail::printf_format_index(String);
5454
CurAddress = writeElementToBuffer(CurAddress, ArgCode::String);
5555
CurAddress = writeElementToBuffer(CurAddress, Index);

IGC/VectorCompiler/lib/BiF/printf_ze_genx.cpp

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,8 @@ printf_fmt_impl(vector<BufferElementTy, TransferDataSize> TransferData,
2828
if (TransferData[TransferDataLayout::ReturnValue])
2929
// Just skip.
3030
return TransferData;
31-
uintptr_t CurAddress = getCurAddress(TransferData);
32-
auto StrAddress =
33-
castPointerToVector(reinterpret_cast<uintptr_t>(FormatString));
31+
__global BufferElementTy *CurAddress = getCurAddress(TransferData);
32+
auto StrAddress = castPointerToVector(FormatString);
3433
for (int Idx = 0; Idx != StringDWordSize; ++Idx)
3534
CurAddress = writeElementToBuffer(CurAddress, StrAddress[Idx]);
3635
setCurAddress(TransferData, CurAddress);
@@ -43,9 +42,8 @@ template <typename T>
4342
vector<BufferElementTy, TransferDataSize>
4443
printf_arg_str_impl(vector<BufferElementTy, TransferDataSize> TransferData,
4544
T *String) {
46-
return printf_arg_impl<StringDWordSize>(
47-
TransferData, ArgKind::String,
48-
castPointerToVector(reinterpret_cast<uintptr_t>(String)));
45+
return printf_arg_impl<StringDWordSize>(TransferData, ArgKind::String,
46+
castPointerToVector(String));
4947
}
5048

5149
extern "C" cl_vector<BufferElementTy, TransferDataSize>

0 commit comments

Comments
 (0)