Skip to content

Commit 8ca849a

Browse files
trbauerigcbot
authored and committed
DPAS intrinsics
Enables certain DPAS intrinsics
1 parent b082e03 commit 8ca849a

File tree

7 files changed

+761
-4
lines changed

7 files changed

+761
-4
lines changed

IGC/BiFModule/Languages/OpenCL/IBiF_dpas.cl

Lines changed: 672 additions & 0 deletions
Large diffs are not rendered by default.

IGC/BiFModule/Languages/OpenCL/PreRelease/IBIF_PreRelease_Impl.cl

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,4 +36,14 @@ SPDX-License-Identifier: MIT
3636
//*****************************************************************************/
3737
#include "IBiF_matrix.cl"
3838

39+
//*****************************************************************************/
40+
// DPAS Functions - Extension
41+
//*****************************************************************************/
42+
#include "IBiF_dpas.cl"
43+
44+
//*****************************************************************************/
45+
// DP4A Dot product extensions
46+
//*****************************************************************************/
47+
#include "IBiF_Dot_Product.cl"
48+
3949

IGC/Compiler/CISACodeGen/CShader.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2182,6 +2182,35 @@ static e_alignment GetPreferredAlignmentOnUse(llvm::Value* V, WIAnalysis* WIA,
21822182
return (pCtx->platform.getGRFSize() == 64) ? EALIGN_32WORD : EALIGN_HWORD;
21832183
}
21842184
}
2185+
GenISAIntrinsic::ID gid = GII->getIntrinsicID();
2186+
if (GII && (gid == GenISAIntrinsic::GenISA_dpas ||
2187+
gid == GenISAIntrinsic::GenISA_sub_group_dpas))
2188+
{
2189+
// Only oprd1 could be uniform and its alignment could
2190+
// be less than GRF. All the others are GRF-aligned.
2191+
if (aV == GII->getArgOperand(1)) {
2192+
ConstantInt* pa = dyn_cast<ConstantInt>(GII->getOperand(3)); // oprd1's precision
2193+
ConstantInt* sdepth = dyn_cast<ConstantInt>(GII->getOperand(5));
2194+
2195+
int PA = (int)pa->getSExtValue();
2196+
int SD = (int)sdepth->getSExtValue();
2197+
uint32_t bits = getPrecisionInBits((PrecisionType)PA);
2198+
uint32_t OPS_PER_CHAN = (GII->getType()->isFloatTy() ? 2 : 4);
2199+
bits = bits * OPS_PER_CHAN;
2200+
bits = bits * SD;
2201+
uint32_t NDWs = bits / 32;
2202+
switch (NDWs) {
2203+
default:
2204+
break;
2205+
case 2:
2206+
return EALIGN_QWORD;
2207+
case 4:
2208+
return EALIGN_OWORD;
2209+
case 8:
2210+
return EALIGN_HWORD;
2211+
}
2212+
}
2213+
}
21852214
}
21862215
return EALIGN_AUTO;
21872216
};

IGC/Compiler/CISACodeGen/DeSSA.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -426,10 +426,10 @@ bool DeSSA::runOnFunction(Function& MF)
426426
// so it is likely for the algorithm to coalesce the phi's dst and the
427427
// other src that is used in the loop, and therefore remove mov instructions
428428
// in the loop.
429-
//
430-
// Note that isolating a value introduce additional copy, thus a threshold
431-
// is used here as a heuristic to try to make sure that a benefit is more
432-
// than the cost.
429+
//
430+
// Note that isolating a value introduces an additional copy, thus a threshold
431+
// is used here as a heuristic to try to make sure that a benefit is more
432+
// than the cost.
433433
enum { PHI_SRC_USE_THRESHOLD = 3 }; // arbitrary number
434434
DenseMap<Value*, int> PHILoopPreHeaderSrcs;
435435

IGC/Compiler/CISACodeGen/VariableReuseAnalysis.cpp

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,49 @@ void VariableReuseAnalysis::mergeVariables(Function* F)
241241
if (GenIntrinsicInst * CI = dyn_cast<GenIntrinsicInst>(I))
242242
{
243243
switch (CI->getIntrinsicID()) {
244+
case GenISAIntrinsic::GenISA_sub_group_dpas:
245+
case GenISAIntrinsic::GenISA_dpas:
246+
{
247+
if (!m_DeSSA) {
248+
// Skip if no DeSSA
249+
break;
250+
}
251+
252+
Value* out = CI;
253+
Value* input = CI->getOperand(0);
254+
255+
if (!(isa<Instruction>(input) || isa<Argument>(input)))
256+
{
257+
// input may be a constant for example
258+
break;
259+
}
260+
Type* OTy = out->getType();
261+
Type* ITy = input->getType();
262+
if (getTypeSizeInBits(OTy) != getTypeSizeInBits(ITy))
263+
{
264+
// If out and input are different size, skip
265+
break;
266+
}
267+
268+
// For now, coalesce out and input if at least one of them
269+
// is local, and input is the last use.
270+
if ((m_WIA && m_WIA->whichDepend(out) == m_WIA->whichDepend(input)) &&
271+
!hasBeenPayloadCoalesced(input) &&
272+
!hasBeenPayloadCoalesced(out) &&
273+
!m_DeSSA->interfere(out, input))
274+
{
275+
// For dpas, the alignment of out and input is the same
276+
e_alignment align = EALIGN_AUTO;
277+
if (m_WIA) {
278+
align = GetPreferredAlignment(out, m_WIA, m_pCtx);
279+
}
280+
// Make sure that nodes have been created before doing union
281+
m_DeSSA->addReg(out, align);
282+
m_DeSSA->addReg(input, align);
283+
m_DeSSA->unionRegs(out, input);
284+
}
285+
break;
286+
}
244287
default:
245288
break;
246289
} // End of switch

IGC/Compiler/InitializePasses.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ void initializeSubGroupFuncsResolutionPass(llvm::PassRegistry&);
112112
void initializeTransformUnmaskedFunctionsPassPass(llvm::PassRegistry&);
113113
void initializeIndirectCallOptimizationPass(llvm::PassRegistry&);
114114
void initializePromoteInt8TypePass(llvm::PassRegistry&);
115+
void initializeDpasFuncsResolutionPass(llvm::PassRegistry&);
115116
void initializePrepareLoadsStoresPassPass(llvm::PassRegistry&);
116117
void initializeVectorBitCastOptPass(llvm::PassRegistry&);
117118
void initializeVectorPreProcessPass(llvm::PassRegistry&);

IGC/Compiler/Optimizer/Scalarizer.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -629,6 +629,8 @@ void ScalarizeFunction::scalarizeInstruction(PHINode* PI)
629629
{
630630
default:
631631
break;
632+
case GenISAIntrinsic::GenISA_sub_group_dpas:
633+
case GenISAIntrinsic::GenISA_dpas:
632634
case GenISAIntrinsic::GenISA_simdBlockWrite:
633635
recoverNonScalarizableInst(PI);
634636
return;

0 commit comments

Comments
 (0)