@@ -581,7 +581,7 @@ void VariableReuseAnalysis::visitExtractElementInst(ExtractElementInst &I) {
581581 // Valid vec alias and add it into alias map
582582 addVecAlias (EEI_nv, vec_nv, vecVal, iIdx);
583583
584- // Mark this inst as noop inst
584+ // Mark this inst as no-op inst
585585 m_HasBecomeNoopInsts[EEI] = 1 ;
586586}
587587
@@ -657,9 +657,13 @@ void VariableReuseAnalysis::printAlias(raw_ostream &OS, const Function *F) const
657657 for (auto VI : BV->Aliasers ) {
658658 SSubVecDesc *aSV = VI;
659659 Value *aliaser = aSV->Aliaser ;
660+ bool HasBeenNoop = false ;
661+ if (Instruction *AliaserInst = dyn_cast<Instruction>(aliaser))
662+ HasBeenNoop = (m_HasBecomeNoopInsts.count (AliaserInst) > 0 );
660663 bool isSinglVal = m_DeSSA ? m_DeSSA->isSingleValued (aliaser) : true ;
664+ const char *Noop = HasBeenNoop ? " [no-op]" : " " ;
661665 const char *inCC = !isSinglVal ? " .inDessaCC" : " " ;
662- OS << " " << *aliaser << " [" << aSV->StartElementOffset << " ]" << inCC << " \n " ;
666+ OS << " " << *aliaser << " [" << aSV->StartElementOffset << " ]" << inCC << Noop << " \n " ;
663667 }
664668 OS << " \n " ;
665669 }
@@ -887,16 +891,16 @@ bool VariableReuseAnalysis::getAllInsEltsIfAvailable(InsertElementInst *FirstIEI
887891 IGC_ASSERT_MESSAGE (IEI_ix < nelts, " ICE: IEI's index out of bound!" );
888892 SVecInsEltInfo &InsEltInfo = AllIEIs[IEI_ix];
889893 if (InsEltInfo.IEI ) {
890- // One element is inserted more than once, skip.
894+ // This element is inserted more than once, skip.
891895 return false ;
892896 }
893897 InsEltInfo.IEI = I;
894898 InsEltInfo.Elt = E;
895899 InsEltInfo.FromVec = V;
896900 InsEltInfo.FromVec_eltIx = V_ix;
897- if (E) {
898- InsEltInfo. EEI = dyn_cast<ExtractElementInst>(E);
899- }
901+
902+ // So far, E is never nullptr (could be in the future)
903+ InsEltInfo. EEI = dyn_cast_or_null<ExtractElementInst>(E);
900904
901905 if (!I->hasOneUse ()) {
902906 break ;
@@ -923,19 +927,26 @@ bool VariableReuseAnalysis::getAllInsEltsIfAvailable(InsertElementInst *FirstIEI
923927 if (tV == nullptr )
924928 return false ;
925929
926- // Expect node values for all IEIs are identical. In general, if they
927- // are in the same DeSSA CC, that would be fine.
930+ // Expect all IEIs are in the same DeSSA CC (DeSSA special-handles IEIs)
928931 Value *tV_nv = m_DeSSA->getNodeValue (tV);
929932 if (V_root != getRootValue (tV_nv))
930933 return false ;
931934
932935 Value *E = AllIEIs[i].Elt ;
936+ if (!E || isa<Constant>(E)) {
937+ // constant is okay for either non-uniform or uniform.
938+ // (Note: if any E is constant, this chain of IEI cannot be
939+ // a sub-vector of another larger vector).
940+ continue ;
941+ }
933942 Value *FromVec = AllIEIs[i].FromVec ;
934- Value *FromVec_nv = m_DeSSA->getNodeValue (FromVec);
935- // check if FromVec has been coalesced with IEI already by DeSSA.
936- // (Wouldn't happen under current DeSSA, but might happen in future)
937- if (V_root == getRootValue (FromVec_nv))
938- return false ;
943+ if (FromVec) {
944+ Value *FromVec_nv = m_DeSSA->getNodeValue (FromVec);
945+ // check if FromVec has been coalesced with IEI already by DeSSA.
946+ // (Wouldn't happen under current DeSSA, but might happen in future)
947+ if (V_root == getRootValue (FromVec_nv))
948+ return false ;
949+ }
939950
940951 // Make sure FromVec or E have the same uniformness as V.
941952 if ((E && V_dep != m_WIA->whichDepend (E)) || (FromVec && V_dep != m_WIA->whichDepend (FromVec)))
@@ -946,7 +957,7 @@ bool VariableReuseAnalysis::getAllInsEltsIfAvailable(InsertElementInst *FirstIEI
946957
947958Value *VariableReuseAnalysis::traceAliasValue (Value *V) {
948959 if (CastInst *CastI = dyn_cast_or_null<CastInst>(V)) {
949- // Only handle Noop cast inst. For example,
960+ // Only handle no-op cast inst. For example,
950961 // dst = bitcast <3 x i32> src to <3 x float>,
951962 // it is okay, but the following isn't.
952963 // dst = bitcast <3 x i64> src to <6 x i32>
@@ -969,17 +980,13 @@ Value *VariableReuseAnalysis::traceAliasValue(Value *V) {
969980}
970981
971982//
972- // Returns true if the following is true
983+ // Returns true if there is the following pattern; otherwise return false.
973984// IEI = insertElement <vectorType> Vec, S, <constant IEI_ix>
974- // Return false, otherwise.
975- //
976- // When the above condition is true, V and V_ix are used for the
977- // following cases:
978- // 1. S is from another vector V.
979- // S = extractElement <vectorType> V, <constant V_ix>
980- // S is the element denoted by (V, V_ix)
981- // 2. otherwise, V=nullptr, V_ix=0.
982- // S is a candidate inserted and could be alias to the vector.
985+ // 1. S is from another vector V.
986+ // S = extractElement <vectorType> V, <constant V_ix>
987+ // In this case, S is the element denoted by (V, V_ix)
988+ // 2. otherwise, V=nullptr, V_ix=0.
989+ // S is some value other than a vector element.
983990//
984991// Input: IEI
985992// Output: IEI_ix, S, V, V_ix
@@ -999,9 +1006,9 @@ bool VariableReuseAnalysis::getElementValue(InsertElementInst *IEI, int &IEI_ix,
9991006 IEI_ix = (int )CI->getZExtValue ();
10001007
10011008 Value *elem0 = IEI->getOperand (1 );
1002- if (hasBeenPayloadCoalesced (elem0) || isa<Constant>(elem0) || isOrCoalescedWithArg (elem0)) {
1003- // If elem0 has been payload-coalesced, is constant,
1004- // or it has been aliased to an argument, skip it.
1009+ if (hasBeenPayloadCoalesced (elem0) || isOrCoalescedWithArg (elem0)) {
1010+ // If elem0 has been payload-coalesced or it has been aliased to
1011+ // an argument, skip it.
10051012 return false ;
10061013 }
10071014
@@ -1046,11 +1053,10 @@ void VariableReuseAnalysis::InsertElementAliasing(Function *F) {
10461053
10471054 // IGC Key VectorAlias controls vectorAlias optimization.
10481055 //
1049- // Do it if VectorAlias != 0.
1050- // VectorAlias=0x1: subvec aliasing for isolated values
1051- // (getRootValue()=null)
1052- // =0x2: subvec aliasing for both isolated and non-isolated
1053- // value)
1056+ // VectorAlias (also from m_pCtx->getVectorCoalescingControl())
1057+ // 0x0: disable vector aliasing
1058+ // 0x1: subvec aliasing for isolated values (getRootValue()=null)
1059+ // 0x2: subvec aliasing for both isolated and non-isolated values
10541060 const auto control = (m_pCtx->getVectorCoalescingControl () & 0x3 );
10551061 // To avoid increasing GRF pressure, skip if F is too large or not an entry
10561062 const int32_t NumBBThreshold = IGC_GET_FLAG_VALUE (VectorAliasBBThreshold);
@@ -1078,9 +1084,9 @@ void VariableReuseAnalysis::InsertElementAliasing(Function *F) {
10781084 // In this case, 's' becomes a part of 'b'. In LLVM IR,
10791085 // there are a chain of extElt and insElt instructions for
10801086 // doing so.
1081- // 2. insertTo: sub- vector is used to create a base vector.
1087+ // 2. insertTo: small vector is used to create a larger base vector.
10821088 // For example:
1083- // given sub- vector int4 s0, s1; int8 vector b is created like:
1089+ // given small vector int4 s0, s1; int8 vector b is created like:
10841090 // b = (int8) (s0, s1)
10851091 // In this case, both s0 and s1 become part of b.
10861092
@@ -1095,12 +1101,12 @@ void VariableReuseAnalysis::InsertElementAliasing(Function *F) {
10951101 continue ;
10961102 }
10971103
1098- // Check if this is an extractFrom pattern, if so, add alias.
1104+ // Case 1: check if this is an extractFrom pattern, if so, add alias.
10991105 if (processExtractFrom (AllIEIs)) {
11001106 continue ;
11011107 }
11021108
1103- // Check if this is an insertTo pattern, if so add alias.
1109+ // Case 2: check if this is an insertTo pattern, if so add alias.
11041110 if (processInsertTo (BB, AllIEIs)) {
11051111 continue ;
11061112 }
@@ -1123,6 +1129,8 @@ bool VariableReuseAnalysis::processExtractFrom(VecInsEltInfoTy &AllIEIs) {
11231129 }
11241130
11251131 for (int i = 1 ; i < nelts; ++i) {
1132+ // If any of AllIEIs[i] has a constant element (IEI's operand 1), this check
1133+ // will be true, thus AllIEIs cannot be a sub-vector
11261134 if (AllIEIs[i].FromVec != BaseVec || AllIEIs[i].FromVec_eltIx != (BaseStartIx + i))
11271135 return false ;
11281136 }
@@ -1187,9 +1195,9 @@ bool VariableReuseAnalysis::processExtractFrom(VecInsEltInfoTy &AllIEIs) {
11871195 // add alias
11881196 addVecAlias (Sub_nv, Base_nv, BaseVec, BaseStartIx, BaseAlign);
11891197
1190- // Make sure noop insts are in the map so they won't be emitted later.
1198+ // Make sure no-op insts are in the map so they won't be emitted later.
11911199 for (int i = 0 , sz = nelts; i < sz; ++i) {
1192- // IEI chain is coalesced by DeSSA, so it's safe to mark it as noop
1200+ // IEI chain is coalesced by DeSSA, so it's safe to mark it as no-op
11931201 InsertElementInst *IEI = AllIEIs[i].IEI ;
11941202 if (!m_DeSSA->isNoopAliaser (IEI)) {
11951203 m_HasBecomeNoopInsts[IEI] = 1 ;
@@ -1198,7 +1206,7 @@ bool VariableReuseAnalysis::processExtractFrom(VecInsEltInfoTy &AllIEIs) {
11981206 ExtractElementInst *EEI = AllIEIs[i].EEI ;
11991207 IGC_ASSERT (EEI);
12001208 if (!m_DeSSA->isNoopAliaser (EEI)) {
1201- // Set EEI as an aliser, thus it become noop .
1209+ // Set EEI as an aliaser, thus it becomes a no-op.
12021210 Value *EEI_nv = m_DeSSA->getNodeValue (EEI);
12031211 addVecAlias (EEI_nv, Base_nv, BaseVec, AllIEIs[i].FromVec_eltIx , EALIGN_AUTO);
12041212 m_HasBecomeNoopInsts[EEI] = 1 ;
@@ -1253,9 +1261,22 @@ bool VariableReuseAnalysis::processInsertTo(BasicBlock *BB, VecInsEltInfoTy &All
12531261 isSubCandidate = false ;
12541262 }
12551263
1256- if (Elt && Sub == nullptr && skipScalarAliaser (BB, Elt)) {
1257- // Skip scalar coalescing
1258- isSubCandidate = false ;
1264+ // Check scalar
1265+ if (isSubCandidate && Elt && Sub == nullptr ) {
1266+ if (isa<Constant>(Elt)) {
1267+ // Skip as alias is b/w two variables
1268+ isSubCandidate = false ;
1269+ } else if (Instruction *TmpInst = dyn_cast<Instruction>(Elt)) {
1270+ // This is to skip inst such as @llvm.genx.GenISA.simdSize(),
1271+ // which is specially handled during EmitCodeGen
1272+ if (m_PatternMatch->SIMDConstExpr (TmpInst))
1273+ isSubCandidate = false ;
1274+ }
1275+
1276+ if (isSubCandidate && skipScalarAliaser (BB, Elt)) {
1277+ // Skip scalar coalescing
1278+ isSubCandidate = false ;
1279+ }
12591280 }
12601281
12611282 // If Sub == nullptr or NextSub != Sub, this is the last element
@@ -1329,9 +1350,9 @@ bool VariableReuseAnalysis::processInsertTo(BasicBlock *BB, VecInsEltInfoTy &All
13291350
13301351 int V_sz = getNumElts (V);
13311352 if (V_sz > 1 ) {
1332- // set up Noop inst map to skip emitting them later.
1353+ // set up No-op inst map to skip emitting them later.
13331354 for (int j = V_ix, sz = V_ix + V_sz; j < sz; ++j) {
1334- // Safe to mark IEI as noop as IEI chain's coalesced by DeSSA
1355+ // Safe to mark IEI as no-op as its aliaser will set it
13351356 InsertElementInst *IEI = AllIEIs[j].IEI ;
13361357 if (!m_DeSSA->isNoopAliaser (IEI)) {
13371358 m_HasBecomeNoopInsts[IEI] = 1 ;
@@ -1341,15 +1362,15 @@ bool VariableReuseAnalysis::processInsertTo(BasicBlock *BB, VecInsEltInfoTy &All
13411362 IGC_ASSERT (EEI);
13421363 // Sub-vector
13431364 if (!m_DeSSA->isNoopAliaser (EEI)) {
1344- // EEI should be in alias map so it can be marked as noop
1365+ // Safe to set EEI to no-op
13451366 Value *EEI_nv = m_DeSSA->getNodeValue (EEI);
13461367 addVecAlias (EEI_nv, Base_nv, FirstIEI, j);
13471368 m_HasBecomeNoopInsts[EEI] = 1 ;
13481369 }
13491370 }
13501371 } else {
13511372 // scalar
1352- // Safe to mark IEI as noop as IEI chain's coalesced by DeSSA
1373+ // Safe to mark IEI as no-op
13531374 InsertElementInst *IEI = AllIEIs[V_ix].IEI ;
13541375 if (m_DeSSA->isNoopAliaser (IEI))
13551376 continue ;
@@ -1433,8 +1454,11 @@ VariableReuseAnalysis::AState VariableReuseAnalysis::getCandidateStateUse(Value
14331454 }
14341455 } else if (StoreInst *SI = dyn_cast<StoreInst>(Val)) {
14351456 retSt = AState::TARGET;
1436- } else if (isa<CallInst>(Val)) {
1437- return AState::SKIP;
1457+ } else if (CallInst *CallI = dyn_cast<CallInst>(Val)) {
1458+ if (CallI->isInlineAsm ())
1459+ retSt = AState::TARGET;
1460+ else
1461+ return AState::SKIP;
14381462 }
14391463 }
14401464 return retSt;
@@ -1460,15 +1484,17 @@ VariableReuseAnalysis::AState VariableReuseAnalysis::getCandidateStateDef(Value
14601484 }
14611485 } else if (LoadInst *SI = dyn_cast<LoadInst>(Val)) {
14621486 return AState::TARGET;
1463- } else if (isa<CallInst>(Val)) {
1487+ } else if (CallInst *CallI = dyn_cast<CallInst>(Val)) {
1488+ if (CallI->isInlineAsm ())
1489+ return AState::TARGET;
14641490 return AState::SKIP;
14651491 }
14661492 return AState::OK;
14671493}
14681494
14691495// Vector alias disables extractMask optimization. This function
14701496// checks if extractMask optim can be applied. And the caller
1471- // will decide whether to favor extractMask optimization.
1497+ // will decide whether to favor extractMask optimization or not .
14721498bool VariableReuseAnalysis::isExtractMaskCandidate (Value *V) const {
14731499 auto BIT = [](int n) { return (uint32_t )(1 << n); };
14741500
0 commit comments