diff --git a/llvm/include/llvm/Analysis/DependenceAnalysis.h b/llvm/include/llvm/Analysis/DependenceAnalysis.h index f98bd684149f9..79c35b8a4a2a4 100644 --- a/llvm/include/llvm/Analysis/DependenceAnalysis.h +++ b/llvm/include/llvm/Analysis/DependenceAnalysis.h @@ -82,6 +82,17 @@ namespace llvm { /// Dependence::DVEntry - Each level in the distance/direction vector /// has a direction (or perhaps a union of several directions), and /// perhaps a distance. + /// The dependency information could be across a single loop level or across + /// two separate levels that are similar. Two levels are considered similar + /// if they can be interpreted as a single fused loop, i.e., have the same + /// trip count and the same nesting depth. + /// For example, loops b and c are similar and considered as separate loops: + /// for (a = ...) { + /// for (b = 0; b < 10; b++) { + /// } + /// for (c = 0; c < 10; c++) { + /// } + /// } struct DVEntry { enum : unsigned char { NONE = 0, @@ -153,13 +164,26 @@ namespace llvm { /// source and destination of the dependence. virtual unsigned getLevels() const { return 0; } + /// getSeparateLevels - Returns the number of separate loops surrounding + /// the source and destination of the dependence. + virtual unsigned getSeparateLevels() const { return 0; } + + /// getDVEntry - Returns the DV entry associated with a regular or a + /// separate level + DVEntry getDVEntry(unsigned Level, bool Separate) const; + /// getDirection - Returns the direction associated with a particular - /// level. - virtual unsigned getDirection(unsigned Level) const { return DVEntry::ALL; } + /// common or separate level. + virtual unsigned getDirection(unsigned Level, bool Separate = false) const { + return DVEntry::ALL; + } /// getDistance - Returns the distance (or NULL) associated with a - /// particular level. - virtual const SCEV *getDistance(unsigned Level) const { return nullptr; } + /// particular common or separate level. 
+ virtual const SCEV *getDistance(unsigned Level, + bool Separate = false) const { + return nullptr; + } /// Check if the direction vector is negative. A negative direction /// vector means Src and Dst are reversed in the actual program. @@ -172,21 +196,32 @@ namespace llvm { virtual bool normalize(ScalarEvolution *SE) { return false; } /// isPeelFirst - Returns true if peeling the first iteration from - /// this loop will break this dependence. - virtual bool isPeelFirst(unsigned Level) const { return false; } + /// this regular or separate loop level will break this dependence. + virtual bool isPeelFirst(unsigned Level, bool Separate = false) const { + return false; + } /// isPeelLast - Returns true if peeling the last iteration from - /// this loop will break this dependence. - virtual bool isPeelLast(unsigned Level) const { return false; } + /// this regular or separate loop level will break this dependence. + virtual bool isPeelLast(unsigned Level, bool Separate = false) const { + return false; + } - /// isSplitable - Returns true if splitting this loop will break + /// isSplitable - Returns true if splitting the loop will break /// the dependence. - virtual bool isSplitable(unsigned Level) const { return false; } + virtual bool isSplitable(unsigned Level, bool Separate = false) const { + return false; + } + + /// inSeparateLoops - Returns true if this level is a separate level, i.e., + /// performed across two separate loop nests that are treated like a single + /// fused loop. + virtual bool inSeparateLoops(unsigned Level) const { return false; } - /// isScalar - Returns true if a particular level is scalar; that is, - /// if no subscript in the source or destination mention the induction - /// variable associated with the loop at this level. 
- virtual bool isScalar(unsigned Level) const; + /// isScalar - Returns true if a particular regular or separate level is + /// scalar; that is, if no subscript in the source or destination mention + /// the induction variable associated with the loop at this level. + virtual bool isScalar(unsigned Level, bool Separate = false) const; /// getNextPredecessor - Returns the value of the NextPredecessor /// field. @@ -212,6 +247,10 @@ namespace llvm { /// void dump(raw_ostream &OS) const; + /// dumpImp - For debugging purposes. Dumps a dependence to OS with or + /// without considering the separate levels. + void dumpImp(raw_ostream &OS, bool Separate = false) const; + protected: Instruction *Src, *Dst; @@ -252,13 +291,31 @@ namespace llvm { /// source and destination of the dependence. unsigned getLevels() const override { return Levels; } + /// getSeparateLevels - Returns the number of separate loops surrounding + /// the source and destination of the dependence. + unsigned getSeparateLevels() const override { return SeparateLevels; } + + /// getDVEntry - Returns the DV entry associated with a regular or a + /// separate level + DVEntry getDVEntry(unsigned Level, bool Separate) const { + if (!Separate) { + assert(0 < Level && Level <= Levels && "Level out of range"); + return DV[Level - 1]; + } else { + assert(Levels < Level && Level <= Levels + SeparateLevels && + "Separate level out of range"); + return DVSeparate[Level - Levels - 1]; + } + } + /// getDirection - Returns the direction associated with a particular - /// level. - unsigned getDirection(unsigned Level) const override; + /// common or separate level. + unsigned getDirection(unsigned Level, bool Separate = false) const override; /// getDistance - Returns the distance (or NULL) associated with a - /// particular level. - const SCEV *getDistance(unsigned Level) const override; + /// particular common or separate level. 
+ const SCEV *getDistance(unsigned Level, + bool Separate = false) const override; /// Check if the direction vector is negative. A negative direction /// vector means Src and Dst are reversed in the actual program. @@ -271,27 +328,34 @@ namespace llvm { bool normalize(ScalarEvolution *SE) override; /// isPeelFirst - Returns true if peeling the first iteration from - /// this loop will break this dependence. - bool isPeelFirst(unsigned Level) const override; + /// this regular or separate loop level will break this dependence. + bool isPeelFirst(unsigned Level, bool Separate = false) const override; /// isPeelLast - Returns true if peeling the last iteration from - /// this loop will break this dependence. - bool isPeelLast(unsigned Level) const override; + /// this regular or separate loop level will break this dependence. + bool isPeelLast(unsigned Level, bool Separate = false) const override; /// isSplitable - Returns true if splitting the loop will break /// the dependence. - bool isSplitable(unsigned Level) const override; + bool isSplitable(unsigned Level, bool Separate = false) const override; - /// isScalar - Returns true if a particular level is scalar; that is, - /// if no subscript in the source or destination mention the induction - /// variable associated with the loop at this level. - bool isScalar(unsigned Level) const override; + /// inSeparateLoops - Returns true if this level is a separate level, i.e., + /// performed across two separate loop nests that are treated like a single + /// fused loop. + bool inSeparateLoops(unsigned Level) const override; + + /// isScalar - Returns true if a particular regular or separate level is + /// scalar; that is, if no subscript in the source or destination mention + /// the induction variable associated with the loop at this level. 
+ bool isScalar(unsigned Level, bool Separate = false) const override; private: unsigned short Levels; + unsigned short SeparateLevels; bool LoopIndependent; bool Consistent; // Init to true, then refine. std::unique_ptr DV; + std::unique_ptr DVSeparate; friend class DependenceInfo; }; @@ -423,7 +487,8 @@ namespace llvm { const SCEV *A; const SCEV *B; const SCEV *C; - const Loop *AssociatedLoop; + const Loop *AssociatedSrcLoop; + const Loop *AssociatedDstLoop; public: /// isEmpty - Return true if the constraint is of kind Empty. @@ -467,19 +532,27 @@ namespace llvm { /// Otherwise assert. LLVM_ABI const SCEV *getD() const; - /// getAssociatedLoop - Returns the loop associated with this constraint. - LLVM_ABI const Loop *getAssociatedLoop() const; + /// getAssociatedSrcLoop - Returns the source loop associated with this + /// constraint. + LLVM_ABI const Loop *getAssociatedSrcLoop() const; + + /// getAssociatedDstLoop - Returns the destination loop associated with + /// this constraint. + LLVM_ABI const Loop *getAssociatedDstLoop() const; /// setPoint - Change a constraint to Point. LLVM_ABI void setPoint(const SCEV *X, const SCEV *Y, - const Loop *CurrentLoop); + const Loop *CurrentSrcLoop, + const Loop *CurrentDstLoop); /// setLine - Change a constraint to Line. LLVM_ABI void setLine(const SCEV *A, const SCEV *B, const SCEV *C, - const Loop *CurrentLoop); + const Loop *CurrentSrcLoop, + const Loop *CurrentDstLoop); /// setDistance - Change a constraint to Distance. - LLVM_ABI void setDistance(const SCEV *D, const Loop *CurrentLoop); + LLVM_ABI void setDistance(const SCEV *D, const Loop *CurrentSrcLoop, + const Loop *CurrentDstLoop); /// setEmpty - Change a constraint to Empty. 
LLVM_ABI void setEmpty(); @@ -492,6 +565,10 @@ namespace llvm { LLVM_ABI void dump(raw_ostream &OS) const; }; + /// Returns true if two loops are the same or they have the same tripcount + /// and depth + bool areLoopsSimilar(const Loop *SrcLoop, const Loop *DstLoop) const; + /// establishNestingLevels - Examines the loop nesting of the Src and Dst /// instructions and establishes their shared loops. Sets the variables /// CommonLevels, SrcLevels, and MaxLevels. @@ -503,8 +580,8 @@ namespace llvm { /// This lets us allocate vectors MaxLevels in length, with room for every /// distinct loop referenced in both the source and destination subscripts. /// The variable SrcLevels is the nesting depth of the source instruction. - /// It's used to help calculate distinct loops referenced by the destination. - /// Here's the map from loops to levels: + /// It's used to help calculate distinct loops referenced by the + /// destination. Here's the map from loops to levels: /// 0 - unused /// 1 - outermost common loop /// ... - other common loops @@ -542,10 +619,23 @@ namespace llvm { /// e - 5 /// f - 6 /// g - 7 = MaxLevels - void establishNestingLevels(const Instruction *Src, - const Instruction *Dst); - - unsigned CommonLevels, SrcLevels, MaxLevels; + /// SeparateLevels counts the number of levels after common levels that are + /// not common but are similar, meaning that they have the same tripcount + /// and depth. Assume that in this code fragment, levels c and e are + /// similar. In this case only the loop nests at the next level after + /// common levels are similar, and SeparateLevel is set to 1. + /// If there are similar loop nests, we could use the APIs with considering + /// them as fused loops. 
In that case the level numbers for the previous + /// code look like + /// a - 1 + /// b - 2 + /// c,e - 3 = CommonLevels + /// d - 4 = SrcLevels + /// f - 5 + /// g - 6 = MaxLevels + void establishNestingLevels(const Instruction *Src, const Instruction *Dst); + + unsigned CommonLevels, SrcLevels, MaxLevels, SeparateLevels; /// mapSrcLoop - Given one of the loops containing the source, return /// its level index in our numbering scheme. @@ -684,13 +774,10 @@ namespace llvm { /// Returns true if any possible dependence is disproved. /// If there might be a dependence, returns false. /// Sets appropriate direction and distance. - bool strongSIVtest(const SCEV *Coeff, - const SCEV *SrcConst, - const SCEV *DstConst, - const Loop *CurrentLoop, - unsigned Level, - FullDependence &Result, - Constraint &NewConstraint) const; + bool strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst, + const SCEV *DstConst, const Loop *CurrentSrcLoop, + const Loop *CurrentDstLoop, unsigned Level, + FullDependence &Result, Constraint &NewConstraint) const; /// weakCrossingSIVtest - Tests the weak-crossing SIV subscript pair /// (Src and Dst) for dependence. @@ -702,13 +789,10 @@ namespace llvm { /// Sets appropriate direction entry. /// Set consistent to false. /// Marks the dependence as splitable. - bool weakCrossingSIVtest(const SCEV *SrcCoeff, - const SCEV *SrcConst, - const SCEV *DstConst, - const Loop *CurrentLoop, - unsigned Level, - FullDependence &Result, - Constraint &NewConstraint, + bool weakCrossingSIVtest(const SCEV *SrcCoeff, const SCEV *SrcConst, + const SCEV *DstConst, const Loop *CurrentSrcLoop, + const Loop *CurrentDstLoop, unsigned Level, + FullDependence &Result, Constraint &NewConstraint, const SCEV *&SplitIter) const; /// ExactSIVtest - Tests the SIV subscript pair @@ -720,13 +804,10 @@ namespace llvm { /// If there might be a dependence, returns false. /// Sets appropriate direction entry. /// Set consistent to false. 
- bool exactSIVtest(const SCEV *SrcCoeff, - const SCEV *DstCoeff, - const SCEV *SrcConst, - const SCEV *DstConst, - const Loop *CurrentLoop, - unsigned Level, - FullDependence &Result, + bool exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, + const SCEV *SrcConst, const SCEV *DstConst, + const Loop *CurrentSrcLoop, const Loop *CurrentDstLoop, + unsigned Level, FullDependence &Result, Constraint &NewConstraint) const; /// weakZeroSrcSIVtest - Tests the weak-zero SIV subscript pair @@ -739,11 +820,9 @@ namespace llvm { /// Sets appropriate direction entry. /// Set consistent to false. /// If loop peeling will break the dependence, mark appropriately. - bool weakZeroSrcSIVtest(const SCEV *DstCoeff, - const SCEV *SrcConst, - const SCEV *DstConst, - const Loop *CurrentLoop, - unsigned Level, + bool weakZeroSrcSIVtest(const SCEV *DstCoeff, const SCEV *SrcConst, + const SCEV *DstConst, const Loop *CurrentSrcLoop, + const Loop *CurrentDstLoop, unsigned Level, FullDependence &Result, Constraint &NewConstraint) const; @@ -757,11 +836,9 @@ namespace llvm { /// Sets appropriate direction entry. /// Set consistent to false. /// If loop peeling will break the dependence, mark appropriately. 
- bool weakZeroDstSIVtest(const SCEV *SrcCoeff, - const SCEV *SrcConst, - const SCEV *DstConst, - const Loop *CurrentLoop, - unsigned Level, + bool weakZeroDstSIVtest(const SCEV *SrcCoeff, const SCEV *SrcConst, + const SCEV *DstConst, const Loop *CurrentSrcLoop, + const Loop *CurrentDstLoop, unsigned Level, FullDependence &Result, Constraint &NewConstraint) const; diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp index c1b1d002c9979..b1a22c03a8644 100644 --- a/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -104,6 +104,7 @@ STATISTIC(GCDindependence, "GCD independence"); STATISTIC(BanerjeeApplications, "Banerjee applications"); STATISTIC(BanerjeeIndependence, "Banerjee independence"); STATISTIC(BanerjeeSuccesses, "Banerjee successes"); +STATISTIC(SeparateLoopsConsidered, "Separate loops considered"); static cl::opt Delinearize("da-delinearize", cl::init(true), cl::Hidden, @@ -258,10 +259,7 @@ bool Dependence::isAnti() const { // if no subscript in the source or destination mention the induction // variable associated with the loop at this level. // Leave this out of line, so it will serve as a virtual method anchor -bool Dependence::isScalar(unsigned level) const { - return false; -} - +bool Dependence::isScalar(unsigned level, bool Separate) const { return false; } //===----------------------------------------------------------------------===// // FullDependence methods @@ -273,6 +271,7 @@ FullDependence::FullDependence(Instruction *Source, Instruction *Destination, : Dependence(Source, Destination, Assumes), Levels(CommonLevels), LoopIndependent(PossiblyLoopIndependent) { Consistent = true; + SeparateLevels = 0; if (CommonLevels) DV = std::make_unique(CommonLevels); } @@ -335,51 +334,49 @@ bool FullDependence::normalize(ScalarEvolution *SE) { // The rest are simple getters that hide the implementation. 
-// getDirection - Returns the direction associated with a particular level. -unsigned FullDependence::getDirection(unsigned Level) const { - assert(0 < Level && Level <= Levels && "Level out of range"); - return DV[Level - 1].Direction; +// getDirection - Returns the direction associated with a particular common or +// separate level. +unsigned FullDependence::getDirection(unsigned Level, bool Separate) const { + return getDVEntry(Level, Separate).Direction; } - -// Returns the distance (or NULL) associated with a particular level. -const SCEV *FullDependence::getDistance(unsigned Level) const { - assert(0 < Level && Level <= Levels && "Level out of range"); - return DV[Level - 1].Distance; +// Returns the distance (or NULL) associated with a particular common or +// separate level. +const SCEV *FullDependence::getDistance(unsigned Level, bool Separate) const { + return getDVEntry(Level, Separate).Distance; } - -// Returns true if a particular level is scalar; that is, -// if no subscript in the source or destination mention the induction -// variable associated with the loop at this level. -bool FullDependence::isScalar(unsigned Level) const { - assert(0 < Level && Level <= Levels && "Level out of range"); - return DV[Level - 1].Scalar; +// Returns true if a particular regular or separate level is scalar; that is, +// if no subscript in the source or destination mention the induction variable +// associated with the loop at this level. +bool FullDependence::isScalar(unsigned Level, bool Separate) const { + return getDVEntry(Level, Separate).Scalar; } - -// Returns true if peeling the first iteration from this loop -// will break this dependence. -bool FullDependence::isPeelFirst(unsigned Level) const { - assert(0 < Level && Level <= Levels && "Level out of range"); - return DV[Level - 1].PeelFirst; +// Returns true if peeling the first iteration from this regular or separate +// loop level will break this dependence. 
+bool FullDependence::isPeelFirst(unsigned Level, bool Separate) const { + return getDVEntry(Level, Separate).PeelFirst; } - -// Returns true if peeling the last iteration from this loop -// will break this dependence. -bool FullDependence::isPeelLast(unsigned Level) const { - assert(0 < Level && Level <= Levels && "Level out of range"); - return DV[Level - 1].PeelLast; +// Returns true if peeling the last iteration from this regular or separate +// loop level will break this dependence. +bool FullDependence::isPeelLast(unsigned Level, bool Separate) const { + return getDVEntry(Level, Separate).PeelLast; } - -// Returns true if splitting this loop will break the dependence. -bool FullDependence::isSplitable(unsigned Level) const { - assert(0 < Level && Level <= Levels && "Level out of range"); - return DV[Level - 1].Splitable; +// Returns true if splitting loop will break the dependence. +bool FullDependence::isSplitable(unsigned Level, bool Separate) const { + return getDVEntry(Level, Separate).Splitable; } +// inSeparateLoops - Returns true if this level is a separate level, i.e., +// performed across two separate loop nests that are treated like a single +// fused loop. +bool FullDependence::inSeparateLoops(unsigned Level) const { + assert(0 < Level && Level <= Levels + SeparateLevels && "Level out of range"); + return Level > Levels; +} //===----------------------------------------------------------------------===// // DependenceInfo::Constraint methods @@ -434,38 +431,50 @@ const SCEV *DependenceInfo::Constraint::getD() const { return SE->getNegativeSCEV(C); } +// Returns the source loop associated with this constraint. +const Loop *DependenceInfo::Constraint::getAssociatedSrcLoop() const { + assert((Kind == Distance || Kind == Line || Kind == Point) && + "Kind should be Distance, Line, or Point"); + return AssociatedSrcLoop; +} -// Returns the loop associated with this constraint. 
-const Loop *DependenceInfo::Constraint::getAssociatedLoop() const { +// Returns the destination loop associated with this constraint. +const Loop *DependenceInfo::Constraint::getAssociatedDstLoop() const { assert((Kind == Distance || Kind == Line || Kind == Point) && "Kind should be Distance, Line, or Point"); - return AssociatedLoop; + return AssociatedDstLoop; } void DependenceInfo::Constraint::setPoint(const SCEV *X, const SCEV *Y, - const Loop *CurLoop) { + const Loop *CurSrcLoop, + const Loop *CurDstLoop) { Kind = Point; A = X; B = Y; - AssociatedLoop = CurLoop; + AssociatedSrcLoop = CurSrcLoop; + AssociatedDstLoop = CurDstLoop; } void DependenceInfo::Constraint::setLine(const SCEV *AA, const SCEV *BB, - const SCEV *CC, const Loop *CurLoop) { + const SCEV *CC, const Loop *CurSrcLoop, + const Loop *CurDstLoop) { Kind = Line; A = AA; B = BB; C = CC; - AssociatedLoop = CurLoop; + AssociatedSrcLoop = CurSrcLoop; + AssociatedDstLoop = CurDstLoop; } void DependenceInfo::Constraint::setDistance(const SCEV *D, - const Loop *CurLoop) { + const Loop *CurSrcLoop, + const Loop *CurDstLoop) { Kind = Distance; A = SE->getOne(D->getType()); B = SE->getNegativeSCEV(A); C = SE->getNegativeSCEV(D); - AssociatedLoop = CurLoop; + AssociatedSrcLoop = CurSrcLoop; + AssociatedDstLoop = CurDstLoop; } void DependenceInfo::Constraint::setEmpty() { Kind = Empty; } @@ -612,8 +621,8 @@ bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) { ++DeltaSuccesses; return true; } - if (const SCEVConstant *CUB = - collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) { + if (const SCEVConstant *CUB = collectConstantUpperBound( + X->getAssociatedSrcLoop(), Prod1->getType())) { const APInt &UpperBound = CUB->getAPInt(); LLVM_DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n"); if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) { @@ -622,9 +631,8 @@ bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) { return true; } } - 
X->setPoint(SE->getConstant(Xq), - SE->getConstant(Yq), - X->getAssociatedLoop()); + X->setPoint(SE->getConstant(Xq), SE->getConstant(Yq), + X->getAssociatedSrcLoop(), X->getAssociatedDstLoop()); ++DeltaSuccesses; return true; } @@ -659,7 +667,6 @@ bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) { // For debugging purposes. Dumps a dependence to OS. void Dependence::dump(raw_ostream &OS) const { - bool Splitable = false; if (isConfused()) OS << "confused"; else { @@ -673,41 +680,12 @@ void Dependence::dump(raw_ostream &OS) const { OS << "anti"; else if (isInput()) OS << "input"; - unsigned Levels = getLevels(); - OS << " ["; - for (unsigned II = 1; II <= Levels; ++II) { - if (isSplitable(II)) - Splitable = true; - if (isPeelFirst(II)) - OS << 'p'; - const SCEV *Distance = getDistance(II); - if (Distance) - OS << *Distance; - else if (isScalar(II)) - OS << "S"; - else { - unsigned Direction = getDirection(II); - if (Direction == DVEntry::ALL) - OS << "*"; - else { - if (Direction & DVEntry::LT) - OS << "<"; - if (Direction & DVEntry::EQ) - OS << "="; - if (Direction & DVEntry::GT) - OS << ">"; - } - } - if (isPeelLast(II)) - OS << 'p'; - if (II < Levels) - OS << " "; + dumpImp(OS); + unsigned SeparateLevels = getSeparateLevels(); + if (SeparateLevels > 0) { + OS << "! / assuming " << SeparateLevels << " fused loop(s): "; + dumpImp(OS, true); } - if (isLoopIndependent()) - OS << "|<"; - OS << "]"; - if (Splitable) - OS << " splitable"; } OS << "!\n"; @@ -718,6 +696,54 @@ void Dependence::dump(raw_ostream &OS) const { } } +// For debugging purposes. Dumps a dependence to OS with or without considering +// the separate levels. 
+void Dependence::dumpImp(raw_ostream &OS, bool Separate) const { + bool Splitable = false; + unsigned Levels = getLevels(); + unsigned SeparateLevels = getSeparateLevels(); + bool OnSeparates = false; + unsigned LevelNum = Levels; + if (Separate) + LevelNum += SeparateLevels; + OS << " ["; + for (unsigned II = 1; II <= LevelNum; ++II) { + if (!OnSeparates && inSeparateLoops(II)) + OnSeparates = true; + if (isSplitable(II, OnSeparates)) + Splitable = true; + if (isPeelFirst(II, OnSeparates)) + OS << 'p'; + const SCEV *Distance = getDistance(II, OnSeparates); + if (Distance) + OS << *Distance; + else if (isScalar(II, OnSeparates)) + OS << "S"; + else { + unsigned Direction = getDirection(II, OnSeparates); + if (Direction == DVEntry::ALL) + OS << "*"; + else { + if (Direction & DVEntry::LT) + OS << "<"; + if (Direction & DVEntry::EQ) + OS << "="; + if (Direction & DVEntry::GT) + OS << ">"; + } + } + if (isPeelLast(II, OnSeparates)) + OS << 'p'; + if (II < LevelNum) + OS << " "; + } + if (isLoopIndependent()) + OS << "|<"; + OS << "]"; + if (Splitable) + OS << " splitable"; +} + // Returns NoAlias/MayAliass/MustAlias for two memory locations based upon their // underlaying objects. If LocA and LocB are known to not alias (for any reason: // tbaa, non-overlapping regions etc), then it is known there is no dependecy. 
@@ -768,6 +794,34 @@ bool isLoadOrStore(const Instruction *I) {
   return false;
 }
 
+// Returns true if the two loops are the same loop, or if they have the same
+// nesting depth and provably equal loop-invariant backedge-taken counts.
+bool DependenceInfo::areLoopsSimilar(const Loop *SrcLoop,
+                                     const Loop *DstLoop) const {
+  if (SrcLoop == DstLoop)
+    return true;
+
+  // Reject a null loop before either pointer is dereferenced below.
+  if (!SrcLoop || !DstLoop)
+    return false;
+
+  if (SrcLoop->getLoopDepth() != DstLoop->getLoopDepth())
+    return false;
+
+  if (!SrcLoop->getLoopLatch() || !DstLoop->getLoopLatch())
+    return false;
+
+  // Without a loop-invariant count on both sides there is nothing to compare.
+  if (!SE->hasLoopInvariantBackedgeTakenCount(SrcLoop) ||
+      !SE->hasLoopInvariantBackedgeTakenCount(DstLoop))
+    return false;
+
+  const SCEV *SrcUB = SE->getBackedgeTakenCount(SrcLoop);
+  const SCEV *DstUB = SE->getBackedgeTakenCount(DstLoop);
+
+  // NOTE(review): SrcUB and DstUB may have different types -- confirm.
+  return SE->isKnownPredicate(ICmpInst::ICMP_EQ, SrcUB, DstUB);
+}
 
 // Examines the loop nesting of the Src and Dst
 // instructions and establishes their shared loops. Sets the variables
@@ -819,6 +873,20 @@ bool isLoadOrStore(const Instruction *I) {
 //    e - 5
 //    f - 6
 //    g - 7 = MaxLevels
+// SeparateLevels counts the number of levels after common levels that are
+// not common but are similar, meaning that they have the same tripcount
+// and depth. Assume that in this code fragment, levels c and e are
+// similar. In this case only the loop nests at the next level after
+// common levels are similar, and SeparateLevels is set to 1.
+// If there are similar loop nests, we could use the APIs with considering them
+// as fused loops. 
In that case the level numbers for the previous code look +// like +// a - 1 +// b - 2 +// c,e - 3 = CommonLevels +// d - 4 = SrcLevels +// f - 5 +// g - 6 = MaxLevels void DependenceInfo::establishNestingLevels(const Instruction *Src, const Instruction *Dst) { const BasicBlock *SrcBlock = Src->getParent(); @@ -829,6 +897,7 @@ void DependenceInfo::establishNestingLevels(const Instruction *Src, const Loop *DstLoop = LI->getLoopFor(DstBlock); SrcLevels = SrcLevel; MaxLevels = SrcLevel + DstLevel; + SeparateLevels = 0; while (SrcLevel > DstLevel) { SrcLoop = SrcLoop->getParentLoop(); SrcLevel--; @@ -837,16 +906,20 @@ void DependenceInfo::establishNestingLevels(const Instruction *Src, DstLoop = DstLoop->getParentLoop(); DstLevel--; } + + // find the first common level and count the separate levels leading to it while (SrcLoop != DstLoop) { + SeparateLevels++; + if (!areLoopsSimilar(SrcLoop, DstLoop)) + SeparateLevels = 0; + SrcLevel--; SrcLoop = SrcLoop->getParentLoop(); DstLoop = DstLoop->getParentLoop(); - SrcLevel--; } CommonLevels = SrcLevel; MaxLevels -= CommonLevels; } - // Given one of the loops containing the source, return // its level index in our numbering scheme. unsigned DependenceInfo::mapSrcLoop(const Loop *SrcLoop) const { @@ -1233,8 +1306,9 @@ bool DependenceInfo::testZIV(const SCEV *Src, const SCEV *Dst, // // Return true if dependence disproved. 
bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst, - const SCEV *DstConst, const Loop *CurLoop, - unsigned Level, FullDependence &Result, + const SCEV *DstConst, const Loop *CurSrcLoop, + const Loop *CurDstLoop, unsigned Level, + FullDependence &Result, Constraint &NewConstraint) const { LLVM_DEBUG(dbgs() << "\tStrong SIV test\n"); LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff); @@ -1252,7 +1326,8 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst, LLVM_DEBUG(dbgs() << ", " << *Delta->getType() << "\n"); // check that |Delta| < iteration count - if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { + if (const SCEV *UpperBound = + collectUpperBound(CurSrcLoop, Delta->getType())) { LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound); LLVM_DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n"); const SCEV *AbsDelta = @@ -1285,7 +1360,8 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst, return true; } Result.DV[Level].Distance = SE->getConstant(Distance); - NewConstraint.setDistance(SE->getConstant(Distance), CurLoop); + NewConstraint.setDistance(SE->getConstant(Distance), CurSrcLoop, + CurDstLoop); if (Distance.sgt(0)) Result.DV[Level].Direction &= Dependence::DVEntry::LT; else if (Distance.slt(0)) @@ -1297,7 +1373,7 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst, else if (Delta->isZero()) { // since 0/X == 0 Result.DV[Level].Distance = Delta; - NewConstraint.setDistance(Delta, CurLoop); + NewConstraint.setDistance(Delta, CurSrcLoop, CurDstLoop); Result.DV[Level].Direction &= Dependence::DVEntry::EQ; ++StrongSIVsuccesses; } @@ -1305,13 +1381,12 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst, if (Coeff->isOne()) { LLVM_DEBUG(dbgs() << "\t Distance = " << *Delta << "\n"); Result.DV[Level].Distance = Delta; // since X/1 == X - NewConstraint.setDistance(Delta, CurLoop); + NewConstraint.setDistance(Delta, 
CurSrcLoop, CurDstLoop); } else { Result.Consistent = false; - NewConstraint.setLine(Coeff, - SE->getNegativeSCEV(Coeff), - SE->getNegativeSCEV(Delta), CurLoop); + NewConstraint.setLine(Coeff, SE->getNegativeSCEV(Coeff), + SE->getNegativeSCEV(Delta), CurSrcLoop, CurDstLoop); } // maybe we can get a useful direction @@ -1339,7 +1414,6 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst, return false; } - // weakCrossingSIVtest - // From the paper, Practical Dependence Testing, Section 4.2.2 // @@ -1370,8 +1444,9 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst, // Return true if dependence disproved. bool DependenceInfo::weakCrossingSIVtest( const SCEV *Coeff, const SCEV *SrcConst, const SCEV *DstConst, - const Loop *CurLoop, unsigned Level, FullDependence &Result, - Constraint &NewConstraint, const SCEV *&SplitIter) const { + const Loop *CurSrcLoop, const Loop *CurDstLoop, unsigned Level, + FullDependence &Result, Constraint &NewConstraint, + const SCEV *&SplitIter) const { LLVM_DEBUG(dbgs() << "\tWeak-Crossing SIV test\n"); LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n"); LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); @@ -1382,7 +1457,7 @@ bool DependenceInfo::weakCrossingSIVtest( Result.Consistent = false; const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); - NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop); + NewConstraint.setLine(Coeff, Coeff, Delta, CurSrcLoop, CurDstLoop); if (Delta->isZero()) { Result.DV[Level].Direction &= ~Dependence::DVEntry::LT; Result.DV[Level].Direction &= ~Dependence::DVEntry::GT; @@ -1430,7 +1505,8 @@ bool DependenceInfo::weakCrossingSIVtest( // We're certain that Delta > 0 and ConstCoeff > 0. 
// Check Delta/(2*ConstCoeff) against upper loop bound - if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { + if (const SCEV *UpperBound = + collectUpperBound(CurSrcLoop, Delta->getType())) { LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2); const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound), @@ -1484,7 +1560,6 @@ bool DependenceInfo::weakCrossingSIVtest( return false; } - // Kirch's algorithm, from // // Optimizing Supercompilers for Supercomputers @@ -1570,7 +1645,8 @@ static APInt ceilingOfQuotient(const APInt &A, const APInt &B) { // returns all the dependencies that exist between Dst and Src. bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, const SCEV *SrcConst, const SCEV *DstConst, - const Loop *CurLoop, unsigned Level, + const Loop *CurSrcLoop, + const Loop *CurDstLoop, unsigned Level, FullDependence &Result, Constraint &NewConstraint) const { LLVM_DEBUG(dbgs() << "\tExact SIV test\n"); @@ -1585,7 +1661,7 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), Delta, - CurLoop); + CurSrcLoop, CurDstLoop); const SCEVConstant *ConstDelta = dyn_cast(Delta); const SCEVConstant *ConstSrcCoeff = dyn_cast(SrcCoeff); const SCEVConstant *ConstDstCoeff = dyn_cast(DstCoeff); @@ -1612,7 +1688,7 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, bool UMValid = false; // UM is perhaps unavailable, let's check if (const SCEVConstant *CUB = - collectConstantUpperBound(CurLoop, Delta->getType())) { + collectConstantUpperBound(CurSrcLoop, Delta->getType())) { UM = CUB->getAPInt(); LLVM_DEBUG(dbgs() << "\t UM = " << UM << "\n"); UMValid = true; @@ -1719,7 +1795,6 @@ bool 
DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, return Result.DV[Level].Direction == Dependence::DVEntry::NONE; } - // Return true if the divisor evenly divides the dividend. static bool isRemainderZero(const SCEVConstant *Dividend, @@ -1761,12 +1836,10 @@ bool isRemainderZero(const SCEVConstant *Dividend, // (see also weakZeroDstSIVtest) // // Return true if dependence disproved. -bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff, - const SCEV *SrcConst, - const SCEV *DstConst, - const Loop *CurLoop, unsigned Level, - FullDependence &Result, - Constraint &NewConstraint) const { +bool DependenceInfo::weakZeroSrcSIVtest( + const SCEV *DstCoeff, const SCEV *SrcConst, const SCEV *DstConst, + const Loop *CurSrcLoop, const Loop *CurDstLoop, unsigned Level, + FullDependence &Result, Constraint &NewConstraint) const { // For the WeakSIV test, it's possible the loop isn't common to // the Src and Dst loops. If it isn't, then there's no need to // record a direction. 
@@ -1780,7 +1853,7 @@ bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff, Result.Consistent = false; const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); NewConstraint.setLine(SE->getZero(Delta->getType()), DstCoeff, Delta, - CurLoop); + CurSrcLoop, CurDstLoop); LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) { if (Level < CommonLevels) { @@ -1801,7 +1874,8 @@ bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff, // check that Delta/SrcCoeff < iteration count // really check NewDelta < count*AbsCoeff - if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { + if (const SCEV *UpperBound = + collectUpperBound(CurSrcLoop, Delta->getType())) { LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { @@ -1839,7 +1913,6 @@ bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff, return false; } - // weakZeroDstSIVtest - // From the paper, Practical Dependence Testing, Section 4.2.2 // @@ -1871,12 +1944,10 @@ bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff, // (see also weakZeroSrcSIVtest) // // Return true if dependence disproved. -bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff, - const SCEV *SrcConst, - const SCEV *DstConst, - const Loop *CurLoop, unsigned Level, - FullDependence &Result, - Constraint &NewConstraint) const { +bool DependenceInfo::weakZeroDstSIVtest( + const SCEV *SrcCoeff, const SCEV *SrcConst, const SCEV *DstConst, + const Loop *CurSrcLoop, const Loop *CurDstLoop, unsigned Level, + FullDependence &Result, Constraint &NewConstraint) const { // For the WeakSIV test, it's possible the loop isn't common to the // Src and Dst loops. If it isn't, then there's no need to record a direction. 
LLVM_DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n"); @@ -1889,7 +1960,7 @@ bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff, Result.Consistent = false; const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); NewConstraint.setLine(SrcCoeff, SE->getZero(Delta->getType()), Delta, - CurLoop); + CurSrcLoop, CurDstLoop); LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) { if (Level < CommonLevels) { @@ -1910,7 +1981,8 @@ bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff, // check that Delta/SrcCoeff < iteration count // really check NewDelta < count*AbsCoeff - if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { + if (const SCEV *UpperBound = + collectUpperBound(CurSrcLoop, Delta->getType())) { LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { @@ -1948,7 +2020,6 @@ bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff, return false; } - // exactRDIVtest - Tests the RDIV subscript pair for dependence. 
// Things of the form [c1 + a*i] and [c2 + b*j], // where i and j are induction variable, c1 and c2 are loop invariant, @@ -2240,43 +2311,47 @@ bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level, const SCEV *DstConst = DstAddRec->getStart(); const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); - const Loop *CurLoop = SrcAddRec->getLoop(); - assert(CurLoop == DstAddRec->getLoop() && - "both loops in SIV should be same"); - Level = mapSrcLoop(CurLoop); + const Loop *CurSrcLoop = SrcAddRec->getLoop(); + const Loop *CurDstLoop = DstAddRec->getLoop(); + assert(areLoopsSimilar(CurSrcLoop, CurDstLoop) && + "both loops in SIV should be the same or have the same tripcount " + "and depth"); + Level = mapSrcLoop(CurSrcLoop); bool disproven; if (SrcCoeff == DstCoeff) - disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, - Level, Result, NewConstraint); + disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurSrcLoop, + CurDstLoop, Level, Result, NewConstraint); else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff)) - disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, - Level, Result, NewConstraint, SplitIter); + disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurSrcLoop, + CurDstLoop, Level, Result, NewConstraint, + SplitIter); else - disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, - Level, Result, NewConstraint); - return disproven || - gcdMIVtest(Src, Dst, Result) || - symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop); + disproven = + exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurSrcLoop, + CurDstLoop, Level, Result, NewConstraint); + return disproven || gcdMIVtest(Src, Dst, Result) || + symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurSrcLoop, + CurDstLoop); } if (SrcAddRec) { const SCEV *SrcConst = SrcAddRec->getStart(); const SCEV *SrcCoeff = 
SrcAddRec->getStepRecurrence(*SE); const SCEV *DstConst = Dst; - const Loop *CurLoop = SrcAddRec->getLoop(); - Level = mapSrcLoop(CurLoop); - return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, - Level, Result, NewConstraint) || - gcdMIVtest(Src, Dst, Result); + const Loop *CurSrcLoop = SrcAddRec->getLoop(); + Level = mapSrcLoop(CurSrcLoop); + return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurSrcLoop, + CurSrcLoop, Level, Result, NewConstraint) || + gcdMIVtest(Src, Dst, Result); } if (DstAddRec) { const SCEV *DstConst = DstAddRec->getStart(); const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); const SCEV *SrcConst = Src; - const Loop *CurLoop = DstAddRec->getLoop(); - Level = mapDstLoop(CurLoop); - return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, - CurLoop, Level, Result, NewConstraint) || - gcdMIVtest(Src, Dst, Result); + const Loop *CurDstLoop = DstAddRec->getLoop(); + Level = mapDstLoop(CurDstLoop); + return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, CurDstLoop, + CurDstLoop, Level, Result, NewConstraint) || + gcdMIVtest(Src, Dst, Result); } llvm_unreachable("SIV test expected at least one AddRec"); return false; @@ -3181,19 +3256,20 @@ bool DependenceInfo::propagate(const SCEV *&Src, const SCEV *&Dst, bool DependenceInfo::propagateDistance(const SCEV *&Src, const SCEV *&Dst, Constraint &CurConstraint, bool &Consistent) { - const Loop *CurLoop = CurConstraint.getAssociatedLoop(); + const Loop *CurSrcLoop = CurConstraint.getAssociatedSrcLoop(); + const Loop *CurDstLoop = CurConstraint.getAssociatedDstLoop(); LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); - const SCEV *A_K = findCoefficient(Src, CurLoop); + const SCEV *A_K = findCoefficient(Src, CurSrcLoop); if (A_K->isZero()) return false; const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD()); Src = SE->getMinusSCEV(Src, DA_K); - Src = zeroCoefficient(Src, CurLoop); + Src = zeroCoefficient(Src, CurSrcLoop); LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << 
"\n"); LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); - Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K)); + Dst = addToCoefficient(Dst, CurDstLoop, SE->getNegativeSCEV(A_K)); LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); - if (!findCoefficient(Dst, CurLoop)->isZero()) + if (!findCoefficient(Dst, CurDstLoop)->isZero()) Consistent = false; return true; } @@ -3207,7 +3283,8 @@ bool DependenceInfo::propagateDistance(const SCEV *&Src, const SCEV *&Dst, bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst, Constraint &CurConstraint, bool &Consistent) { - const Loop *CurLoop = CurConstraint.getAssociatedLoop(); + const Loop *CurSrcLoop = CurConstraint.getAssociatedSrcLoop(); + const Loop *CurDstLoop = CurConstraint.getAssociatedDstLoop(); const SCEV *A = CurConstraint.getA(); const SCEV *B = CurConstraint.getB(); const SCEV *C = CurConstraint.getC(); @@ -3223,11 +3300,11 @@ bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst, APInt Charlie = Cconst->getAPInt(); APInt CdivB = Charlie.sdiv(Beta); assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B"); - const SCEV *AP_K = findCoefficient(Dst, CurLoop); + const SCEV *AP_K = findCoefficient(Dst, CurDstLoop); // Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); - Dst = zeroCoefficient(Dst, CurLoop); - if (!findCoefficient(Src, CurLoop)->isZero()) + Dst = zeroCoefficient(Dst, CurDstLoop); + if (!findCoefficient(Src, CurSrcLoop)->isZero()) Consistent = false; } else if (B->isZero()) { @@ -3238,10 +3315,10 @@ bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst, APInt Charlie = Cconst->getAPInt(); APInt CdivA = Charlie.sdiv(Alpha); assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); - const SCEV *A_K = findCoefficient(Src, CurLoop); + const SCEV *A_K = findCoefficient(Src, CurSrcLoop); Src = SE->getAddExpr(Src, 
SE->getMulExpr(A_K, SE->getConstant(CdivA))); - Src = zeroCoefficient(Src, CurLoop); - if (!findCoefficient(Dst, CurLoop)->isZero()) + Src = zeroCoefficient(Src, CurSrcLoop); + if (!findCoefficient(Dst, CurDstLoop)->isZero()) Consistent = false; } else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) { @@ -3252,22 +3329,22 @@ bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst, APInt Charlie = Cconst->getAPInt(); APInt CdivA = Charlie.sdiv(Alpha); assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); - const SCEV *A_K = findCoefficient(Src, CurLoop); + const SCEV *A_K = findCoefficient(Src, CurSrcLoop); Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); - Src = zeroCoefficient(Src, CurLoop); - Dst = addToCoefficient(Dst, CurLoop, A_K); - if (!findCoefficient(Dst, CurLoop)->isZero()) + Src = zeroCoefficient(Src, CurSrcLoop); + Dst = addToCoefficient(Dst, CurDstLoop, A_K); + if (!findCoefficient(Dst, CurDstLoop)->isZero()) Consistent = false; } else { // paper is incorrect here, or perhaps just misleading - const SCEV *A_K = findCoefficient(Src, CurLoop); + const SCEV *A_K = findCoefficient(Src, CurSrcLoop); Src = SE->getMulExpr(Src, A); Dst = SE->getMulExpr(Dst, A); Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C)); - Src = zeroCoefficient(Src, CurLoop); - Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B)); - if (!findCoefficient(Dst, CurLoop)->isZero()) + Src = zeroCoefficient(Src, CurSrcLoop); + Dst = addToCoefficient(Dst, CurDstLoop, SE->getMulExpr(A_K, B)); + if (!findCoefficient(Dst, CurDstLoop)->isZero()) Consistent = false; } LLVM_DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n"); @@ -3281,17 +3358,18 @@ bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst, // Return true if some simplification occurs. 
bool DependenceInfo::propagatePoint(const SCEV *&Src, const SCEV *&Dst, Constraint &CurConstraint) { - const Loop *CurLoop = CurConstraint.getAssociatedLoop(); - const SCEV *A_K = findCoefficient(Src, CurLoop); - const SCEV *AP_K = findCoefficient(Dst, CurLoop); + const Loop *CurSrcLoop = CurConstraint.getAssociatedSrcLoop(); + const Loop *CurDstLoop = CurConstraint.getAssociatedDstLoop(); + const SCEV *A_K = findCoefficient(Src, CurSrcLoop); + const SCEV *AP_K = findCoefficient(Dst, CurDstLoop); const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX()); const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY()); LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K)); - Src = zeroCoefficient(Src, CurLoop); + Src = zeroCoefficient(Src, CurSrcLoop); LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); - Dst = zeroCoefficient(Dst, CurLoop); + Dst = zeroCoefficient(Dst, CurDstLoop); LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); return true; } @@ -3690,14 +3768,6 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, } } - establishNestingLevels(Src, Dst); - LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); - LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); - - FullDependence Result(Src, Dst, SCEVUnionPredicate(Assume, *SE), - PossiblyLoopIndependent, CommonLevels); - ++TotalArrayPairs; - unsigned Pairs = 1; SmallVector Pair(Pairs); Pair[0].Src = SrcSCEV; @@ -3710,6 +3780,47 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, } } + // Establish loop nesting levels considering separate but similar loops as + // common + establishNestingLevels(Src, Dst); + + LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); + LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); + LLVM_DEBUG(dbgs() << " separate nesting levels = " << SeparateLevels + 
<< "\n"); + + // Modify common levels to consider the similar separate levels in the tests + CommonLevels += SeparateLevels; + MaxLevels -= SeparateLevels; + if (SeparateLevels > 0) { + // Not all tests are handled yet over separate loops + // Revoke if there are any tests other than ZIV, SIV or RDIV + for (unsigned P = 0; P < Pairs; ++P) { + Pair[P].Loops.resize(MaxLevels + 1); + Subscript::ClassificationKind TestClass = classifyPair( + Pair[P].Src, LI->getLoopFor(Src->getParent()), Pair[P].Dst, + LI->getLoopFor(Dst->getParent()), Pair[P].Loops); + + if (TestClass != Subscript::ZIV && TestClass != Subscript::SIV && + TestClass != Subscript::RDIV) { + // Revert the levels to not consider the separate levels + CommonLevels -= SeparateLevels; + MaxLevels += SeparateLevels; + SeparateLevels = 0; + break; + } + } + } + + FullDependence Result(Src, Dst, SCEVUnionPredicate(Assume, *SE), + PossiblyLoopIndependent, CommonLevels); + ++TotalArrayPairs; + + if (SeparateLevels > 0) { + Result.Consistent = false; + SeparateLoopsConsidered++; + } + for (unsigned P = 0; P < Pairs; ++P) { Pair[P].Loops.resize(MaxLevels + 1); Pair[P].GroupLoops.resize(MaxLevels + 1); @@ -3999,6 +4110,25 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, if (CompleteLoops[II]) Result.DV[II - 1].Scalar = false; + if (SeparateLevels > 0) { + // Extracting separate levels from the common levels + // Reverting CommonLevels and MaxLevels to their original values + assert(CommonLevels >= SeparateLevels); + CommonLevels -= SeparateLevels; + MaxLevels += SeparateLevels; + std::unique_ptr DV, DVSeparate; + DV = std::make_unique(CommonLevels); + DVSeparate = std::make_unique(SeparateLevels); + for (unsigned level = 0; level < CommonLevels; ++level) + DV[level] = Result.DV[level]; + for (unsigned level = 0; level < SeparateLevels; ++level) + DVSeparate[level] = Result.DV[CommonLevels + level]; + Result.DV = std::move(DV); + Result.DVSeparate = std::move(DVSeparate); + Result.Levels = CommonLevels; 
+ Result.SeparateLevels = SeparateLevels; + } + if (PossiblyLoopIndependent) { // Make sure the LoopIndependent flag is set correctly. // All directions must include equal, otherwise no diff --git a/llvm/test/Analysis/DependenceAnalysis/SIVSeparateLoops.ll b/llvm/test/Analysis/DependenceAnalysis/SIVSeparateLoops.ll new file mode 100644 index 0000000000000..3d7fd12e735d3 --- /dev/null +++ b/llvm/test/Analysis/DependenceAnalysis/SIVSeparateLoops.ll @@ -0,0 +1,145 @@ +; RUN: opt < %s -disable-output "-passes=print" -aa-pipeline=basic-aa 2>&1 \ +; RUN: -da-disable-delinearization-checks | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.6.0" + +;; for (long int i = 0; i < n; i++) { +;; for (long int j = 0; j < n; j++) { +;; for (long int k = 0; k < n; k++) { +;; for (long int l = 0; l < n; l++) +;; A[i][j][k][l] = i; +;; } +;; for (long int k = 1; k < n+1; k++) { +;; for (long int l = 0; l < n; l++) +;; *B++ = A[i + 4][j + 3][k + 2][l + 1]; + +define void @SIVSeparate(i64 %n, ptr %A, ptr %B) nounwind uwtable ssp { +entry: + %cmp10 = icmp sgt i64 %n, 0 + br i1 %cmp10, label %for.cond1.preheader.preheader, label %for.end35 + +; CHECK-LABEL: SIVSeparate +; CHECK: da analyze - none! +; CHECK: da analyze - flow [-4 -3]! / assuming 2 fused loop(s): [-4 -3 -3 -1]! +; CHECK: da analyze - confused! +; CHECK: da analyze - none! +; CHECK: da analyze - confused! +; CHECK: da analyze - output [* * * *]! 
+ +for.cond1.preheader.preheader: ; preds = %entry + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc33 + %B.addr.014 = phi ptr [ %B.addr.1.lcssa, %for.inc33 ], [ %B, %for.cond1.preheader.preheader ] + %i.013 = phi i64 [ %inc34, %for.inc33 ], [ 0, %for.cond1.preheader.preheader ] + %cmp28 = icmp sgt i64 %n, 0 + br i1 %cmp28, label %for.cond4.preheader.preheader, label %for.inc33 + +for.cond4.preheader.preheader: ; preds = %for.cond1.preheader + br label %for.cond4.preheader + +for.cond4.preheader: ; preds = %for.cond4.preheader.preheader, %for.inc30 + %B.addr.110 = phi ptr [ %B.addr.2.lcssa, %for.inc30 ], [ %B.addr.014, %for.cond4.preheader.preheader ] + %j.09 = phi i64 [ %inc31, %for.inc30 ], [ 0, %for.cond4.preheader.preheader ] + %cmp53 = icmp sgt i64 %n, 0 + br i1 %cmp53, label %for.cond7.preheader.preheader, label %for.cond15.loopexit + +for.cond7.preheader.preheader: ; preds = %for.cond4.preheader + br label %for.cond7.preheader + +for.cond7.preheader: ; preds = %for.cond7.preheader.preheader, %for.inc12 + %k.07 = phi i64 [ %inc13, %for.inc12 ], [ 0, %for.cond7.preheader.preheader ] + %cmp81 = icmp sgt i64 %n, 0 + br i1 %cmp81, label %for.body9.preheader, label %for.inc12 + +for.body9.preheader: ; preds = %for.cond7.preheader + br label %for.body9 + +for.body9: ; preds = %for.body9.preheader, %for.body9 + %l.02 = phi i64 [ %inc11, %for.body9 ], [ 0, %for.body9.preheader ] + %arrayidx12 = getelementptr inbounds [100 x [100 x [100 x i64]]], ptr %A, i64 %i.013, i64 %j.09, i64 %k.07, i64 %l.02 + store i64 %i.013, ptr %arrayidx12, align 8 + %inc11 = add nsw i64 %l.02, 1 + %exitcond15 = icmp ne i64 %inc11, %n + br i1 %exitcond15, label %for.body9, label %for.inc12.loopexit + +for.inc12.loopexit: ; preds = %for.body9 + br label %for.inc12 + +for.inc12: ; preds = %for.inc12.loopexit, %for.cond7.preheader + %inc13 = add nsw i64 %k.07, 1 + %exitcond16 = icmp ne i64 %inc13, %n + br i1 %exitcond16, label 
%for.cond7.preheader, label %for.cond15.loopexit.loopexit + +for.cond15.loopexit.loopexit: ; preds = %for.inc12 + br label %for.cond15.loopexit + +for.cond15.loopexit: ; preds = %for.cond15.loopexit.loopexit, %for.cond4.preheader + %cmp163 = icmp sgt i64 %n, 0 + br i1 %cmp163, label %for.cond18.preheader.preheader, label %for.inc30 + +for.cond18.preheader.preheader: ; preds = %for.cond15.loopexit + br label %for.cond18.preheader + +for.cond18.preheader: ; preds = %for.cond18.preheader.preheader, %for.inc27 + %k14.06 = phi i64 [ %inc28, %for.inc27 ], [ 1, %for.cond18.preheader.preheader ] + %B.addr.25 = phi ptr [ %B.addr.3.lcssa, %for.inc27 ], [ %B.addr.110, %for.cond18.preheader.preheader ] + %cmp191 = icmp sgt i64 %n, 0 + br i1 %cmp191, label %for.body20.preheader, label %for.inc27 + +for.body20.preheader: ; preds = %for.cond18.preheader + br label %for.body20 + +for.body20: ; preds = %for.body20.preheader, %for.body20 + %l17.04 = phi i64 [ %inc25, %for.body20 ], [ 0, %for.body20.preheader ] + %B.addr.34 = phi ptr [ %incdec.ptr, %for.body20 ], [ %B.addr.25, %for.body20.preheader ] + %add = add nsw i64 %l17.04, 1 + %add21 = add nsw i64 %k14.06, 2 + %add22 = add nsw i64 %j.09, 3 + %add23 = add nsw i64 %i.013, 4 + %arrayidx24 = getelementptr inbounds [100 x [100 x [100 x i64]]], ptr %A, i64 %add23, i64 %add22, i64 %add21, i64 %add + %0 = load i64, ptr %arrayidx24, align 8 + %incdec.ptr = getelementptr inbounds i64, ptr %B.addr.34, i64 1 + store i64 %0, ptr %B.addr.34, align 8 + %inc25 = add nsw i64 %l17.04, 1 + %exitcond = icmp ne i64 %inc25, %n + br i1 %exitcond, label %for.body20, label %for.inc27.loopexit + +for.inc27.loopexit: ; preds = %for.body20 + %scevgep = getelementptr i64, ptr %B.addr.25, i64 %n + br label %for.inc27 + +for.inc27: ; preds = %for.inc27.loopexit, %for.cond18.preheader + %B.addr.3.lcssa = phi ptr [ %B.addr.25, %for.cond18.preheader ], [ %scevgep, %for.inc27.loopexit ] + %inc28 = add nsw i64 %k14.06, 1 + %inc29 = add nsw i64 %n, 1 + 
%exitcond17 = icmp ne i64 %inc28, %inc29 + br i1 %exitcond17, label %for.cond18.preheader, label %for.inc30.loopexit + +for.inc30.loopexit: ; preds = %for.inc27 + %B.addr.3.lcssa.lcssa = phi ptr [ %B.addr.3.lcssa, %for.inc27 ] + br label %for.inc30 + +for.inc30: ; preds = %for.inc30.loopexit, %for.cond15.loopexit + %B.addr.2.lcssa = phi ptr [ %B.addr.110, %for.cond15.loopexit ], [ %B.addr.3.lcssa.lcssa, %for.inc30.loopexit ] + %inc31 = add nsw i64 %j.09, 1 + %exitcond18 = icmp ne i64 %inc31, %n + br i1 %exitcond18, label %for.cond4.preheader, label %for.inc33.loopexit + +for.inc33.loopexit: ; preds = %for.inc30 + %B.addr.2.lcssa.lcssa = phi ptr [ %B.addr.2.lcssa, %for.inc30 ] + br label %for.inc33 + +for.inc33: ; preds = %for.inc33.loopexit, %for.cond1.preheader + %B.addr.1.lcssa = phi ptr [ %B.addr.014, %for.cond1.preheader ], [ %B.addr.2.lcssa.lcssa, %for.inc33.loopexit ] + %inc34 = add nsw i64 %i.013, 1 + %exitcond19 = icmp ne i64 %inc34, %n + br i1 %exitcond19, label %for.cond1.preheader, label %for.end35.loopexit + +for.end35.loopexit: ; preds = %for.inc33 + br label %for.end35 + +for.end35: ; preds = %for.end35.loopexit, %entry + ret void +}