diff --git a/clang/include/clang/3C/3CInteractiveData.h b/clang/include/clang/3C/3CInteractiveData.h index 2c72045fc575..77283baf9cce 100644 --- a/clang/include/clang/3C/3CInteractiveData.h +++ b/clang/include/clang/3C/3CInteractiveData.h @@ -52,6 +52,18 @@ class ConstraintsInfo { std::set ValidSourceFiles; std::map AtomSourceMap; + void addRootCause(ConstraintKey Var, ConstraintKey RootCause) { + RootCauses[Var].insert(RootCause); + } + + CVars& getConstrainedBy(VarAtom *Var) { + return getConstrainedBy(Var->getLoc()); + } + + CVars& getConstrainedBy(ConstraintKey Var) { + return ConstrainedBy[Var]; + } + private: // Root cause map: This is the map of a Constraint var and a set of // Constraint vars (that are directly assigned WILD) which are the reason @@ -65,15 +77,19 @@ class ConstraintsInfo { // \ / // s // Here: s -> {p, q} and r -> {q} - std::map RCMap; + // IE: Maps a constraint variables to the set of root causes of wildness + std::map RootCauses; // This is source map: Map of Constraint var (which are directly // assigned WILD) and the set of constraint vars which are WILD because of // the above constraint. // For the above case, this contains: p -> {s}, q -> {r, s} - std::map SrcWMap; + // IE: Maps a root cause to the set of variables it constrains + std::map ConstrainedBy; - std::map PtrRCMap; - std::map> PtrSrcWMap; + // PTR versions of the above maps + // TODO understand this better + std::map PtrRootCauses; + std::map> PtrConstrainedBy; // Get score for each of the ConstraintKeys, which are wild. // For the above example, the score of s would be 0.5, similarly diff --git a/clang/include/clang/3C/ConstraintVariables.h b/clang/include/clang/3C/ConstraintVariables.h index d0ae7f678925..d701ce5d58d0 100644 --- a/clang/include/clang/3C/ConstraintVariables.h +++ b/clang/include/clang/3C/ConstraintVariables.h @@ -168,6 +168,7 @@ class ConstraintVariable { std::string getRewritableOriginalTy() const; std::string getName() const { return Name; } + // TODO is the word `valid` doing any real work here? or can it be dropped? void setValidDecl() { IsForDecl = true; } bool isForValidDecl() const { return IsForDecl; } diff --git a/clang/include/clang/3C/Constraints.h b/clang/include/clang/3C/Constraints.h index 278878736ac1..f15f3fa93c41 100644 --- a/clang/include/clang/3C/Constraints.h +++ b/clang/include/clang/3C/Constraints.h @@ -129,7 +129,11 @@ class VarAtom : public Atom { return false; } + // TODO this should be renamed to something more informative, like "id" uint32_t getLoc() const { return Loc; } + + + std::string getName() const { return Name; } VarKind getVarKind() const { return KindV; } @@ -138,10 +142,19 @@ class VarAtom : public Atom { return Constraints; } + void setForDecl(void) { + IsForDecl = true; + } + + bool isForDecl(void) const { + return IsForDecl; + } + private: uint32_t Loc; std::string Name; const VarKind KindV; + bool IsForDecl = false; // The constraint expressions where this variable is mentioned on the // LHS of an equality. std::set> Constraints; diff --git a/clang/include/clang/3C/ProgramInfo.h b/clang/include/clang/3C/ProgramInfo.h index 5e691c08bc77..3633207f72ef 100644 --- a/clang/include/clang/3C/ProgramInfo.h +++ b/clang/include/clang/3C/ProgramInfo.h @@ -100,6 +100,11 @@ class ProgramInfo : public ProgramVariableAdder { FVConstraint *getStaticFuncConstraint(std::string FuncName, std::string FileName) const; + void doRootCauseAnalysis(CVars &RelevantVarsKey, + std::set &DirectWildVarAtoms, + ConstraintsGraph &CG); + + // Called when we are done adding constraints and visiting ASTs. // Links information about global symbols together and adds // constraints where appropriate. diff --git a/clang/lib/3C/3CInteractiveData.cpp b/clang/lib/3C/3CInteractiveData.cpp index bc6d417f513a..f454a213af3d 100644 --- a/clang/lib/3C/3CInteractiveData.cpp +++ b/clang/lib/3C/3CInteractiveData.cpp @@ -19,14 +19,14 @@ void ConstraintsInfo::clear() { AllWildAtoms.clear(); TotalNonDirectWildAtoms.clear(); ValidSourceFiles.clear(); - RCMap.clear(); - SrcWMap.clear(); + RootCauses.clear(); + ConstrainedBy.clear(); } -CVars &ConstraintsInfo::getRCVars(ConstraintKey Ckey) { return RCMap[Ckey]; } +CVars &ConstraintsInfo::getRCVars(ConstraintKey Ckey) { return RootCauses[Ckey]; } CVars &ConstraintsInfo::getSrcCVars(ConstraintKey Ckey) { - return SrcWMap[Ckey]; + return ConstrainedBy[Ckey]; } CVars ConstraintsInfo::getWildAffectedCKeys(const CVars &DWKeys) { @@ -50,7 +50,7 @@ float ConstraintsInfo::getPtrAffectedScore( const std::set CVs) { float TS = 0.0; for (auto *CV : CVs) - TS += (1.0 / PtrRCMap[CV].size()); + TS += (1.0 / PtrRootCauses[CV].size()); return TS; } @@ -127,12 +127,12 @@ void ConstraintsInfo::printConstraintStats(llvm::raw_ostream &O, O << "\"AtomsAffected\":" << AtomsAffected.size() << ", "; O << "\"AtomsScore\":" << getAtomAffectedScore(AtomsAffected) << ", "; - std::set PtrsAffected = PtrSrcWMap[Cause]; + std::set PtrsAffected = PtrConstrainedBy[Cause]; O << "\"PtrsAffected\":" << PtrsAffected.size() << ","; O << "\"PtrsScore\":" << getPtrAffectedScore(PtrsAffected); O << "}"; } int ConstraintsInfo::getNumPtrsAffected(ConstraintKey CK) { - return PtrSrcWMap[CK].size(); + return PtrConstrainedBy[CK].size(); } diff --git a/clang/lib/3C/ProgramInfo.cpp b/clang/lib/3C/ProgramInfo.cpp index 90c4c692f4fc..3fd9de39a67a 100644 --- a/clang/lib/3C/ProgramInfo.cpp +++ b/clang/lib/3C/ProgramInfo.cpp @@ -14,6 +14,8 @@ #include "clang/3C/MappingVisitor.h" #include "clang/3C/Utils.h" #include "llvm/Support/JSON.h" +#include +#include #include using namespace clang; @@ -954,6 +956,201 @@ FVConstraint *ProgramInfo::getStaticFuncConstraint(std::string FuncName, return nullptr; } + +typedef llvm::SmallPtrSet VarAtomSet; +typedef llvm::DenseSet> ConstraintKeySet; + +// Factory context for root cause analysis +// This class tracks global root cause analysis information +class RCAFactory { +private: + // Set of vars that map to a decl & are in a writable file + CVars &RelevantVarsKeys; + // Set of vars that are directly wild + std::set &DirectWildVarAtoms; + + ConstraintsGraph &CG; + ConstraintsInfo &CState; + + + // Map a key (K) to the set of keys reachable by K + // This functions as the memo-pad + // We use the more efficient LLVM set types here + llvm::DenseMap> ReachableBy; + + +public: + RCAFactory(CVars &RVs, std::set &DWVs, + ConstraintsGraph &CG, ConstraintsInfo &CState) + : RelevantVarsKeys(RVs), DirectWildVarAtoms(DWVs), + CG(CG), CState(CState) {} + + + + void analyzeRootCause(VarAtom*); + + // Mark ToV as being reachable from FromV + // Check nodes reachable from ToV, and add them as well + // TODO there are gains to be made by optimizing this function + void markReachable(VarAtom* FromV, VarAtom *ToV) { + auto From = FromV->getLoc(), To = ToV->getLoc(); + + ReachableBy[From].insert(ToV); + // Check if To has reachable nodes, if so add them + if (ReachableBy.count(To) != 0) + ReachableBy[From].insert(ReachableBy[To].begin(), ReachableBy[To].end()); + } + + // Check if a given VarAtom has had reachability data logged yet + bool memoized(VarAtom *VA) { + return ReachableBy.count(VA->getLoc()) != 0; + } + + VarAtomSet& getReachable(VarAtom *VA) { + assert("Should only be called on memoized values" && memoized(VA)); + return ReachableBy[VA->getLoc()]; + } + + + bool isRelevantVar(VarAtom *VA) { + return isRelevantVar(VA->getLoc()); + } + + bool isRelevantVar(ConstraintKey Key) { + return RelevantVarsKeys.find(Key) != RelevantVarsKeys.end(); + } + + bool isDirectlyWild(VarAtom *VA) { + return DirectWildVarAtoms.find(VA) != DirectWildVarAtoms.end(); + } + + void addRootCause(VarAtom *Target, VarAtom *Cause) { + CState.addRootCause(Target->getLoc(), Cause->getLoc()); + } + + void addRootCause(ConstraintKey Target, VarAtom *Cause) { + CState.addRootCause(Target, Cause->getLoc()); + } + + std::set getNeighbors(VarAtom *Node) { + std::set Neighbors; + CG.getNeighbors(Node, Neighbors, true); + return Neighbors; + } + +}; + +// This class performs the root cause analysis on a single wild atom +// It searches through the Constraint Graph and finds every atom constrained +// by the target wild atom. +class RootCauseAnalysis { +public: + + RootCauseAnalysis(RCAFactory *F, VarAtom *WA) : F(F), WildAtom(WA) { + // Begin traversal out from the root cause of wildness + traverse(WA); + } + + // The set of all relevant variables constrained by the target + CVars& getConstrainedBy(void) { + return ConstrainedByThis; + } + +private: + // Factory Context + RCAFactory *F; + // The target of the search + VarAtom *WildAtom; + // Set of variables constrained by the target + CVars ConstrainedByThis; + // Set of variables indirectly constrained + CVars Indirect; + // Set of vars we've seen in this search (prevents cycles) + ConstraintKeySet Seen; + + + bool alreadySeen(VarAtom *VA) { + return Seen.find(VA->getLoc()) != Seen.end(); + } + + void markSeen(VarAtom *VA) { + Seen.insert(VA->getLoc()); + } + + + void traverse(VarAtom *ReachableVar) { + if (alreadySeen(ReachableVar)) + return; + markSeen(ReachableVar); + if (ReachableVar->isForDecl()) { + F->addRootCause(ReachableVar, WildAtom); + + if (F->isRelevantVar(ReachableVar)) + ConstrainedByThis.insert(ReachableVar->getLoc()); + if (!F->isDirectlyWild(ReachableVar)) + Indirect.insert(ReachableVar->getLoc()); + } + + if (F->memoized(ReachableVar)) + traverseMemoizedNode(ReachableVar); + else + traverseNewNode(ReachableVar); + } + + +private: + void traverseMemoizedNode(VarAtom *VA) { + for (VarAtom *K : F->getReachable(VA)) { + if (K->isForDecl()) { + F->addRootCause(K, WildAtom); + if (F->isRelevantVar(K)) + ConstrainedByThis.insert(K->getLoc()); + } + } + } + + void traverseNewNode(VarAtom *ReachableVar) { + std::set Neighbors = F->getNeighbors(ReachableVar); + for (auto *Neighbor : Neighbors) { + if (auto *VarNeighbor = dyn_cast(Neighbor)) { + traverse(VarNeighbor); + // Mark our neighbor (and all transitively reachable nodes) as reachable + F->markReachable(ReachableVar, VarNeighbor); + } + } + } + +}; + + +void RCAFactory::analyzeRootCause(VarAtom *DirectWild) { + CState.AllWildAtoms.insert(DirectWild->getLoc()); + + // Perform root cause analysis + RootCauseAnalysis RCA(this, DirectWild); + CVars &TotalConstrainedBy = CState.getConstrainedBy(DirectWild); + // Add all the new constraints we found into our total set + CVars &NewConstraints = RCA.getConstrainedBy(); + TotalConstrainedBy.insert(NewConstraints.begin(), NewConstraints.end()); +} + +void ProgramInfo::doRootCauseAnalysis(CVars &RelevantVarsKey, + std::set &DirectWildVarAtoms, + ConstraintsGraph &CG) { + + RCAFactory RCAF(RelevantVarsKey, DirectWildVarAtoms, CG, CState); + + // Analyze the root causes for every directly wild atom + for (auto *WildAtom : DirectWildVarAtoms) + if (auto *WildVarAtom = dyn_cast(WildAtom)) + RCAF.analyzeRootCause(WildVarAtom); + + findIntersection(CState.AllWildAtoms, RelevantVarsKey, CState.InSrcWildAtoms); + findIntersection(CState.TotalNonDirectWildAtoms, RelevantVarsKey, + CState.InSrcNonDirectWildAtoms); + +} + // From the given constraint graph, this method computes the interim constraint // state that contains constraint vars which are directly assigned WILD and // other constraint vars that have been determined to be WILD because they @@ -961,80 +1158,46 @@ FVConstraint *ProgramInfo::getStaticFuncConstraint(std::string FuncName, bool ProgramInfo::computeInterimConstraintState( const std::set &FilePaths) { - // Get all the valid vars of interest i.e., all the Vars that are present - // in one of the files being compiled. - CAtoms ValidVarsVec; - std::set AllValidVars; + // The set of all DeclVars vars in a writable file, which we call _relevant_ + std::set RelevantVars; + + // Compute the above two sets + + CVarSet Visited; - CAtoms Tmp; for (const auto &I : Variables) { std::string FileName = I.first.getFileName(); ConstraintVariable *C = I.second; if (C->isForValidDecl()) { - Tmp.clear(); + CAtoms Tmp; getVarsFromConstraint(C, Tmp, Visited); - AllValidVars.insert(Tmp.begin(), Tmp.end()); + // TODO setting this flag should likely being done earlier, + // during construction. + for (auto *A : Tmp) + if (auto *VA = dyn_cast(A)) + VA->setForDecl(); if (canWrite(FileName)) - ValidVarsVec.insert(ValidVarsVec.begin(), Tmp.begin(), Tmp.end()); + RelevantVars.insert(Tmp.begin(), Tmp.end()); } } - // Make that into set, for efficiency. - std::set ValidVarsS; - ValidVarsS.insert(ValidVarsVec.begin(), ValidVarsVec.end()); auto GetLocOrZero = [](const Atom *Val) { if (const auto *VA = dyn_cast(Val)) return VA->getLoc(); return (ConstraintKey)0; }; - CVars ValidVarsKey; - std::transform(ValidVarsS.begin(), ValidVarsS.end(), - std::inserter(ValidVarsKey, ValidVarsKey.end()), GetLocOrZero); - CVars AllValidVarsKey; - std::transform(AllValidVars.begin(), AllValidVars.end(), - std::inserter(AllValidVarsKey, AllValidVarsKey.end()), - GetLocOrZero); + //Map the above set into equivalent set of keys + CVars RelevantVarsKey; + std::transform(RelevantVars.begin(), RelevantVars.end(), + std::inserter(RelevantVarsKey, RelevantVarsKey.end()), GetLocOrZero); CState.clear(); + std::set DirectWildVarAtoms; CS.getChkCG().getSuccessors(CS.getWild(), DirectWildVarAtoms); - CVars TmpCGrp; - CVars OnlyIndirect; - for (auto *A : DirectWildVarAtoms) { - auto *VA = dyn_cast(A); - if (VA == nullptr) - continue; - - TmpCGrp.clear(); - OnlyIndirect.clear(); - - auto BFSVisitor = [&](Atom *SearchAtom) { - auto *SearchVA = dyn_cast(SearchAtom); - if (SearchVA && AllValidVars.find(SearchVA) != AllValidVars.end()) { - CState.RCMap[SearchVA->getLoc()].insert(VA->getLoc()); - - if (ValidVarsKey.find(SearchVA->getLoc()) != ValidVarsKey.end()) - TmpCGrp.insert(SearchVA->getLoc()); - if (DirectWildVarAtoms.find(SearchVA) == DirectWildVarAtoms.end()) { - OnlyIndirect.insert(SearchVA->getLoc()); - } - } - }; - CS.getChkCG().visitBreadthFirst(VA, BFSVisitor); - - CState.TotalNonDirectWildAtoms.insert(OnlyIndirect.begin(), - OnlyIndirect.end()); - // Should we consider only pointers which with in the source files or - // external pointers that affected pointers within the source files. - CState.AllWildAtoms.insert(VA->getLoc()); - CVars &CGrp = CState.SrcWMap[VA->getLoc()]; - CGrp.insert(TmpCGrp.begin(), TmpCGrp.end()); - } - findIntersection(CState.AllWildAtoms, ValidVarsKey, CState.InSrcWildAtoms); - findIntersection(CState.TotalNonDirectWildAtoms, ValidVarsKey, - CState.InSrcNonDirectWildAtoms); + doRootCauseAnalysis(RelevantVarsKey, DirectWildVarAtoms, CS.getChkCG()); // The ConstraintVariable for a variable normally appears in Variables for the // definition, but it may also be reused directly in ExprConstraintVars for a @@ -1123,17 +1286,17 @@ void ProgramInfo::computePtrLevelStats() { insertCVAtoms(I.second, AtomPtrMap); // Populate maps with per-pointer root cause information - for (auto Entry : CState.RCMap) { - assert("RCMap entry is not mapped to a pointer!" && + for (auto Entry : CState.RootCauses) { + assert("RootCauses entry is not mapped to a pointer!" && AtomPtrMap.find(Entry.first) != AtomPtrMap.end()); ConstraintVariable *CV = AtomPtrMap[Entry.first]; for (auto RC : Entry.second) - CState.PtrRCMap[CV].insert(RC); + CState.PtrRootCauses[CV].insert(RC); } - for (auto Entry : CState.SrcWMap) { + for (auto Entry : CState.ConstrainedBy) { for (auto Key : Entry.second) { assert(AtomPtrMap.find(Key) != AtomPtrMap.end()); - CState.PtrSrcWMap[Entry.first].insert(AtomPtrMap[Key]); + CState.PtrConstrainedBy[Entry.first].insert(AtomPtrMap[Key]); } } }