52#include <unordered_map>
57#define DEBUG_TYPE "memprof-context-disambiguation"
60 "Number of function clones created during whole program analysis");
62 "Number of function clones created during ThinLTO backend");
64 "Number of functions that had clones created during ThinLTO backend");
66 FunctionCloneDuplicatesThinBackend,
67 "Number of function clone duplicates detected during ThinLTO backend");
68STATISTIC(AllocTypeNotCold,
"Number of not cold static allocations (possibly "
69 "cloned) during whole program analysis");
70STATISTIC(AllocTypeCold,
"Number of cold static allocations (possibly cloned) "
71 "during whole program analysis");
73 "Number of not cold static allocations (possibly cloned) during "
75STATISTIC(AllocTypeColdThinBackend,
"Number of cold static allocations "
76 "(possibly cloned) during ThinLTO backend");
78 "Number of original (not cloned) allocations with memprof profiles "
79 "during ThinLTO backend");
81 AllocVersionsThinBackend,
82 "Number of allocation versions (including clones) during ThinLTO backend");
84 "Maximum number of allocation versions created for an original "
85 "allocation during ThinLTO backend");
87 "Number of unclonable ambigous allocations during ThinLTO backend");
89 "Number of edges removed due to mismatched callees (profiled vs IR)");
91 "Number of profiled callees found via tail calls");
93 "Aggregate depth of profiled callees found via tail calls");
95 "Maximum depth of profiled callees found via tail calls");
97 "Number of profiled callees found via multiple tail call chains");
98STATISTIC(DeferredBackedges,
"Number of backedges with deferred cloning");
99STATISTIC(NewMergedNodes,
"Number of new nodes created during merging");
100STATISTIC(NonNewMergedNodes,
"Number of non new nodes used during merging");
102 "Number of missing alloc nodes for context ids");
104 "Number of calls skipped during cloning due to unexpected operand");
106 "Number of callsites assigned to call multiple non-matching clones");
107STATISTIC(TotalMergeInvokes,
"Number of merge invocations for nodes");
108STATISTIC(TotalMergeIters,
"Number of merge iterations for nodes");
109STATISTIC(MaxMergeIters,
"Max merge iterations for nodes");
110STATISTIC(NumImportantContextIds,
"Number of important context ids");
111STATISTIC(NumFixupEdgeIdsInserted,
"Number of fixup edge ids inserted");
112STATISTIC(NumFixupEdgesAdded,
"Number of fixup edges added");
113STATISTIC(NumFixedContexts,
"Number of contexts with fixed edges");
118 cl::desc(
"Specify the path prefix of the MemProf dot files."));
122 cl::desc(
"Export graph to dot files."));
127 cl::desc(
"Iteratively apply merging on a node to catch new callers"));
137 "memprof-dot-scope",
cl::desc(
"Scope of graph to export to dot"),
142 "Export only nodes with contexts feeding given "
143 "-memprof-dot-alloc-id"),
145 "Export only nodes with given -memprof-dot-context-id")));
149 cl::desc(
"Id of alloc to export if -memprof-dot-scope=alloc "
150 "or to highlight if -memprof-dot-scope=all"));
154 cl::desc(
"Id of context to export if -memprof-dot-scope=context or to "
155 "highlight otherwise"));
159 cl::desc(
"Dump CallingContextGraph to stdout after each stage."));
163 cl::desc(
"Perform verification checks on CallingContextGraph."));
167 cl::desc(
"Perform frequent verification checks on nodes."));
170 "memprof-import-summary",
171 cl::desc(
"Import summary to use for testing the ThinLTO backend via opt"),
177 cl::desc(
"Max depth to recursively search for missing "
178 "frames through tail calls."));
183 cl::desc(
"Allow cloning of callsites involved in recursive cycles"));
187 cl::desc(
"Allow cloning of contexts through recursive cycles"));
194 cl::desc(
"Merge clones before assigning functions"));
203 cl::desc(
"Allow cloning of contexts having recursive cycles"));
209 cl::desc(
"Minimum absolute count for promoted target to be inlinable"));
220 cl::desc(
"Linking with hot/cold operator new interfaces"));
225 "Require target function definition when promoting indirect calls"));
232 cl::desc(
"Number of largest cold contexts to consider important"));
236 cl::desc(
"Enables edge fixup for important contexts"));
258template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
259class CallsiteContextGraph {
261 CallsiteContextGraph() =
default;
262 CallsiteContextGraph(
const CallsiteContextGraph &) =
default;
263 CallsiteContextGraph(CallsiteContextGraph &&) =
default;
270 void identifyClones();
277 bool assignFunctions();
284 const CallsiteContextGraph &CCG) {
290 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
292 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
294 void exportToDot(std::string Label)
const;
297 struct FuncInfo final
298 :
public std::pair<FuncTy *, unsigned > {
299 using Base = std::pair<FuncTy *, unsigned>;
301 FuncInfo(FuncTy *
F =
nullptr,
unsigned CloneNo = 0) :
Base(
F, CloneNo) {}
302 explicit operator bool()
const {
return this->first !=
nullptr; }
303 FuncTy *func()
const {
return this->first; }
304 unsigned cloneNo()
const {
return this->second; }
308 struct CallInfo final :
public std::pair<CallTy, unsigned > {
309 using Base = std::pair<CallTy, unsigned>;
311 CallInfo(CallTy
Call =
nullptr,
unsigned CloneNo = 0)
313 explicit operator bool()
const {
return (
bool)this->first; }
314 CallTy call()
const {
return this->first; }
315 unsigned cloneNo()
const {
return this->second; }
316 void setCloneNo(
unsigned N) { this->second =
N; }
318 if (!
operator bool()) {
324 OS <<
"\t(clone " << cloneNo() <<
")";
350 bool Recursive =
false;
377 std::vector<std::shared_ptr<ContextEdge>> CalleeEdges;
381 std::vector<std::shared_ptr<ContextEdge>> CallerEdges;
385 bool useCallerEdgesForContextInfo()
const {
390 assert(!CalleeEdges.empty() || CallerEdges.empty() || IsAllocation ||
408 for (
auto &Edge : CalleeEdges.empty() ? CallerEdges : CalleeEdges)
409 Count += Edge->getContextIds().size();
413 CalleeEdges, useCallerEdgesForContextInfo()
415 : std::vector<std::shared_ptr<ContextEdge>>());
416 for (
const auto &Edge : Edges)
423 uint8_t computeAllocType()
const {
428 CalleeEdges, useCallerEdgesForContextInfo()
430 : std::vector<std::shared_ptr<ContextEdge>>());
431 for (
const auto &Edge : Edges) {
442 bool emptyContextIds()
const {
444 CalleeEdges, useCallerEdgesForContextInfo()
446 : std::vector<std::shared_ptr<ContextEdge>>());
447 for (
const auto &Edge : Edges) {
448 if (!Edge->getContextIds().empty())
455 std::vector<ContextNode *> Clones;
458 ContextNode *CloneOf =
nullptr;
460 ContextNode(
bool IsAllocation) : IsAllocation(IsAllocation),
Call() {}
462 ContextNode(
bool IsAllocation, CallInfo
C)
463 : IsAllocation(IsAllocation),
Call(
C) {}
465 void addClone(ContextNode *Clone) {
467 CloneOf->Clones.push_back(Clone);
468 Clone->CloneOf = CloneOf;
470 Clones.push_back(Clone);
472 Clone->CloneOf =
this;
476 ContextNode *getOrigNode() {
483 unsigned int ContextId);
485 ContextEdge *findEdgeFromCallee(
const ContextNode *Callee);
486 ContextEdge *findEdgeFromCaller(
const ContextNode *Caller);
487 void eraseCalleeEdge(
const ContextEdge *Edge);
488 void eraseCallerEdge(
const ContextEdge *Edge);
490 void setCall(CallInfo
C) {
Call =
C; }
492 bool hasCall()
const {
return (
bool)
Call.call(); }
498 bool isRemoved()
const {
534 bool IsBackedge =
false;
541 : Callee(Callee), Caller(Caller), AllocTypes(
AllocType),
542 ContextIds(std::move(ContextIds)) {}
548 inline void clear() {
558 inline bool isRemoved()
const {
559 if (Callee || Caller)
580 void removeNoneTypeCalleeEdges(ContextNode *
Node);
581 void removeNoneTypeCallerEdges(ContextNode *
Node);
583 recursivelyRemoveNoneTypeCalleeEdges(ContextNode *
Node,
589 template <
class NodeT,
class IteratorT>
590 std::vector<uint64_t>
595 ContextNode *addAllocNode(CallInfo
Call,
const FuncTy *
F);
598 template <
class NodeT,
class IteratorT>
599 void addStackNodesForMIB(
603 std::map<uint64_t, uint32_t> &TotalSizeToContextIdTopNCold);
608 void updateStackNodes();
617 void fixupImportantContexts();
621 void handleCallsitesWithMultipleTargets();
624 void markBackedges();
634 bool partitionCallsByCallee(
636 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode);
643 std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;
650 using EdgeIter =
typename std::vector<std::shared_ptr<ContextEdge>>
::iterator;
655 struct CallContextInfo {
659 std::vector<uint64_t> StackIds;
673 void removeEdgeFromGraph(ContextEdge *Edge, EdgeIter *EI =
nullptr,
674 bool CalleeIter =
true);
682 void assignStackNodesPostOrder(
696 void propagateDuplicateContextIds(
702 void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode,
710 return static_cast<const DerivedCCG *
>(
this)->getStackId(IdOrIndex);
720 calleesMatch(CallTy
Call, EdgeIter &EI,
725 const FuncTy *getCalleeFunc(CallTy
Call) {
726 return static_cast<DerivedCCG *
>(
this)->getCalleeFunc(
Call);
732 bool calleeMatchesFunc(
733 CallTy
Call,
const FuncTy *Func,
const FuncTy *CallerFunc,
734 std::vector<std::pair<CallTy, FuncTy *>> &FoundCalleeChain) {
735 return static_cast<DerivedCCG *
>(
this)->calleeMatchesFunc(
736 Call, Func, CallerFunc, FoundCalleeChain);
740 bool sameCallee(CallTy Call1, CallTy Call2) {
741 return static_cast<DerivedCCG *
>(
this)->sameCallee(Call1, Call2);
746 std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy
Call) {
747 return static_cast<DerivedCCG *
>(
this)->getStackIdsWithContextNodesForCall(
753 return static_cast<DerivedCCG *
>(
this)->getLastStackId(
Call);
759 static_cast<DerivedCCG *
>(
this)->updateAllocationCall(
Call,
AllocType);
764 return static_cast<const DerivedCCG *
>(
this)->getAllocationCallType(
Call);
769 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc) {
770 static_cast<DerivedCCG *
>(
this)->updateCall(CallerCall, CalleeFunc);
776 FuncInfo cloneFunctionForCallsite(
778 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
779 return static_cast<DerivedCCG *
>(
this)->cloneFunctionForCallsite(
780 Func,
Call, CallMap, CallsWithMetadataInFunc, CloneNo);
785 std::string getLabel(
const FuncTy *Func,
const CallTy
Call,
786 unsigned CloneNo)
const {
787 return static_cast<const DerivedCCG *
>(
this)->getLabel(Func,
Call, CloneNo);
791 ContextNode *createNewNode(
bool IsAllocation,
const FuncTy *
F =
nullptr,
792 CallInfo
C = CallInfo()) {
793 NodeOwner.push_back(std::make_unique<ContextNode>(IsAllocation,
C));
794 auto *NewNode = NodeOwner.back().get();
796 NodeToCallingFunc[NewNode] =
F;
797 NewNode->NodeId = NodeOwner.size();
802 ContextNode *getNodeForInst(
const CallInfo &
C);
803 ContextNode *getNodeForAlloc(
const CallInfo &
C);
804 ContextNode *getNodeForStackId(
uint64_t StackId);
826 moveEdgeToNewCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
833 void moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
834 ContextNode *NewCallee,
835 bool NewClone =
false,
843 void moveCalleeEdgeToNewCaller(
const std::shared_ptr<ContextEdge> &Edge,
844 ContextNode *NewCaller);
855 void mergeNodeCalleeClones(
860 void findOtherCallersToShareMerge(
861 ContextNode *
Node, std::vector<std::shared_ptr<ContextEdge>> &CalleeEdges,
889 struct ImportantContextInfo {
891 std::vector<uint64_t> StackIds;
894 unsigned MaxLength = 0;
898 std::map<std::vector<uint64_t>, ContextNode *> StackIdsToNode;
907 void recordStackNode(std::vector<uint64_t> &StackIds, ContextNode *
Node,
921 auto Size = StackIds.size();
922 for (
auto Id : Ids) {
923 auto &Entry = ImportantContextIdInfo[Id];
924 Entry.StackIdsToNode[StackIds] =
Node;
926 if (
Size > Entry.MaxLength)
927 Entry.MaxLength =
Size;
936 std::vector<std::unique_ptr<ContextNode>> NodeOwner;
942 unsigned int LastContextId = 0;
945template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
947 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode;
948template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
950 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge;
951template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
953 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo;
954template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
956 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo;
959class ModuleCallsiteContextGraph
960 :
public CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
963 ModuleCallsiteContextGraph(
965 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter);
968 friend CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
971 uint64_t getStackId(uint64_t IdOrIndex)
const;
973 bool calleeMatchesFunc(
974 Instruction *
Call,
const Function *Func,
const Function *CallerFunc,
975 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain);
976 bool sameCallee(Instruction *Call1, Instruction *Call2);
977 bool findProfiledCalleeThroughTailCalls(
978 const Function *ProfiledCallee,
Value *CurCallee,
unsigned Depth,
979 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
980 bool &FoundMultipleCalleeChains);
981 uint64_t getLastStackId(Instruction *
Call);
982 std::vector<uint64_t> getStackIdsWithContextNodesForCall(Instruction *
Call);
985 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
986 CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
988 cloneFunctionForCallsite(FuncInfo &Func, CallInfo &
Call,
989 DenseMap<CallInfo, CallInfo> &CallMap,
990 std::vector<CallInfo> &CallsWithMetadataInFunc,
992 std::string getLabel(
const Function *Func,
const Instruction *
Call,
993 unsigned CloneNo)
const;
996 llvm::function_ref<OptimizationRemarkEmitter &(
Function *)> OREGetter;
1002struct IndexCall :
public PointerUnion<CallsiteInfo *, AllocInfo *> {
1003 IndexCall() : PointerUnion() {}
1004 IndexCall(std::nullptr_t) : IndexCall() {}
1005 IndexCall(CallsiteInfo *StackNode) : PointerUnion(StackNode) {}
1006 IndexCall(AllocInfo *AllocNode) : PointerUnion(AllocNode) {}
1007 IndexCall(PointerUnion PT) : PointerUnion(PT) {}
1009 IndexCall *operator->() {
return this; }
1011 void print(raw_ostream &OS)
const {
1012 PointerUnion<CallsiteInfo *, AllocInfo *>
Base = *
this;
1037class IndexCallsiteContextGraph
1038 :
public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
1041 IndexCallsiteContextGraph(
1042 ModuleSummaryIndex &Index,
1046 ~IndexCallsiteContextGraph() {
1051 for (
auto &
I : FunctionCalleesToSynthesizedCallsiteInfos) {
1053 for (
auto &Callsite :
I.second)
1054 FS->addCallsite(*Callsite.second);
1059 friend CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
1062 uint64_t getStackId(uint64_t IdOrIndex)
const;
1063 const FunctionSummary *getCalleeFunc(IndexCall &
Call);
1064 bool calleeMatchesFunc(
1065 IndexCall &
Call,
const FunctionSummary *Func,
1066 const FunctionSummary *CallerFunc,
1067 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain);
1068 bool sameCallee(IndexCall &Call1, IndexCall &Call2);
1069 bool findProfiledCalleeThroughTailCalls(
1070 ValueInfo ProfiledCallee, ValueInfo CurCallee,
unsigned Depth,
1071 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
1072 bool &FoundMultipleCalleeChains);
1073 uint64_t getLastStackId(IndexCall &
Call);
1074 std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &
Call);
1077 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
1078 CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
1079 IndexCall>::FuncInfo
1080 cloneFunctionForCallsite(FuncInfo &Func, CallInfo &
Call,
1081 DenseMap<CallInfo, CallInfo> &CallMap,
1082 std::vector<CallInfo> &CallsWithMetadataInFunc,
1084 std::string getLabel(
const FunctionSummary *Func,
const IndexCall &
Call,
1085 unsigned CloneNo)
const;
1089 std::map<const FunctionSummary *, ValueInfo> FSToVIMap;
1091 const ModuleSummaryIndex &Index;
1099 std::unordered_map<FunctionSummary *,
1100 std::map<ValueInfo, std::unique_ptr<CallsiteInfo>>>
1101 FunctionCalleesToSynthesizedCallsiteInfos;
1112 :
public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
1115 :
public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {};
1136template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1137bool allocTypesMatch(
1138 const std::vector<uint8_t> &InAllocTypes,
1139 const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>>
1143 assert(InAllocTypes.size() == Edges.size());
1145 InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(), Edges.end(),
1147 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) {
1151 if (l == (uint8_t)AllocationType::None ||
1152 r->AllocTypes == (uint8_t)AllocationType::None)
1154 return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes);
1163template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1164bool allocTypesMatchClone(
1165 const std::vector<uint8_t> &InAllocTypes,
1166 const ContextNode<DerivedCCG, FuncTy, CallTy> *Clone) {
1167 const ContextNode<DerivedCCG, FuncTy, CallTy> *
Node = Clone->CloneOf;
1171 assert(InAllocTypes.size() ==
Node->CalleeEdges.size());
1175 for (
const auto &
E : Clone->CalleeEdges) {
1177 EdgeCalleeMap[
E->Callee] =
E->AllocTypes;
1181 for (
unsigned I = 0;
I <
Node->CalleeEdges.size();
I++) {
1182 auto Iter = EdgeCalleeMap.
find(
Node->CalleeEdges[
I]->Callee);
1184 if (Iter == EdgeCalleeMap.
end())
1192 if (allocTypeToUse(Iter->second) != allocTypeToUse(InAllocTypes[
I]))
1200template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1201typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1202CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(
1203 const CallInfo &
C) {
1204 ContextNode *
Node = getNodeForAlloc(
C);
1208 return NonAllocationCallToContextNodeMap.lookup(
C);
1211template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1212typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1213CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc(
1214 const CallInfo &
C) {
1215 return AllocationCallToContextNodeMap.lookup(
C);
1218template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1219typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1220CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId(
1222 auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId);
1223 if (StackEntryNode != StackEntryIdToContextNodeMap.end())
1224 return StackEntryNode->second;
1228template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1229void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1231 unsigned int ContextId) {
1232 for (
auto &
Edge : CallerEdges) {
1233 if (
Edge->Caller == Caller) {
1235 Edge->getContextIds().insert(ContextId);
1239 std::shared_ptr<ContextEdge>
Edge = std::make_shared<ContextEdge>(
1240 this, Caller, (uint8_t)
AllocType, DenseSet<uint32_t>({ContextId}));
1241 CallerEdges.push_back(
Edge);
1245template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1246void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::removeEdgeFromGraph(
1247 ContextEdge *
Edge, EdgeIter *EI,
bool CalleeIter) {
1263 auto CalleeCallerCount =
Callee->CallerEdges.size();
1264 auto CallerCalleeCount =
Caller->CalleeEdges.size();
1269 }
else if (CalleeIter) {
1271 *EI =
Caller->CalleeEdges.erase(*EI);
1274 *EI =
Callee->CallerEdges.erase(*EI);
1276 assert(
Callee->CallerEdges.size() < CalleeCallerCount);
1277 assert(
Caller->CalleeEdges.size() < CallerCalleeCount);
1280template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1281void CallsiteContextGraph<
1282 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *Node) {
1283 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();) {
1285 if (
Edge->AllocTypes == (uint8_t)AllocationType::None) {
1287 removeEdgeFromGraph(
Edge.get(), &EI,
true);
1293template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1294void CallsiteContextGraph<
1295 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCallerEdges(ContextNode *Node) {
1296 for (
auto EI =
Node->CallerEdges.begin(); EI !=
Node->CallerEdges.end();) {
1298 if (
Edge->AllocTypes == (uint8_t)AllocationType::None) {
1300 Edge->Caller->eraseCalleeEdge(
Edge.get());
1301 EI =
Node->CallerEdges.erase(EI);
1307template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1308typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1309CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1310 findEdgeFromCallee(
const ContextNode *Callee) {
1311 for (
const auto &
Edge : CalleeEdges)
1312 if (
Edge->Callee == Callee)
1317template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1318typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1319CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1320 findEdgeFromCaller(
const ContextNode *Caller) {
1321 for (
const auto &
Edge : CallerEdges)
1322 if (
Edge->Caller == Caller)
1327template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1328void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1329 eraseCalleeEdge(
const ContextEdge *
Edge) {
1331 CalleeEdges, [
Edge](
const std::shared_ptr<ContextEdge> &CalleeEdge) {
1332 return CalleeEdge.get() ==
Edge;
1334 assert(EI != CalleeEdges.end());
1335 CalleeEdges.erase(EI);
1338template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1339void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1340 eraseCallerEdge(
const ContextEdge *
Edge) {
1342 CallerEdges, [
Edge](
const std::shared_ptr<ContextEdge> &CallerEdge) {
1343 return CallerEdge.get() ==
Edge;
1345 assert(EI != CallerEdges.end());
1346 CallerEdges.erase(EI);
1349template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1350uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType(
1351 DenseSet<uint32_t> &ContextIds)
const {
1353 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
1354 uint8_t
AllocType = (uint8_t)AllocationType::None;
1355 for (
auto Id : ContextIds) {
1356 AllocType |= (uint8_t)ContextIdToAllocationType.at(Id);
1364template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1366CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl(
1367 const DenseSet<uint32_t> &Node1Ids,
1368 const DenseSet<uint32_t> &Node2Ids)
const {
1370 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
1371 uint8_t
AllocType = (uint8_t)AllocationType::None;
1372 for (
auto Id : Node1Ids) {
1373 if (!Node2Ids.
count(Id))
1375 AllocType |= (uint8_t)ContextIdToAllocationType.at(Id);
1383template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1384uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes(
1385 const DenseSet<uint32_t> &Node1Ids,
1386 const DenseSet<uint32_t> &Node2Ids)
const {
1387 if (Node1Ids.
size() < Node2Ids.
size())
1388 return intersectAllocTypesImpl(Node1Ids, Node2Ids);
1390 return intersectAllocTypesImpl(Node2Ids, Node1Ids);
1393template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1394typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1395CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
1396 CallInfo
Call,
const FuncTy *
F) {
1398 ContextNode *AllocNode = createNewNode(
true,
F,
Call);
1399 AllocationCallToContextNodeMap[
Call] = AllocNode;
1401 AllocNode->OrigStackOrAllocId = LastContextId;
1404 AllocNode->AllocTypes = (uint8_t)AllocationType::None;
1420template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1421template <
class NodeT,
class IteratorT>
1422void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
1423 ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext,
1426 std::map<uint64_t, uint32_t> &TotalSizeToContextIdTopNCold) {
1432 ContextIdToAllocationType[++LastContextId] =
AllocType;
1434 bool IsImportant =
false;
1435 if (!ContextSizeInfo.
empty()) {
1436 auto &
Entry = ContextIdToContextSizeInfos[LastContextId];
1440 uint64_t TotalCold = 0;
1441 for (
auto &CSI : ContextSizeInfo)
1442 TotalCold += CSI.TotalSize;
1448 TotalCold > TotalSizeToContextIdTopNCold.begin()->first) {
1451 auto IdToRemove = TotalSizeToContextIdTopNCold.begin()->second;
1452 TotalSizeToContextIdTopNCold.erase(
1453 TotalSizeToContextIdTopNCold.begin());
1454 assert(ImportantContextIdInfo.count(IdToRemove));
1455 ImportantContextIdInfo.erase(IdToRemove);
1457 TotalSizeToContextIdTopNCold[TotalCold] = LastContextId;
1461 Entry.insert(
Entry.begin(), ContextSizeInfo.begin(), ContextSizeInfo.end());
1465 AllocNode->AllocTypes |= (uint8_t)
AllocType;
1470 ContextNode *PrevNode = AllocNode;
1474 SmallSet<uint64_t, 8> StackIdSet;
1477 ContextIter != StackContext.
end(); ++ContextIter) {
1478 auto StackId = getStackId(*ContextIter);
1480 ImportantContextIdInfo[LastContextId].StackIds.push_back(StackId);
1481 ContextNode *StackNode = getNodeForStackId(StackId);
1483 StackNode = createNewNode(
false);
1484 StackEntryIdToContextNodeMap[StackId] = StackNode;
1485 StackNode->OrigStackOrAllocId = StackId;
1490 auto Ins = StackIdSet.
insert(StackId);
1492 StackNode->Recursive =
true;
1494 StackNode->AllocTypes |= (uint8_t)
AllocType;
1495 PrevNode->addOrUpdateCallerEdge(StackNode,
AllocType, LastContextId);
1496 PrevNode = StackNode;
1500template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1502CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
1503 const DenseSet<uint32_t> &StackSequenceContextIds,
1504 DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
1505 DenseSet<uint32_t> NewContextIds;
1506 for (
auto OldId : StackSequenceContextIds) {
1507 NewContextIds.
insert(++LastContextId);
1508 OldToNewContextIds[OldId].insert(LastContextId);
1509 assert(ContextIdToAllocationType.count(OldId));
1511 ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
1512 auto CSI = ContextIdToContextSizeInfos.find(OldId);
1513 if (CSI != ContextIdToContextSizeInfos.end())
1514 ContextIdToContextSizeInfos[LastContextId] = CSI->second;
1515 if (DotAllocContextIds.
contains(OldId))
1516 DotAllocContextIds.
insert(LastContextId);
1518 return NewContextIds;
1521template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1522void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1523 propagateDuplicateContextIds(
1524 const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
1526 auto GetNewIds = [&OldToNewContextIds](
const DenseSet<uint32_t> &ContextIds) {
1527 DenseSet<uint32_t> NewIds;
1528 for (
auto Id : ContextIds)
1529 if (
auto NewId = OldToNewContextIds.find(Id);
1530 NewId != OldToNewContextIds.end())
1536 auto UpdateCallers = [&](ContextNode *
Node,
1537 DenseSet<const ContextEdge *> &Visited,
1538 auto &&UpdateCallers) ->
void {
1539 for (
const auto &
Edge :
Node->CallerEdges) {
1543 ContextNode *NextNode =
Edge->Caller;
1544 DenseSet<uint32_t> NewIdsToAdd = GetNewIds(
Edge->getContextIds());
1547 if (!NewIdsToAdd.
empty()) {
1548 Edge->getContextIds().insert_range(NewIdsToAdd);
1549 UpdateCallers(NextNode, Visited, UpdateCallers);
1554 DenseSet<const ContextEdge *> Visited;
1555 for (
auto &Entry : AllocationCallToContextNodeMap) {
1557 UpdateCallers(Node, Visited, UpdateCallers);
1561template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1562void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
1563 ContextNode *NewNode, ContextNode *OrigNode,
bool TowardsCallee,
1566 DenseSet<uint32_t> RemainingContextIds) {
1568 TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
1569 DenseSet<uint32_t> RecursiveContextIds;
1570 DenseSet<uint32_t> AllCallerContextIds;
1575 for (
auto &CE : OrigEdges) {
1576 AllCallerContextIds.
reserve(
CE->getContextIds().size());
1577 for (
auto Id :
CE->getContextIds())
1578 if (!AllCallerContextIds.
insert(Id).second)
1579 RecursiveContextIds.
insert(Id);
1583 for (
auto EI = OrigEdges.begin(); EI != OrigEdges.end();) {
1585 DenseSet<uint32_t> NewEdgeContextIds;
1586 DenseSet<uint32_t> NotFoundContextIds;
1590 set_subtract(
Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds,
1591 NotFoundContextIds);
1594 if (RecursiveContextIds.
empty()) {
1597 RemainingContextIds.
swap(NotFoundContextIds);
1607 DenseSet<uint32_t> NonRecursiveRemainingCurEdgeIds =
1609 set_subtract(RemainingContextIds, NonRecursiveRemainingCurEdgeIds);
1612 if (NewEdgeContextIds.
empty()) {
1616 if (TowardsCallee) {
1617 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1618 auto NewEdge = std::make_shared<ContextEdge>(
1619 Edge->Callee, NewNode, NewAllocType, std::move(NewEdgeContextIds));
1620 NewNode->CalleeEdges.push_back(NewEdge);
1621 NewEdge->Callee->CallerEdges.push_back(NewEdge);
1623 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1624 auto NewEdge = std::make_shared<ContextEdge>(
1625 NewNode,
Edge->Caller, NewAllocType, std::move(NewEdgeContextIds));
1626 NewNode->CallerEdges.push_back(NewEdge);
1627 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
1630 if (
Edge->getContextIds().empty()) {
1631 removeEdgeFromGraph(
Edge.get(), &EI, TowardsCallee);
1638template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1640 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
1644 assert(!Edge->ContextIds.empty());
1647template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1649 bool CheckEdges =
true) {
1650 if (
Node->isRemoved())
1654 auto NodeContextIds =
Node->getContextIds();
1658 if (
Node->CallerEdges.size()) {
1660 Node->CallerEdges.front()->ContextIds);
1664 set_union(CallerEdgeContextIds, Edge->ContextIds);
1671 NodeContextIds == CallerEdgeContextIds ||
1674 if (
Node->CalleeEdges.size()) {
1676 Node->CalleeEdges.front()->ContextIds);
1680 set_union(CalleeEdgeContextIds, Edge->getContextIds());
1686 NodeContextIds == CalleeEdgeContextIds);
1695 for (
const auto &
E :
Node->CalleeEdges)
1701template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1702void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1703 assignStackNodesPostOrder(ContextNode *Node,
1704 DenseSet<const ContextNode *> &Visited,
1705 DenseMap<uint64_t, std::vector<CallContextInfo>>
1706 &StackIdToMatchingCalls,
1707 DenseMap<CallInfo, CallInfo> &CallToMatchingCall,
1708 const DenseSet<uint32_t> &ImportantContextIds) {
1716 auto CallerEdges =
Node->CallerEdges;
1717 for (
auto &
Edge : CallerEdges) {
1719 if (
Edge->isRemoved()) {
1723 assignStackNodesPostOrder(
Edge->Caller, Visited, StackIdToMatchingCalls,
1724 CallToMatchingCall, ImportantContextIds);
1733 if (
Node->IsAllocation ||
1734 !StackIdToMatchingCalls.count(
Node->OrigStackOrAllocId))
1737 auto &Calls = StackIdToMatchingCalls[
Node->OrigStackOrAllocId];
1741 if (Calls.size() == 1) {
1742 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[0];
1743 if (Ids.size() == 1) {
1744 assert(SavedContextIds.empty());
1746 assert(Node == getNodeForStackId(Ids[0]));
1747 if (
Node->Recursive)
1750 NonAllocationCallToContextNodeMap[
Call] =
Node;
1752 recordStackNode(Ids, Node,
Node->getContextIds(), ImportantContextIds);
1760 uint64_t LastId =
Node->OrigStackOrAllocId;
1761 ContextNode *LastNode = getNodeForStackId(LastId);
1764 assert(LastNode == Node);
1766 ContextNode *LastNode =
Node;
1771 DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
1773 [[maybe_unused]]
bool PrevIterCreatedNode =
false;
1774 bool CreatedNode =
false;
1775 for (
unsigned I = 0;
I < Calls.size();
1776 I++, PrevIterCreatedNode = CreatedNode) {
1777 CreatedNode =
false;
1778 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
1781 if (SavedContextIds.empty()) {
1788 auto MatchingCall = CallToMatchingCall[
Call];
1789 if (!NonAllocationCallToContextNodeMap.contains(MatchingCall)) {
1793 assert(
I > 0 && !PrevIterCreatedNode);
1796 NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back(
1801 assert(LastId == Ids.back());
1810 ContextNode *PrevNode = LastNode;
1814 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1816 ContextNode *CurNode = getNodeForStackId(Id);
1820 assert(!CurNode->Recursive);
1822 auto *
Edge = CurNode->findEdgeFromCaller(PrevNode);
1834 if (SavedContextIds.empty()) {
1843 ContextNode *NewNode = createNewNode(
false, Func,
Call);
1844 NonAllocationCallToContextNodeMap[
Call] = NewNode;
1846 NewNode->AllocTypes = computeAllocType(SavedContextIds);
1848 ContextNode *FirstNode = getNodeForStackId(Ids[0]);
1854 connectNewNode(NewNode, FirstNode,
true, SavedContextIds);
1859 connectNewNode(NewNode, LastNode,
false, SavedContextIds);
1864 for (
auto Id : Ids) {
1865 ContextNode *CurNode = getNodeForStackId(Id);
1872 auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
1879 set_subtract(PrevEdge->getContextIds(), SavedContextIds);
1880 if (PrevEdge->getContextIds().empty())
1881 removeEdgeFromGraph(PrevEdge);
1886 CurNode->AllocTypes = CurNode->CalleeEdges.empty()
1887 ? (uint8_t)AllocationType::None
1888 : CurNode->computeAllocType();
1892 recordStackNode(Ids, NewNode, SavedContextIds, ImportantContextIds);
1896 for (
auto Id : Ids) {
1897 ContextNode *CurNode = getNodeForStackId(Id);
1906template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1907void CallsiteContextGraph<DerivedCCG, FuncTy,
1908 CallTy>::fixupImportantContexts() {
1909 if (ImportantContextIdInfo.empty())
1913 NumImportantContextIds = ImportantContextIdInfo.size();
1919 exportToDot(
"beforestackfixup");
1944 for (
auto &[CurContextId,
Info] : ImportantContextIdInfo) {
1945 if (
Info.StackIdsToNode.empty())
1948 ContextNode *PrevNode =
nullptr;
1949 ContextNode *CurNode =
nullptr;
1950 DenseSet<const ContextEdge *> VisitedEdges;
1951 ArrayRef<uint64_t> AllStackIds(
Info.StackIds);
1954 for (
unsigned I = 0;
I < AllStackIds.size();
I++, PrevNode = CurNode) {
1958 auto LenToEnd = AllStackIds.size() -
I;
1966 auto CheckStackIds = AllStackIds.slice(
I, Len);
1967 auto EntryIt =
Info.StackIdsToNode.find(CheckStackIds);
1968 if (EntryIt ==
Info.StackIdsToNode.end())
1970 CurNode = EntryIt->second;
1987 auto *CurEdge = PrevNode->findEdgeFromCaller(CurNode);
1990 if (CurEdge->getContextIds().insert(CurContextId).second) {
1991 NumFixupEdgeIdsInserted++;
1996 NumFixupEdgesAdded++;
1997 DenseSet<uint32_t> ContextIds({CurContextId});
1998 auto AllocType = computeAllocType(ContextIds);
1999 auto NewEdge = std::make_shared<ContextEdge>(
2000 PrevNode, CurNode,
AllocType, std::move(ContextIds));
2001 PrevNode->CallerEdges.push_back(NewEdge);
2002 CurNode->CalleeEdges.push_back(NewEdge);
2004 CurEdge = NewEdge.get();
2007 VisitedEdges.
insert(CurEdge);
2010 for (
auto &
Edge : PrevNode->CallerEdges) {
2014 Edge->getContextIds().erase(CurContextId);
2022template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2023void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
2031 DenseMap<uint64_t, std::vector<CallContextInfo>> StackIdToMatchingCalls;
2032 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
2033 for (
auto &
Call : CallsWithMetadata) {
2035 if (AllocationCallToContextNodeMap.count(
Call))
2037 auto StackIdsWithContextNodes =
2038 getStackIdsWithContextNodesForCall(
Call.call());
2041 if (StackIdsWithContextNodes.empty())
2045 StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back(
2046 {
Call.call(), StackIdsWithContextNodes,
Func, {}});
2056 DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds;
2060 DenseMap<CallInfo, CallInfo> CallToMatchingCall;
2061 for (
auto &It : StackIdToMatchingCalls) {
2062 auto &Calls = It.getSecond();
2064 if (Calls.size() == 1) {
2065 auto &Ids = Calls[0].StackIds;
2066 if (Ids.size() == 1)
2079 DenseMap<const FuncTy *, unsigned> FuncToIndex;
2080 for (
const auto &[Idx, CallCtxInfo] :
enumerate(Calls))
2081 FuncToIndex.
insert({CallCtxInfo.Func, Idx});
2084 [&FuncToIndex](
const CallContextInfo &
A,
const CallContextInfo &
B) {
2085 return A.StackIds.size() >
B.StackIds.size() ||
2086 (
A.StackIds.size() ==
B.StackIds.size() &&
2087 (
A.StackIds <
B.StackIds ||
2088 (
A.StackIds ==
B.StackIds &&
2089 FuncToIndex[
A.Func] < FuncToIndex[
B.Func])));
2095 uint64_t LastId = It.getFirst();
2096 ContextNode *LastNode = getNodeForStackId(LastId);
2100 if (LastNode->Recursive)
2105 DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
2113 DenseSet<const FuncTy *> MatchingIdsFuncSet;
2116 for (
unsigned I = 0;
I < Calls.size();
I++) {
2117 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
2118 assert(SavedContextIds.empty());
2119 assert(LastId == Ids.back());
2124 if (
I > 0 && Ids != Calls[
I - 1].StackIds)
2125 MatchingIdsFuncSet.
clear();
2132 DenseSet<uint32_t> StackSequenceContextIds = LastNodeContextIds;
2134 ContextNode *PrevNode = LastNode;
2135 ContextNode *CurNode = LastNode;
2140 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
2142 CurNode = getNodeForStackId(Id);
2146 if (CurNode->Recursive) {
2151 auto *
Edge = CurNode->findEdgeFromCaller(PrevNode);
2172 if (StackSequenceContextIds.
empty()) {
2185 if (Ids.back() != getLastStackId(
Call)) {
2186 for (
const auto &PE : LastNode->CallerEdges) {
2187 set_subtract(StackSequenceContextIds, PE->getContextIds());
2188 if (StackSequenceContextIds.
empty())
2192 if (StackSequenceContextIds.
empty())
2204 MatchingIdsFuncSet.
insert(Func);
2211 bool DuplicateContextIds =
false;
2212 for (
unsigned J =
I + 1; J < Calls.size(); J++) {
2213 auto &CallCtxInfo = Calls[J];
2214 auto &NextIds = CallCtxInfo.StackIds;
2217 auto *NextFunc = CallCtxInfo.Func;
2218 if (NextFunc != Func) {
2221 DuplicateContextIds =
true;
2224 auto &NextCall = CallCtxInfo.Call;
2225 CallToMatchingCall[NextCall] =
Call;
2236 OldToNewContextIds.
reserve(OldToNewContextIds.
size() +
2237 StackSequenceContextIds.
size());
2240 ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds)
2241 : StackSequenceContextIds;
2242 assert(!SavedContextIds.empty());
2244 if (!DuplicateContextIds) {
2248 set_subtract(LastNodeContextIds, StackSequenceContextIds);
2249 if (LastNodeContextIds.
empty())
2256 propagateDuplicateContextIds(OldToNewContextIds);
2266 DenseSet<const ContextNode *> Visited;
2268 ImportantContextIdInfo.keys());
2269 for (
auto &Entry : AllocationCallToContextNodeMap)
2270 assignStackNodesPostOrder(
Entry.second, Visited, StackIdToMatchingCalls,
2271 CallToMatchingCall, ImportantContextIds);
2273 fixupImportantContexts();
2279uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *
Call) {
2280 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
2282 return CallsiteContext.
back();
2285uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &
Call) {
2287 CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
2290 return Index.getStackIdAtIndex(CallsiteContext.
back());
2312 auto Pos =
F.getName().find_last_of(
'.');
2315 bool Err =
F.getName().drop_front(Pos + 1).getAsInteger(10, CloneNo);
2321std::string ModuleCallsiteContextGraph::getLabel(
const Function *Func,
2322 const Instruction *
Call,
2323 unsigned CloneNo)
const {
2329std::string IndexCallsiteContextGraph::getLabel(
const FunctionSummary *Func,
2330 const IndexCall &
Call,
2331 unsigned CloneNo)
const {
2332 auto VI = FSToVIMap.find(Func);
2333 assert(VI != FSToVIMap.end());
2336 return CallerName +
" -> alloc";
2339 return CallerName +
" -> " +
2341 Callsite->Clones[CloneNo]);
2345std::vector<uint64_t>
2346ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
2347 Instruction *
Call) {
2348 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
2350 return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>(
2354std::vector<uint64_t>
2355IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &
Call) {
2357 CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
2359 return getStackIdsWithContextNodes<CallsiteInfo,
2360 SmallVector<unsigned>::const_iterator>(
2364template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2365template <
class NodeT,
class IteratorT>
2366std::vector<uint64_t>
2367CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes(
2368 CallStack<NodeT, IteratorT> &CallsiteContext) {
2369 std::vector<uint64_t> StackIds;
2370 for (
auto IdOrIndex : CallsiteContext) {
2371 auto StackId = getStackId(IdOrIndex);
2372 ContextNode *
Node = getNodeForStackId(StackId);
2375 StackIds.push_back(StackId);
2380ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
2382 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter)
2383 :
Mod(
M), OREGetter(OREGetter) {
2387 std::map<uint64_t, uint32_t> TotalSizeToContextIdTopNCold;
2389 std::vector<CallInfo> CallsWithMetadata;
2390 for (
auto &BB :
F) {
2391 for (
auto &
I : BB) {
2394 if (
auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof)) {
2395 CallsWithMetadata.push_back(&
I);
2396 auto *AllocNode = addAllocNode(&
I, &
F);
2397 auto *CallsiteMD =
I.getMetadata(LLVMContext::MD_callsite);
2401 for (
auto &MDOp : MemProfMD->operands()) {
2403 std::vector<ContextTotalSize> ContextSizeInfo;
2405 if (MIBMD->getNumOperands() > 2) {
2406 for (
unsigned I = 2;
I < MIBMD->getNumOperands();
I++) {
2407 MDNode *ContextSizePair =
2416 ContextSizeInfo.push_back({FullStackId, TotalSize});
2422 addStackNodesForMIB<MDNode, MDNode::op_iterator>(
2423 AllocNode, StackContext, CallsiteContext,
2425 TotalSizeToContextIdTopNCold);
2430 DotAllocContextIds = AllocNode->getContextIds();
2434 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
2435 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
2438 else if (
I.getMetadata(LLVMContext::MD_callsite)) {
2439 CallsWithMetadata.push_back(&
I);
2443 if (!CallsWithMetadata.empty())
2444 FuncToCallsWithMetadata[&
F] = CallsWithMetadata;
2448 dbgs() <<
"CCG before updating call stack chains:\n";
2453 exportToDot(
"prestackupdate");
2458 exportToDot(
"poststackupdate");
2460 handleCallsitesWithMultipleTargets();
2465 for (
auto &FuncEntry : FuncToCallsWithMetadata)
2466 for (
auto &
Call : FuncEntry.second)
2467 Call.call()->setMetadata(LLVMContext::MD_callsite,
nullptr);
2470IndexCallsiteContextGraph::IndexCallsiteContextGraph(
2474 : Index(Index), isPrevailing(isPrevailing) {
2478 std::map<uint64_t, uint32_t> TotalSizeToContextIdTopNCold;
2479 for (
auto &
I : Index) {
2480 auto VI = Index.getValueInfo(
I);
2481 for (
auto &S : VI.getSummaryList()) {
2490 !isPrevailing(VI.getGUID(), S.get()))
2495 std::vector<CallInfo> CallsWithMetadata;
2496 if (!
FS->allocs().empty()) {
2497 for (
auto &AN :
FS->mutableAllocs()) {
2502 if (AN.MIBs.empty())
2504 IndexCall AllocCall(&AN);
2505 CallsWithMetadata.push_back(AllocCall);
2506 auto *AllocNode = addAllocNode(AllocCall, FS);
2514 AN.ContextSizeInfos.size() == AN.MIBs.size());
2516 for (
auto &MIB : AN.MIBs) {
2519 std::vector<ContextTotalSize> ContextSizeInfo;
2520 if (!AN.ContextSizeInfos.empty()) {
2521 for (
auto [FullStackId, TotalSize] : AN.ContextSizeInfos[
I])
2522 ContextSizeInfo.push_back({FullStackId, TotalSize});
2524 addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
2525 AllocNode, StackContext, EmptyContext, MIB.AllocType,
2526 ContextSizeInfo, TotalSizeToContextIdTopNCold);
2532 DotAllocContextIds = AllocNode->getContextIds();
2538 AN.Versions[0] = (
uint8_t)allocTypeToUse(AllocNode->AllocTypes);
2542 if (!
FS->callsites().empty())
2543 for (
auto &SN :
FS->mutableCallsites()) {
2544 IndexCall StackNodeCall(&SN);
2545 CallsWithMetadata.push_back(StackNodeCall);
2548 if (!CallsWithMetadata.empty())
2549 FuncToCallsWithMetadata[
FS] = CallsWithMetadata;
2551 if (!
FS->allocs().empty() || !
FS->callsites().empty())
2557 dbgs() <<
"CCG before updating call stack chains:\n";
2562 exportToDot(
"prestackupdate");
2567 exportToDot(
"poststackupdate");
2569 handleCallsitesWithMultipleTargets();
2574template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2575void CallsiteContextGraph<DerivedCCG, FuncTy,
2576 CallTy>::handleCallsitesWithMultipleTargets() {
2591 std::vector<std::pair<CallInfo, ContextNode *>> NewCallToNode;
2592 for (
auto &Entry : NonAllocationCallToContextNodeMap) {
2593 auto *
Node = Entry.second;
2602 std::vector<CallInfo> AllCalls;
2603 AllCalls.reserve(
Node->MatchingCalls.size() + 1);
2604 AllCalls.push_back(
Node->Call);
2618 if (partitionCallsByCallee(
Node, AllCalls, NewCallToNode))
2621 auto It = AllCalls.begin();
2623 for (; It != AllCalls.end(); ++It) {
2626 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();
2629 if (!Edge->Callee->hasCall())
2631 assert(NodeToCallingFunc.count(Edge->Callee));
2633 if (!calleesMatch(
ThisCall.call(), EI, TailCallToContextNodeMap)) {
2642 if (
Node->Call != ThisCall) {
2643 Node->setCall(ThisCall);
2654 Node->MatchingCalls.clear();
2657 if (It == AllCalls.end()) {
2658 RemovedEdgesWithMismatchedCallees++;
2662 Node->setCall(CallInfo());
2667 for (++It; It != AllCalls.end(); ++It) {
2671 Node->MatchingCalls.push_back(ThisCall);
2680 NonAllocationCallToContextNodeMap.remove_if([](
const auto &it) {
2681 return !it.second->hasCall() || it.second->Call != it.first;
2685 for (
auto &[
Call,
Node] : NewCallToNode)
2686 NonAllocationCallToContextNodeMap[
Call] =
Node;
2690 for (
auto &[
Call,
Node] : TailCallToContextNodeMap)
2691 NonAllocationCallToContextNodeMap[
Call] =
Node;
2694template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2695bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::partitionCallsByCallee(
2697 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode) {
2701 struct CallsWithSameCallee {
2702 std::vector<CallInfo> Calls;
2703 ContextNode *
Node =
nullptr;
2709 for (
auto ThisCall : AllCalls) {
2710 auto *
F = getCalleeFunc(
ThisCall.call());
2712 CalleeFuncToCallInfo[
F].Calls.push_back(ThisCall);
2721 for (
const auto &Edge :
Node->CalleeEdges) {
2722 if (!Edge->Callee->hasCall())
2724 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
2725 if (CalleeFuncToCallInfo.
contains(ProfiledCalleeFunc))
2726 CalleeNodeToCallInfo[Edge->Callee] =
2727 &CalleeFuncToCallInfo[ProfiledCalleeFunc];
2733 if (CalleeNodeToCallInfo.
empty())
2745 ContextNode *UnmatchedCalleesNode =
nullptr;
2747 bool UsedOrigNode =
false;
2752 auto CalleeEdges =
Node->CalleeEdges;
2753 for (
auto &Edge : CalleeEdges) {
2754 if (!Edge->Callee->hasCall())
2759 ContextNode *CallerNodeToUse =
nullptr;
2763 if (!CalleeNodeToCallInfo.
contains(Edge->Callee)) {
2764 if (!UnmatchedCalleesNode)
2765 UnmatchedCalleesNode =
2766 createNewNode(
false, NodeToCallingFunc[
Node]);
2767 CallerNodeToUse = UnmatchedCalleesNode;
2771 auto *
Info = CalleeNodeToCallInfo[Edge->Callee];
2774 if (!UsedOrigNode) {
2777 Node->MatchingCalls.clear();
2778 UsedOrigNode =
true;
2781 createNewNode(
false, NodeToCallingFunc[
Node]);
2785 Info->Node->setCall(
Info->Calls.front());
2791 NewCallToNode.push_back({
Info->Node->Call,
Info->Node});
2793 CallerNodeToUse =
Info->Node;
2797 if (CallerNodeToUse ==
Node)
2800 moveCalleeEdgeToNewCaller(Edge, CallerNodeToUse);
2807 for (
auto &
I : CalleeNodeToCallInfo)
2808 removeNoneTypeCallerEdges(
I.second->Node);
2809 if (UnmatchedCalleesNode)
2810 removeNoneTypeCallerEdges(UnmatchedCalleesNode);
2811 removeNoneTypeCallerEdges(
Node);
2821uint64_t IndexCallsiteContextGraph::getStackId(uint64_t IdOrIndex)
const {
2824 return Index.getStackIdAtIndex(IdOrIndex);
2827template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2828bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch(
2829 CallTy
Call, EdgeIter &EI,
2830 MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap) {
2832 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[
Edge->Callee];
2833 const FuncTy *CallerFunc = NodeToCallingFunc[
Edge->Caller];
2836 std::vector<std::pair<CallTy, FuncTy *>> FoundCalleeChain;
2837 if (!calleeMatchesFunc(
Call, ProfiledCalleeFunc, CallerFunc,
2842 if (FoundCalleeChain.empty())
2846 auto *CurEdge =
Callee->findEdgeFromCaller(Caller);
2850 CurEdge->ContextIds.insert_range(
Edge->ContextIds);
2851 CurEdge->AllocTypes |=
Edge->AllocTypes;
2856 auto NewEdge = std::make_shared<ContextEdge>(
2857 Callee, Caller,
Edge->AllocTypes,
Edge->ContextIds);
2858 Callee->CallerEdges.push_back(NewEdge);
2859 if (Caller ==
Edge->Caller) {
2863 EI =
Caller->CalleeEdges.insert(EI, NewEdge);
2866 "Iterator position not restored after insert and increment");
2868 Caller->CalleeEdges.push_back(NewEdge);
2873 auto *CurCalleeNode =
Edge->Callee;
2874 for (
auto &[NewCall, Func] : FoundCalleeChain) {
2875 ContextNode *NewNode =
nullptr;
2877 if (TailCallToContextNodeMap.
count(NewCall)) {
2878 NewNode = TailCallToContextNodeMap[NewCall];
2879 NewNode->AllocTypes |=
Edge->AllocTypes;
2881 FuncToCallsWithMetadata[
Func].push_back({NewCall});
2883 NewNode = createNewNode(
false, Func, NewCall);
2884 TailCallToContextNodeMap[NewCall] = NewNode;
2885 NewNode->AllocTypes =
Edge->AllocTypes;
2889 AddEdge(NewNode, CurCalleeNode);
2891 CurCalleeNode = NewNode;
2895 AddEdge(
Edge->Caller, CurCalleeNode);
2903 removeEdgeFromGraph(
Edge.get(), &EI,
true);
2915bool ModuleCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2916 const Function *ProfiledCallee,
Value *CurCallee,
unsigned Depth,
2917 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
2918 bool &FoundMultipleCalleeChains) {
2925 FoundCalleeChain.push_back({Callsite,
F});
2940 bool FoundSingleCalleeChain =
false;
2941 for (
auto &BB : *CalleeFunc) {
2942 for (
auto &
I : BB) {
2944 if (!CB || !CB->isTailCall())
2946 auto *CalledValue = CB->getCalledOperand();
2947 auto *CalledFunction = CB->getCalledFunction();
2948 if (CalledValue && !CalledFunction) {
2949 CalledValue = CalledValue->stripPointerCasts();
2956 assert(!CalledFunction &&
2957 "Expected null called function in callsite for alias");
2960 if (!CalledFunction)
2962 if (CalledFunction == ProfiledCallee) {
2963 if (FoundSingleCalleeChain) {
2964 FoundMultipleCalleeChains =
true;
2967 FoundSingleCalleeChain =
true;
2968 FoundProfiledCalleeCount++;
2969 FoundProfiledCalleeDepth +=
Depth;
2970 if (
Depth > FoundProfiledCalleeMaxDepth)
2971 FoundProfiledCalleeMaxDepth =
Depth;
2972 SaveCallsiteInfo(&
I, CalleeFunc);
2973 }
else if (findProfiledCalleeThroughTailCalls(
2974 ProfiledCallee, CalledFunction,
Depth + 1,
2975 FoundCalleeChain, FoundMultipleCalleeChains)) {
2978 assert(!FoundMultipleCalleeChains);
2979 if (FoundSingleCalleeChain) {
2980 FoundMultipleCalleeChains =
true;
2983 FoundSingleCalleeChain =
true;
2984 SaveCallsiteInfo(&
I, CalleeFunc);
2985 }
else if (FoundMultipleCalleeChains)
2990 return FoundSingleCalleeChain;
2993const Function *ModuleCallsiteContextGraph::getCalleeFunc(Instruction *
Call) {
2995 if (!CB->getCalledOperand() || CB->isIndirectCall())
2997 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
3004bool ModuleCallsiteContextGraph::calleeMatchesFunc(
3005 Instruction *
Call,
const Function *Func,
const Function *CallerFunc,
3006 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain) {
3008 if (!CB->getCalledOperand() || CB->isIndirectCall())
3010 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
3012 if (CalleeFunc == Func)
3015 if (Alias && Alias->getAliasee() == Func)
3026 bool FoundMultipleCalleeChains =
false;
3027 if (!findProfiledCalleeThroughTailCalls(Func, CalleeVal,
Depth,
3029 FoundMultipleCalleeChains)) {
3030 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: "
3031 <<
Func->getName() <<
" from " << CallerFunc->
getName()
3032 <<
" that actually called " << CalleeVal->getName()
3033 << (FoundMultipleCalleeChains
3034 ?
" (found multiple possible chains)"
3037 if (FoundMultipleCalleeChains)
3038 FoundProfiledCalleeNonUniquelyCount++;
3045bool ModuleCallsiteContextGraph::sameCallee(Instruction *Call1,
3046 Instruction *Call2) {
3048 if (!CB1->getCalledOperand() || CB1->isIndirectCall())
3050 auto *CalleeVal1 = CB1->getCalledOperand()->stripPointerCasts();
3053 if (!CB2->getCalledOperand() || CB2->isIndirectCall())
3055 auto *CalleeVal2 = CB2->getCalledOperand()->stripPointerCasts();
3057 return CalleeFunc1 == CalleeFunc2;
3060bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
3061 ValueInfo ProfiledCallee, ValueInfo CurCallee,
unsigned Depth,
3062 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
3063 bool &FoundMultipleCalleeChains) {
3069 auto CreateAndSaveCallsiteInfo = [&](ValueInfo
Callee, FunctionSummary *
FS) {
3072 if (!FunctionCalleesToSynthesizedCallsiteInfos.count(FS) ||
3073 !FunctionCalleesToSynthesizedCallsiteInfos[FS].count(Callee))
3076 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee] =
3077 std::make_unique<CallsiteInfo>(Callee, SmallVector<unsigned>());
3078 CallsiteInfo *NewCallsiteInfo =
3079 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee].get();
3080 FoundCalleeChain.push_back({NewCallsiteInfo,
FS});
3087 bool FoundSingleCalleeChain =
false;
3090 !isPrevailing(CurCallee.
getGUID(), S.get()))
3095 auto FSVI = CurCallee;
3098 FSVI = AS->getAliaseeVI();
3099 for (
auto &CallEdge :
FS->calls()) {
3100 if (!CallEdge.second.hasTailCall())
3102 if (CallEdge.first == ProfiledCallee) {
3103 if (FoundSingleCalleeChain) {
3104 FoundMultipleCalleeChains =
true;
3107 FoundSingleCalleeChain =
true;
3108 FoundProfiledCalleeCount++;
3109 FoundProfiledCalleeDepth +=
Depth;
3110 if (
Depth > FoundProfiledCalleeMaxDepth)
3111 FoundProfiledCalleeMaxDepth =
Depth;
3112 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
3114 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
3115 FSToVIMap[
FS] = FSVI;
3116 }
else if (findProfiledCalleeThroughTailCalls(
3117 ProfiledCallee, CallEdge.first,
Depth + 1,
3118 FoundCalleeChain, FoundMultipleCalleeChains)) {
3121 assert(!FoundMultipleCalleeChains);
3122 if (FoundSingleCalleeChain) {
3123 FoundMultipleCalleeChains =
true;
3126 FoundSingleCalleeChain =
true;
3127 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
3129 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
3130 FSToVIMap[
FS] = FSVI;
3131 }
else if (FoundMultipleCalleeChains)
3136 return FoundSingleCalleeChain;
3139const FunctionSummary *
3140IndexCallsiteContextGraph::getCalleeFunc(IndexCall &
Call) {
3142 if (
Callee.getSummaryList().empty())
3147bool IndexCallsiteContextGraph::calleeMatchesFunc(
3148 IndexCall &
Call,
const FunctionSummary *Func,
3149 const FunctionSummary *CallerFunc,
3150 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain) {
3154 AliasSummary *Alias =
3155 Callee.getSummaryList().empty()
3158 assert(FSToVIMap.count(Func));
3159 auto FuncVI = FSToVIMap[
Func];
3160 if (Callee == FuncVI ||
3175 bool FoundMultipleCalleeChains =
false;
3176 if (!findProfiledCalleeThroughTailCalls(
3177 FuncVI, Callee,
Depth, FoundCalleeChain, FoundMultipleCalleeChains)) {
3178 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: " << FuncVI
3179 <<
" from " << FSToVIMap[CallerFunc]
3180 <<
" that actually called " << Callee
3181 << (FoundMultipleCalleeChains
3182 ?
" (found multiple possible chains)"
3185 if (FoundMultipleCalleeChains)
3186 FoundProfiledCalleeNonUniquelyCount++;
3193bool IndexCallsiteContextGraph::sameCallee(IndexCall &Call1, IndexCall &Call2) {
3196 return Callee1 == Callee2;
3199template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3200void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
3206template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3207void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
3208 raw_ostream &OS)
const {
3209 OS <<
"Node " <<
this <<
"\n";
3213 OS <<
" (recursive)";
3215 if (!MatchingCalls.empty()) {
3216 OS <<
"\tMatchingCalls:\n";
3217 for (
auto &MatchingCall : MatchingCalls) {
3219 MatchingCall.print(OS);
3223 OS <<
"\tNodeId: " <<
NodeId <<
"\n";
3225 OS <<
"\tContextIds:";
3227 auto ContextIds = getContextIds();
3228 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
3229 std::sort(SortedIds.begin(), SortedIds.end());
3230 for (
auto Id : SortedIds)
3233 OS <<
"\tCalleeEdges:\n";
3234 for (
auto &
Edge : CalleeEdges)
3235 OS <<
"\t\t" << *
Edge <<
" (Callee NodeId: " <<
Edge->Callee->NodeId
3237 OS <<
"\tCallerEdges:\n";
3238 for (
auto &
Edge : CallerEdges)
3239 OS <<
"\t\t" << *
Edge <<
" (Caller NodeId: " <<
Edge->Caller->NodeId
3241 if (!Clones.empty()) {
3244 for (
auto *
C : Clones) {
3248 OS <<
C <<
" NodeId: " <<
C->NodeId;
3251 }
else if (CloneOf) {
3252 OS <<
"\tClone of " << CloneOf <<
" NodeId: " << CloneOf->NodeId <<
"\n";
3256template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3257void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump()
3263template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3264void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print(
3265 raw_ostream &OS)
const {
3266 OS <<
"Edge from Callee " <<
Callee <<
" to Caller: " <<
Caller
3267 << (IsBackedge ?
" (BE)" :
"")
3269 OS <<
" ContextIds:";
3270 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
3271 std::sort(SortedIds.begin(), SortedIds.end());
3272 for (
auto Id : SortedIds)
3276template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3277void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump()
const {
3281template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3282void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
3283 raw_ostream &OS)
const {
3284 OS <<
"Callsite Context Graph:\n";
3285 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3287 if (
Node->isRemoved())
3294template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3295void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
3296 raw_ostream &OS)
const {
3297 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3299 if (
Node->isRemoved())
3301 if (!
Node->IsAllocation)
3303 DenseSet<uint32_t> ContextIds =
Node->getContextIds();
3304 auto AllocTypeFromCall = getAllocationCallType(
Node->Call);
3305 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
3306 std::sort(SortedIds.begin(), SortedIds.end());
3307 for (
auto Id : SortedIds) {
3308 auto TypeI = ContextIdToAllocationType.find(Id);
3309 assert(TypeI != ContextIdToAllocationType.end());
3310 auto CSI = ContextIdToContextSizeInfos.find(Id);
3311 if (CSI != ContextIdToContextSizeInfos.end()) {
3312 for (
auto &
Info : CSI->second) {
3313 OS <<
"MemProf hinting: "
3315 <<
" full allocation context " <<
Info.FullStackId
3316 <<
" with total size " <<
Info.TotalSize <<
" is "
3318 if (allocTypeToUse(
Node->AllocTypes) != AllocTypeFromCall)
3320 <<
" due to cold byte percent";
3322 OS <<
" (context id " <<
Id <<
")";
3330template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3331void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check()
const {
3332 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3335 for (
auto &
Edge :
Node->CallerEdges)
3340template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3342 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3343 using NodeRef =
const ContextNode<DerivedCCG, FuncTy, CallTy> *;
3345 using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>;
3361 return G->NodeOwner.begin()->get();
3364 using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>;
3365 static const ContextNode<DerivedCCG, FuncTy, CallTy> *
3384template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3398 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
3404 std::string LabelString =
3405 (
Twine(
"OrigId: ") + (
Node->IsAllocation ?
"Alloc" :
"") +
3408 LabelString +=
"\n";
3409 if (
Node->hasCall()) {
3410 auto Func =
G->NodeToCallingFunc.find(
Node);
3411 assert(Func !=
G->NodeToCallingFunc.end());
3413 G->getLabel(Func->second,
Node->Call.call(),
Node->Call.cloneNo());
3415 LabelString +=
"null call";
3416 if (
Node->Recursive)
3417 LabelString +=
" (recursive)";
3419 LabelString +=
" (external)";
3425 auto ContextIds =
Node->getContextIds();
3429 bool Highlight =
false;
3438 std::string AttributeString = (
Twine(
"tooltip=\"") + getNodeId(
Node) +
" " +
3439 getContextIds(ContextIds) +
"\"")
3443 AttributeString +=
",fontsize=\"30\"";
3445 (
Twine(
",fillcolor=\"") + getColor(
Node->AllocTypes, Highlight) +
"\"")
3447 if (
Node->CloneOf) {
3448 AttributeString +=
",color=\"blue\"";
3449 AttributeString +=
",style=\"filled,bold,dashed\"";
3451 AttributeString +=
",style=\"filled\"";
3452 return AttributeString;
3457 auto &Edge = *(ChildIter.getCurrent());
3462 bool Highlight =
false;
3471 auto Color = getColor(Edge->AllocTypes, Highlight);
3472 std::string AttributeString =
3473 (
Twine(
"tooltip=\"") + getContextIds(Edge->ContextIds) +
"\"" +
3475 Twine(
",fillcolor=\"") + Color +
"\"" +
Twine(
",color=\"") + Color +
3478 if (Edge->IsBackedge)
3479 AttributeString +=
",style=\"dotted\"";
3482 AttributeString +=
",penwidth=\"2.0\",weight=\"2\"";
3483 return AttributeString;
3489 if (
Node->isRemoved())
3502 std::string IdString =
"ContextIds:";
3503 if (ContextIds.
size() < 100) {
3504 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
3505 std::sort(SortedIds.begin(), SortedIds.end());
3506 for (
auto Id : SortedIds)
3507 IdString += (
" " +
Twine(Id)).str();
3509 IdString += (
" (" + Twine(ContextIds.
size()) +
" ids)").str();
3514 static std::string getColor(uint8_t AllocTypes,
bool Highlight) {
3520 if (AllocTypes == (uint8_t)AllocationType::NotCold)
3522 return !
DoHighlight || Highlight ?
"brown1" :
"lightpink";
3523 if (AllocTypes == (uint8_t)AllocationType::Cold)
3524 return !
DoHighlight || Highlight ?
"cyan" :
"lightskyblue";
3526 ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))
3527 return Highlight ?
"magenta" :
"mediumorchid1";
3531 static std::string getNodeId(NodeRef Node) {
3532 std::stringstream SStream;
3533 SStream << std::hex <<
"N0x" << (
unsigned long long)Node;
3534 std::string
Result = SStream.str();
3543template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3548template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3549void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
3550 std::string Label)
const {
3555template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3556typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
3557CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
3558 const std::shared_ptr<ContextEdge> &
Edge,
3559 DenseSet<uint32_t> ContextIdsToMove) {
3561 assert(NodeToCallingFunc.count(Node));
3562 ContextNode *Clone =
3563 createNewNode(
Node->IsAllocation, NodeToCallingFunc[Node],
Node->Call);
3564 Node->addClone(Clone);
3565 Clone->MatchingCalls =
Node->MatchingCalls;
3566 moveEdgeToExistingCalleeClone(
Edge, Clone,
true,
3571template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3572void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3573 moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &
Edge,
3574 ContextNode *NewCallee,
bool NewClone,
3575 DenseSet<uint32_t> ContextIdsToMove) {
3578 assert(NewCallee->getOrigNode() ==
Edge->Callee->getOrigNode());
3580 bool EdgeIsRecursive =
Edge->Callee ==
Edge->Caller;
3582 ContextNode *OldCallee =
Edge->Callee;
3586 auto ExistingEdgeToNewCallee = NewCallee->findEdgeFromCaller(
Edge->Caller);
3590 if (ContextIdsToMove.
empty())
3591 ContextIdsToMove =
Edge->getContextIds();
3595 if (
Edge->getContextIds().size() == ContextIdsToMove.
size()) {
3598 NewCallee->AllocTypes |=
Edge->AllocTypes;
3600 if (ExistingEdgeToNewCallee) {
3603 ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);
3604 ExistingEdgeToNewCallee->AllocTypes |=
Edge->AllocTypes;
3605 assert(
Edge->ContextIds == ContextIdsToMove);
3606 removeEdgeFromGraph(
Edge.get());
3609 Edge->Callee = NewCallee;
3610 NewCallee->CallerEdges.push_back(
Edge);
3612 OldCallee->eraseCallerEdge(
Edge.get());
3619 auto CallerEdgeAllocType = computeAllocType(ContextIdsToMove);
3620 if (ExistingEdgeToNewCallee) {
3623 ExistingEdgeToNewCallee->getContextIds().insert_range(ContextIdsToMove);
3624 ExistingEdgeToNewCallee->AllocTypes |= CallerEdgeAllocType;
3627 auto NewEdge = std::make_shared<ContextEdge>(
3628 NewCallee,
Edge->Caller, CallerEdgeAllocType, ContextIdsToMove);
3629 Edge->Caller->CalleeEdges.push_back(NewEdge);
3630 NewCallee->CallerEdges.push_back(NewEdge);
3634 NewCallee->AllocTypes |= CallerEdgeAllocType;
3636 Edge->AllocTypes = computeAllocType(
Edge->ContextIds);
3641 for (
auto &OldCalleeEdge : OldCallee->CalleeEdges) {
3642 ContextNode *CalleeToUse = OldCalleeEdge->Callee;
3646 if (CalleeToUse == OldCallee) {
3650 if (EdgeIsRecursive) {
3654 CalleeToUse = NewCallee;
3658 DenseSet<uint32_t> EdgeContextIdsToMove =
3660 set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);
3661 OldCalleeEdge->AllocTypes =
3662 computeAllocType(OldCalleeEdge->getContextIds());
3669 if (
auto *NewCalleeEdge = NewCallee->findEdgeFromCallee(CalleeToUse)) {
3670 NewCalleeEdge->getContextIds().insert_range(EdgeContextIdsToMove);
3671 NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
3675 auto NewEdge = std::make_shared<ContextEdge>(
3676 CalleeToUse, NewCallee, computeAllocType(EdgeContextIdsToMove),
3677 EdgeContextIdsToMove);
3678 NewCallee->CalleeEdges.push_back(NewEdge);
3679 NewEdge->Callee->CallerEdges.push_back(NewEdge);
3683 OldCallee->AllocTypes = OldCallee->computeAllocType();
3685 assert((OldCallee->AllocTypes == (uint8_t)AllocationType::None) ==
3686 OldCallee->emptyContextIds());
3690 for (
const auto &OldCalleeEdge : OldCallee->CalleeEdges)
3693 for (
const auto &NewCalleeEdge : NewCallee->CalleeEdges)
3699template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3700void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3701 moveCalleeEdgeToNewCaller(
const std::shared_ptr<ContextEdge> &
Edge,
3702 ContextNode *NewCaller) {
3703 auto *OldCallee =
Edge->Callee;
3704 auto *NewCallee = OldCallee;
3707 bool Recursive =
Edge->Caller ==
Edge->Callee;
3709 NewCallee = NewCaller;
3711 ContextNode *OldCaller =
Edge->Caller;
3712 OldCaller->eraseCalleeEdge(
Edge.get());
3716 auto ExistingEdgeToNewCaller = NewCaller->findEdgeFromCallee(NewCallee);
3718 if (ExistingEdgeToNewCaller) {
3721 ExistingEdgeToNewCaller->getContextIds().insert_range(
3722 Edge->getContextIds());
3723 ExistingEdgeToNewCaller->AllocTypes |=
Edge->AllocTypes;
3724 Edge->ContextIds.clear();
3725 Edge->AllocTypes = (uint8_t)AllocationType::None;
3726 OldCallee->eraseCallerEdge(
Edge.get());
3729 Edge->Caller = NewCaller;
3730 NewCaller->CalleeEdges.push_back(
Edge);
3732 assert(NewCallee == NewCaller);
3735 Edge->Callee = NewCallee;
3736 NewCallee->CallerEdges.push_back(
Edge);
3737 OldCallee->eraseCallerEdge(
Edge.get());
3743 NewCaller->AllocTypes |=
Edge->AllocTypes;
3750 bool IsNewNode = NewCaller->CallerEdges.empty();
3759 for (
auto &OldCallerEdge : OldCaller->CallerEdges) {
3760 auto OldCallerCaller = OldCallerEdge->Caller;
3764 OldCallerEdge->getContextIds(),
Edge->getContextIds());
3765 if (OldCaller == OldCallerCaller) {
3766 OldCallerCaller = NewCaller;
3772 set_subtract(OldCallerEdge->getContextIds(), EdgeContextIdsToMove);
3773 OldCallerEdge->AllocTypes =
3774 computeAllocType(OldCallerEdge->getContextIds());
3779 auto *ExistingCallerEdge = NewCaller->findEdgeFromCaller(OldCallerCaller);
3783 if (ExistingCallerEdge) {
3784 ExistingCallerEdge->getContextIds().insert_range(EdgeContextIdsToMove);
3785 ExistingCallerEdge->AllocTypes |=
3786 computeAllocType(EdgeContextIdsToMove);
3789 auto NewEdge = std::make_shared<ContextEdge>(
3790 NewCaller, OldCallerCaller, computeAllocType(EdgeContextIdsToMove),
3791 EdgeContextIdsToMove);
3792 NewCaller->CallerEdges.push_back(NewEdge);
3793 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
3798 OldCaller->AllocTypes = OldCaller->computeAllocType();
3800 assert((OldCaller->AllocTypes == (uint8_t)AllocationType::None) ==
3801 OldCaller->emptyContextIds());
3805 for (
const auto &OldCallerEdge : OldCaller->CallerEdges)
3808 for (
const auto &NewCallerEdge : NewCaller->CallerEdges)
3814template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3815void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3816 recursivelyRemoveNoneTypeCalleeEdges(
3817 ContextNode *Node, DenseSet<const ContextNode *> &Visited) {
3822 removeNoneTypeCalleeEdges(Node);
3824 for (
auto *Clone :
Node->Clones)
3825 recursivelyRemoveNoneTypeCalleeEdges(Clone, Visited);
3829 auto CallerEdges =
Node->CallerEdges;
3830 for (
auto &
Edge : CallerEdges) {
3832 if (
Edge->isRemoved()) {
3836 recursivelyRemoveNoneTypeCalleeEdges(
Edge->Caller, Visited);
3841template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3842void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges() {
3847 DenseSet<const ContextNode *> Visited;
3848 DenseSet<const ContextNode *> CurrentStack;
3849 for (
auto &Entry : NonAllocationCallToContextNodeMap) {
3851 if (
Node->isRemoved())
3854 if (!
Node->CallerEdges.empty())
3856 markBackedges(Node, Visited, CurrentStack);
3862template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3863void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::markBackedges(
3864 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
3865 DenseSet<const ContextNode *> &CurrentStack) {
3866 auto I = Visited.
insert(Node);
3870 for (
auto &CalleeEdge :
Node->CalleeEdges) {
3871 auto *
Callee = CalleeEdge->Callee;
3872 if (Visited.
count(Callee)) {
3875 if (CurrentStack.
count(Callee))
3876 CalleeEdge->IsBackedge =
true;
3879 CurrentStack.
insert(Callee);
3880 markBackedges(Callee, Visited, CurrentStack);
3881 CurrentStack.
erase(Callee);
3885template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3886void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
3887 DenseSet<const ContextNode *> Visited;
3888 for (
auto &Entry : AllocationCallToContextNodeMap) {
3890 identifyClones(
Entry.second, Visited,
Entry.second->getContextIds());
3893 for (
auto &Entry : AllocationCallToContextNodeMap)
3894 recursivelyRemoveNoneTypeCalleeEdges(
Entry.second, Visited);
3907template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3908void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
3909 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
3910 const DenseSet<uint32_t> &AllocContextIds) {
3920 if (!
Node->hasCall())
3939 auto CallerEdges =
Node->CallerEdges;
3940 for (
auto &
Edge : CallerEdges) {
3942 if (
Edge->isRemoved()) {
3948 if (
Edge->IsBackedge) {
3955 if (!Visited.
count(
Edge->Caller) && !
Edge->Caller->CloneOf) {
3956 identifyClones(
Edge->Caller, Visited, AllocContextIds);
3979 const unsigned AllocTypeCloningPriority[] = { 3, 4,
3983 [&](
const std::shared_ptr<ContextEdge> &
A,
3984 const std::shared_ptr<ContextEdge> &
B) {
3987 if (A->ContextIds.empty())
3993 if (B->ContextIds.empty())
3996 if (A->AllocTypes == B->AllocTypes)
3999 return *A->ContextIds.begin() < *B->ContextIds.begin();
4000 return AllocTypeCloningPriority[A->AllocTypes] <
4001 AllocTypeCloningPriority[B->AllocTypes];
4004 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
4006 DenseSet<uint32_t> RecursiveContextIds;
4011 DenseSet<uint32_t> AllCallerContextIds;
4012 for (
auto &CE :
Node->CallerEdges) {
4015 AllCallerContextIds.
reserve(
CE->getContextIds().size());
4016 for (
auto Id :
CE->getContextIds())
4017 if (!AllCallerContextIds.
insert(Id).second)
4018 RecursiveContextIds.
insert(Id);
4028 auto CallerEdges =
Node->CallerEdges;
4029 for (
auto &CallerEdge : CallerEdges) {
4031 if (CallerEdge->isRemoved()) {
4035 assert(CallerEdge->Callee == Node);
4044 if (!CallerEdge->Caller->hasCall())
4049 auto CallerEdgeContextsForAlloc =
4051 if (!RecursiveContextIds.
empty())
4052 CallerEdgeContextsForAlloc =
4054 if (CallerEdgeContextsForAlloc.empty())
4057 auto CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
4061 std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge;
4062 CalleeEdgeAllocTypesForCallerEdge.reserve(
Node->CalleeEdges.size());
4063 for (
auto &CalleeEdge :
Node->CalleeEdges)
4064 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
4065 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
4081 assert(CallerEdge->AllocTypes != (uint8_t)AllocationType::None);
4082 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
4083 if (!CallerEdge->IsBackedge &&
4084 allocTypeToUse(CallerAllocTypeForAlloc) ==
4085 allocTypeToUse(
Node->AllocTypes) &&
4086 allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
4087 CalleeEdgeAllocTypesForCallerEdge,
Node->CalleeEdges)) {
4091 if (CallerEdge->IsBackedge) {
4095 DeferredBackedges++;
4108 if (CallerEdge->IsBackedge && !CallerEdge->Caller->CloneOf &&
4109 !Visited.
count(CallerEdge->Caller)) {
4110 const auto OrigIdCount = CallerEdge->getContextIds().size();
4113 identifyClones(CallerEdge->Caller, Visited, CallerEdgeContextsForAlloc);
4114 removeNoneTypeCalleeEdges(CallerEdge->Caller);
4118 bool UpdatedEdge =
false;
4119 if (OrigIdCount > CallerEdge->getContextIds().size()) {
4120 for (
auto E :
Node->CallerEdges) {
4122 if (
E->Caller->CloneOf != CallerEdge->Caller)
4126 auto CallerEdgeContextsForAllocNew =
4128 if (CallerEdgeContextsForAllocNew.empty())
4138 CallerEdgeContextsForAlloc.swap(CallerEdgeContextsForAllocNew);
4148 if (CallerEdge->isRemoved())
4158 CallerEdgeContextsForAlloc, CallerEdge->getContextIds());
4159 if (CallerEdgeContextsForAlloc.empty())
4164 CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
4165 CalleeEdgeAllocTypesForCallerEdge.clear();
4166 for (
auto &CalleeEdge :
Node->CalleeEdges) {
4167 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
4168 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
4174 ContextNode *Clone =
nullptr;
4175 for (
auto *CurClone :
Node->Clones) {
4176 if (allocTypeToUse(CurClone->AllocTypes) !=
4177 allocTypeToUse(CallerAllocTypeForAlloc))
4184 assert(!BothSingleAlloc ||
4185 CurClone->AllocTypes == CallerAllocTypeForAlloc);
4191 if (BothSingleAlloc || allocTypesMatchClone<DerivedCCG, FuncTy, CallTy>(
4192 CalleeEdgeAllocTypesForCallerEdge, CurClone)) {
4200 moveEdgeToExistingCalleeClone(CallerEdge, Clone,
false,
4201 CallerEdgeContextsForAlloc);
4203 Clone = moveEdgeToNewCalleeClone(CallerEdge, CallerEdgeContextsForAlloc);
4206 assert(Clone->AllocTypes != (uint8_t)AllocationType::None);
4213 assert(
Node->AllocTypes != (uint8_t)AllocationType::None);
4219void ModuleCallsiteContextGraph::updateAllocationCall(
4224 "memprof", AllocTypeString);
4227 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofAttribute",
Call.call())
4228 <<
ore::NV(
"AllocationCall",
Call.call()) <<
" in clone "
4230 <<
" marked with memprof allocation attribute "
4231 <<
ore::NV(
"Attribute", AllocTypeString));
4234void IndexCallsiteContextGraph::updateAllocationCall(CallInfo &
Call,
4238 assert(AI->Versions.size() >
Call.cloneNo());
4243ModuleCallsiteContextGraph::getAllocationCallType(
const CallInfo &
Call)
const {
4245 if (!CB->getAttributes().hasFnAttr(
"memprof"))
4246 return AllocationType::None;
4247 return CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
4248 ? AllocationType::Cold
4249 : AllocationType::NotCold;
4253IndexCallsiteContextGraph::getAllocationCallType(
const CallInfo &
Call)
const {
4255 assert(AI->Versions.size() >
Call.cloneNo());
4259void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall,
4260 FuncInfo CalleeFunc) {
4261 auto *CurF = getCalleeFunc(CallerCall.call());
4262 auto NewCalleeCloneNo = CalleeFunc.cloneNo();
4269 if (CurCalleeCloneNo != NewCalleeCloneNo) {
4271 << CurCalleeCloneNo <<
" now " << NewCalleeCloneNo
4273 MismatchedCloneAssignments++;
4276 if (NewCalleeCloneNo > 0)
4277 cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
4278 OREGetter(CallerCall.call()->getFunction())
4279 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofCall", CallerCall.call())
4280 <<
ore::NV(
"Call", CallerCall.call()) <<
" in clone "
4281 <<
ore::NV(
"Caller", CallerCall.call()->getFunction())
4282 <<
" assigned to call function clone "
4283 <<
ore::NV(
"Callee", CalleeFunc.func()));
4286void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall,
4287 FuncInfo CalleeFunc) {
4290 "Caller cannot be an allocation which should not have profiled calls");
4291 assert(CI->Clones.size() > CallerCall.cloneNo());
4292 auto NewCalleeCloneNo = CalleeFunc.cloneNo();
4293 auto &CurCalleeCloneNo = CI->Clones[CallerCall.cloneNo()];
4298 if (CurCalleeCloneNo != 0 && CurCalleeCloneNo != NewCalleeCloneNo) {
4300 << CurCalleeCloneNo <<
" now " << NewCalleeCloneNo
4302 MismatchedCloneAssignments++;
4304 CurCalleeCloneNo = NewCalleeCloneNo;
4316 SP->replaceLinkageName(MDName);
4320 TempDISubprogram NewDecl = Decl->
clone();
4321 NewDecl->replaceLinkageName(MDName);
4325CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
4327ModuleCallsiteContextGraph::cloneFunctionForCallsite(
4328 FuncInfo &Func, CallInfo &
Call, DenseMap<CallInfo, CallInfo> &CallMap,
4329 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
4334 assert(!
Func.func()->getParent()->getFunction(Name));
4335 NewFunc->setName(Name);
4337 for (
auto &Inst : CallsWithMetadataInFunc) {
4339 assert(Inst.cloneNo() == 0);
4342 OREGetter(
Func.func())
4343 .emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofClone",
Func.func())
4344 <<
"created clone " <<
ore::NV(
"NewFunction", NewFunc));
4345 return {NewFunc, CloneNo};
4348CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
4349 IndexCall>::FuncInfo
4350IndexCallsiteContextGraph::cloneFunctionForCallsite(
4351 FuncInfo &Func, CallInfo &
Call, DenseMap<CallInfo, CallInfo> &CallMap,
4352 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
4366 for (
auto &Inst : CallsWithMetadataInFunc) {
4368 assert(Inst.cloneNo() == 0);
4370 assert(AI->Versions.size() == CloneNo);
4373 AI->Versions.push_back(0);
4376 assert(CI && CI->Clones.size() == CloneNo);
4379 CI->Clones.push_back(0);
4381 CallMap[Inst] = {Inst.call(), CloneNo};
4383 return {
Func.func(), CloneNo};
4400template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4401void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones() {
4407 DenseMap<uint32_t, ContextNode *> ContextIdToAllocationNode;
4408 for (
auto &Entry : AllocationCallToContextNodeMap) {
4410 for (
auto Id :
Node->getContextIds())
4411 ContextIdToAllocationNode[
Id] =
Node->getOrigNode();
4412 for (
auto *Clone :
Node->Clones) {
4413 for (
auto Id : Clone->getContextIds())
4414 ContextIdToAllocationNode[
Id] = Clone->getOrigNode();
4421 DenseSet<const ContextNode *> Visited;
4422 for (
auto &Entry : AllocationCallToContextNodeMap) {
4425 mergeClones(Node, Visited, ContextIdToAllocationNode);
4431 auto Clones =
Node->Clones;
4432 for (
auto *Clone : Clones)
4433 mergeClones(Clone, Visited, ContextIdToAllocationNode);
4437 dbgs() <<
"CCG after merging:\n";
4441 exportToDot(
"aftermerge");
4449template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4450void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones(
4451 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
4452 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
4462 bool FoundUnvisited =
true;
4464 while (FoundUnvisited) {
4466 FoundUnvisited =
false;
4469 auto CallerEdges =
Node->CallerEdges;
4470 for (
auto CallerEdge : CallerEdges) {
4472 if (CallerEdge->Callee != Node)
4477 FoundUnvisited =
true;
4478 mergeClones(CallerEdge->Caller, Visited, ContextIdToAllocationNode);
4482 TotalMergeInvokes++;
4483 TotalMergeIters += Iters;
4484 if (Iters > MaxMergeIters)
4485 MaxMergeIters = Iters;
4488 mergeNodeCalleeClones(Node, Visited, ContextIdToAllocationNode);
4491template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4492void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeNodeCalleeClones(
4493 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
4494 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode) {
4496 if (
Node->emptyContextIds())
4501 MapVector<ContextNode *, std::vector<std::shared_ptr<ContextEdge>>>
4502 OrigNodeToCloneEdges;
4503 for (
const auto &
E :
Node->CalleeEdges) {
4508 OrigNodeToCloneEdges[
Base].push_back(
E);
4514 auto CalleeCallerEdgeLessThan = [](
const std::shared_ptr<ContextEdge> &
A,
4515 const std::shared_ptr<ContextEdge> &
B) {
4516 if (
A->Callee->CallerEdges.size() !=
B->Callee->CallerEdges.size())
4517 return A->Callee->CallerEdges.size() <
B->Callee->CallerEdges.size();
4518 if (
A->Callee->CloneOf && !
B->Callee->CloneOf)
4520 else if (!
A->Callee->CloneOf &&
B->Callee->CloneOf)
4524 return *
A->ContextIds.begin() < *
B->ContextIds.begin();
4529 for (
auto Entry : OrigNodeToCloneEdges) {
4532 auto &CalleeEdges =
Entry.second;
4533 auto NumCalleeClones = CalleeEdges.size();
4535 if (NumCalleeClones == 1)
4546 DenseSet<ContextNode *> OtherCallersToShareMerge;
4547 findOtherCallersToShareMerge(Node, CalleeEdges, ContextIdToAllocationNode,
4548 OtherCallersToShareMerge);
4553 ContextNode *MergeNode =
nullptr;
4554 DenseMap<ContextNode *, unsigned> CallerToMoveCount;
4555 for (
auto CalleeEdge : CalleeEdges) {
4556 auto *OrigCallee = CalleeEdge->Callee;
4562 if (CalleeEdge->Callee->CallerEdges.size() == 1) {
4563 MergeNode = OrigCallee;
4564 NonNewMergedNodes++;
4571 if (!OtherCallersToShareMerge.
empty()) {
4572 bool MoveAllCallerEdges =
true;
4573 for (
auto CalleeCallerE : OrigCallee->CallerEdges) {
4574 if (CalleeCallerE == CalleeEdge)
4576 if (!OtherCallersToShareMerge.
contains(CalleeCallerE->Caller)) {
4577 MoveAllCallerEdges =
false;
4583 if (MoveAllCallerEdges) {
4584 MergeNode = OrigCallee;
4585 NonNewMergedNodes++;
4592 assert(MergeNode != OrigCallee);
4593 moveEdgeToExistingCalleeClone(CalleeEdge, MergeNode,
4596 MergeNode = moveEdgeToNewCalleeClone(CalleeEdge);
4601 if (!OtherCallersToShareMerge.
empty()) {
4605 auto OrigCalleeCallerEdges = OrigCallee->CallerEdges;
4606 for (
auto &CalleeCallerE : OrigCalleeCallerEdges) {
4607 if (CalleeCallerE == CalleeEdge)
4609 if (!OtherCallersToShareMerge.
contains(CalleeCallerE->Caller))
4611 CallerToMoveCount[CalleeCallerE->Caller]++;
4612 moveEdgeToExistingCalleeClone(CalleeCallerE, MergeNode,
4616 removeNoneTypeCalleeEdges(OrigCallee);
4617 removeNoneTypeCalleeEdges(MergeNode);
4635template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4636void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
4637 findOtherCallersToShareMerge(
4639 std::vector<std::shared_ptr<ContextEdge>> &CalleeEdges,
4640 DenseMap<uint32_t, ContextNode *> &ContextIdToAllocationNode,
4641 DenseSet<ContextNode *> &OtherCallersToShareMerge) {
4642 auto NumCalleeClones = CalleeEdges.size();
4645 DenseMap<ContextNode *, unsigned> OtherCallersToSharedCalleeEdgeCount;
4648 unsigned PossibleOtherCallerNodes = 0;
4652 if (CalleeEdges[0]->
Callee->CallerEdges.size() < 2)
4658 DenseMap<ContextEdge *, DenseSet<ContextNode *>> CalleeEdgeToAllocNodes;
4659 for (
auto CalleeEdge : CalleeEdges) {
4660 assert(CalleeEdge->Callee->CallerEdges.size() > 1);
4663 for (
auto CalleeCallerEdges : CalleeEdge->Callee->CallerEdges) {
4664 if (CalleeCallerEdges->Caller == Node) {
4665 assert(CalleeCallerEdges == CalleeEdge);
4668 OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller]++;
4671 if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller] ==
4673 PossibleOtherCallerNodes++;
4677 for (
auto Id : CalleeEdge->getContextIds()) {
4678 auto *
Alloc = ContextIdToAllocationNode.
lookup(Id);
4682 MissingAllocForContextId++;
4685 CalleeEdgeToAllocNodes[CalleeEdge.get()].
insert(
Alloc);
4692 for (
auto CalleeEdge : CalleeEdges) {
4693 assert(CalleeEdge->Callee->CallerEdges.size() > 1);
4695 if (!PossibleOtherCallerNodes)
4697 auto &CurCalleeAllocNodes = CalleeEdgeToAllocNodes[CalleeEdge.get()];
4699 for (
auto &CalleeCallerE : CalleeEdge->Callee->CallerEdges) {
4701 if (CalleeCallerE == CalleeEdge)
4705 if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] !=
4710 for (
auto Id : CalleeCallerE->getContextIds()) {
4711 auto *
Alloc = ContextIdToAllocationNode.
lookup(Id);
4716 if (!CurCalleeAllocNodes.contains(
Alloc)) {
4717 OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] = 0;
4718 PossibleOtherCallerNodes--;
4725 if (!PossibleOtherCallerNodes)
4730 for (
auto &[OtherCaller,
Count] : OtherCallersToSharedCalleeEdgeCount) {
4731 if (
Count != NumCalleeClones)
4733 OtherCallersToShareMerge.
insert(OtherCaller);
4778template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4779bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
4786 DenseMap<ContextNode *, FuncInfo> CallsiteToCalleeFuncCloneMap;
4790 auto RecordCalleeFuncOfCallsite = [&](ContextNode *
Caller,
4791 const FuncInfo &CalleeFunc) {
4793 CallsiteToCalleeFuncCloneMap[
Caller] = CalleeFunc;
4797 struct FuncCloneInfo {
4802 DenseMap<CallInfo, CallInfo> CallMap;
4830 DenseMap<const ContextNode *, std::map<unsigned, SmallVector<CallInfo, 0>>>
4831 UnassignedCallClones;
4835 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
4836 FuncInfo OrigFunc(Func);
4841 std::vector<FuncCloneInfo> FuncCloneInfos;
4842 for (
auto &
Call : CallsWithMetadata) {
4843 ContextNode *
Node = getNodeForInst(
Call);
4847 if (!Node ||
Node->Clones.empty())
4850 "Not having a call should have prevented cloning");
4854 std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap;
4858 auto AssignCallsiteCloneToFuncClone = [&](
const FuncInfo &FuncClone,
4860 ContextNode *CallsiteClone,
4863 FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone;
4865 assert(FuncCloneInfos.size() > FuncClone.cloneNo());
4866 DenseMap<CallInfo, CallInfo> &CallMap =
4867 FuncCloneInfos[FuncClone.cloneNo()].CallMap;
4868 CallInfo CallClone(
Call);
4869 if (
auto It = CallMap.
find(
Call); It != CallMap.
end())
4870 CallClone = It->second;
4871 CallsiteClone->setCall(CallClone);
4873 for (
auto &MatchingCall :
Node->MatchingCalls) {
4874 CallInfo CallClone(MatchingCall);
4875 if (
auto It = CallMap.
find(MatchingCall); It != CallMap.
end())
4876 CallClone = It->second;
4878 MatchingCall = CallClone;
4886 auto MoveEdgeToNewCalleeCloneAndSetUp =
4887 [&](
const std::shared_ptr<ContextEdge> &
Edge) {
4888 ContextNode *OrigCallee =
Edge->Callee;
4889 ContextNode *NewClone = moveEdgeToNewCalleeClone(
Edge);
4890 removeNoneTypeCalleeEdges(NewClone);
4891 assert(NewClone->AllocTypes != (uint8_t)AllocationType::None);
4895 if (CallsiteToCalleeFuncCloneMap.
count(OrigCallee))
4896 RecordCalleeFuncOfCallsite(
4897 NewClone, CallsiteToCalleeFuncCloneMap[OrigCallee]);
4904 std::deque<ContextNode *> ClonesWorklist;
4906 if (!
Node->emptyContextIds())
4907 ClonesWorklist.push_back(Node);
4913 unsigned NodeCloneCount = 0;
4914 while (!ClonesWorklist.empty()) {
4915 ContextNode *Clone = ClonesWorklist.front();
4916 ClonesWorklist.pop_front();
4925 if (FuncCloneInfos.size() < NodeCloneCount) {
4927 if (NodeCloneCount == 1) {
4932 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &
E) {
4933 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
4937 FuncCloneInfos.push_back(
4938 {OrigFunc, DenseMap<CallInfo, CallInfo>()});
4939 AssignCallsiteCloneToFuncClone(
4940 OrigFunc,
Call, Clone,
4941 AllocationCallToContextNodeMap.count(
Call));
4942 for (
auto &CE : Clone->CallerEdges) {
4944 if (!
CE->Caller->hasCall())
4946 RecordCalleeFuncOfCallsite(
CE->Caller, OrigFunc);
4956 FuncInfo PreviousAssignedFuncClone;
4958 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &
E) {
4959 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
4961 bool CallerAssignedToCloneOfFunc =
false;
4962 if (EI != Clone->CallerEdges.end()) {
4963 const std::shared_ptr<ContextEdge> &
Edge = *EI;
4964 PreviousAssignedFuncClone =
4965 CallsiteToCalleeFuncCloneMap[
Edge->Caller];
4966 CallerAssignedToCloneOfFunc =
true;
4971 DenseMap<CallInfo, CallInfo> NewCallMap;
4972 unsigned CloneNo = FuncCloneInfos.size();
4973 assert(CloneNo > 0 &&
"Clone 0 is the original function, which "
4974 "should already exist in the map");
4975 FuncInfo NewFuncClone = cloneFunctionForCallsite(
4976 OrigFunc,
Call, NewCallMap, CallsWithMetadata, CloneNo);
4977 FuncCloneInfos.push_back({NewFuncClone, std::move(NewCallMap)});
4978 FunctionClonesAnalysis++;
4984 if (!CallerAssignedToCloneOfFunc) {
4985 AssignCallsiteCloneToFuncClone(
4986 NewFuncClone,
Call, Clone,
4987 AllocationCallToContextNodeMap.count(
Call));
4988 for (
auto &CE : Clone->CallerEdges) {
4990 if (!
CE->Caller->hasCall())
4992 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
5004 auto CallerEdges = Clone->CallerEdges;
5005 for (
auto CE : CallerEdges) {
5007 if (
CE->isRemoved()) {
5013 if (!
CE->Caller->hasCall())
5016 if (!CallsiteToCalleeFuncCloneMap.
count(
CE->Caller) ||
5020 CallsiteToCalleeFuncCloneMap[
CE->Caller] !=
5021 PreviousAssignedFuncClone)
5024 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
5037 auto CalleeEdges =
CE->Caller->CalleeEdges;
5038 for (
auto CalleeEdge : CalleeEdges) {
5041 if (CalleeEdge->isRemoved()) {
5046 ContextNode *
Callee = CalleeEdge->Callee;
5050 if (Callee == Clone || !
Callee->hasCall())
5055 if (Callee == CalleeEdge->Caller)
5057 ContextNode *NewClone =
5058 MoveEdgeToNewCalleeCloneAndSetUp(CalleeEdge);
5061 removeNoneTypeCalleeEdges(Callee);
5069 CallInfo OrigCall(
Callee->getOrigNode()->Call);
5070 OrigCall.setCloneNo(0);
5071 DenseMap<CallInfo, CallInfo> &CallMap =
5072 FuncCloneInfos[NewFuncClone.cloneNo()].CallMap;
5074 CallInfo NewCall(CallMap[OrigCall]);
5076 NewClone->setCall(NewCall);
5078 for (
auto &MatchingCall : NewClone->MatchingCalls) {
5079 CallInfo OrigMatchingCall(MatchingCall);
5080 OrigMatchingCall.setCloneNo(0);
5082 CallInfo NewCall(CallMap[OrigMatchingCall]);
5085 MatchingCall = NewCall;
5094 auto FindFirstAvailFuncClone = [&]() {
5099 for (
auto &CF : FuncCloneInfos) {
5100 if (!FuncCloneToCurNodeCloneMap.count(CF.FuncClone))
5101 return CF.FuncClone;
5104 "Expected an available func clone for this callsite clone");
5121 std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap;
5122 FuncInfo FuncCloneAssignedToCurCallsiteClone;
5126 auto CloneCallerEdges = Clone->CallerEdges;
5127 for (
auto &
Edge : CloneCallerEdges) {
5131 if (
Edge->isRemoved())
5134 if (!
Edge->Caller->hasCall())
5138 if (CallsiteToCalleeFuncCloneMap.
count(
Edge->Caller)) {
5139 FuncInfo FuncCloneCalledByCaller =
5140 CallsiteToCalleeFuncCloneMap[
Edge->Caller];
5150 if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) &&
5151 FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] !=
5159 (FuncCloneAssignedToCurCallsiteClone &&
5160 FuncCloneAssignedToCurCallsiteClone !=
5161 FuncCloneCalledByCaller)) {
5176 if (FuncCloneToNewCallsiteCloneMap.count(
5177 FuncCloneCalledByCaller)) {
5178 ContextNode *NewClone =
5179 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller];
5180 moveEdgeToExistingCalleeClone(
Edge, NewClone);
5182 removeNoneTypeCalleeEdges(NewClone);
5185 ContextNode *NewClone = MoveEdgeToNewCalleeCloneAndSetUp(
Edge);
5186 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] =
5189 ClonesWorklist.push_back(NewClone);
5193 removeNoneTypeCalleeEdges(Clone);
5201 if (!FuncCloneAssignedToCurCallsiteClone) {
5202 FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller;
5204 AssignCallsiteCloneToFuncClone(
5205 FuncCloneCalledByCaller,
Call, Clone,
5206 AllocationCallToContextNodeMap.count(
Call));
5210 assert(FuncCloneAssignedToCurCallsiteClone ==
5211 FuncCloneCalledByCaller);
5220 if (!FuncCloneAssignedToCurCallsiteClone) {
5221 FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
5222 assert(FuncCloneAssignedToCurCallsiteClone);
5224 AssignCallsiteCloneToFuncClone(
5225 FuncCloneAssignedToCurCallsiteClone,
Call, Clone,
5226 AllocationCallToContextNodeMap.count(
Call));
5228 assert(FuncCloneToCurNodeCloneMap
5229 [FuncCloneAssignedToCurCallsiteClone] == Clone);
5231 RecordCalleeFuncOfCallsite(
Edge->Caller,
5232 FuncCloneAssignedToCurCallsiteClone);
5252 if (!FuncCloneAssignedToCurCallsiteClone) {
5253 FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
5254 assert(FuncCloneAssignedToCurCallsiteClone &&
5255 "No available func clone for this callsite clone");
5256 AssignCallsiteCloneToFuncClone(
5257 FuncCloneAssignedToCurCallsiteClone,
Call, Clone,
5258 AllocationCallToContextNodeMap.contains(
Call));
5263 for (
const auto &PE :
Node->CalleeEdges)
5265 for (
const auto &CE :
Node->CallerEdges)
5267 for (
auto *Clone :
Node->Clones) {
5269 for (
const auto &PE : Clone->CalleeEdges)
5271 for (
const auto &CE : Clone->CallerEdges)
5277 if (FuncCloneInfos.size() < 2)
5283 for (
auto &
Call : CallsWithMetadata) {
5284 ContextNode *
Node = getNodeForInst(
Call);
5285 if (!Node || !
Node->hasCall() ||
Node->emptyContextIds())
5291 if (
Node->Clones.size() + 1 >= FuncCloneInfos.size())
5295 DenseSet<unsigned> NodeCallClones;
5296 for (
auto *
C :
Node->Clones)
5297 NodeCallClones.
insert(
C->Call.cloneNo());
5300 for (
auto &FC : FuncCloneInfos) {
5305 if (++
I == 1 || NodeCallClones.
contains(
I)) {
5310 auto &CallVector = UnassignedCallClones[
Node][
I];
5311 DenseMap<CallInfo, CallInfo> &CallMap =
FC.CallMap;
5312 if (
auto It = CallMap.
find(
Call); It != CallMap.
end()) {
5313 CallInfo CallClone = It->second;
5314 CallVector.push_back(CallClone);
5318 assert(
false &&
"Expected to find call in CallMap");
5321 for (
auto &MatchingCall :
Node->MatchingCalls) {
5322 if (
auto It = CallMap.
find(MatchingCall); It != CallMap.
end()) {
5323 CallInfo CallClone = It->second;
5324 CallVector.push_back(CallClone);
5328 assert(
false &&
"Expected to find call in CallMap");
5336 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
5338 auto UpdateCalls = [&](ContextNode *
Node,
5339 DenseSet<const ContextNode *> &Visited,
5340 auto &&UpdateCalls) {
5341 auto Inserted = Visited.insert(Node);
5345 for (
auto *Clone :
Node->Clones)
5346 UpdateCalls(Clone, Visited, UpdateCalls);
5348 for (
auto &
Edge :
Node->CallerEdges)
5349 UpdateCalls(
Edge->Caller, Visited, UpdateCalls);
5353 if (!
Node->hasCall() ||
Node->emptyContextIds())
5356 if (
Node->IsAllocation) {
5357 auto AT = allocTypeToUse(
Node->AllocTypes);
5363 !ContextIdToContextSizeInfos.empty()) {
5364 uint64_t TotalCold = 0;
5366 for (
auto Id :
Node->getContextIds()) {
5367 auto TypeI = ContextIdToAllocationType.find(Id);
5368 assert(TypeI != ContextIdToAllocationType.end());
5369 auto CSI = ContextIdToContextSizeInfos.find(Id);
5370 if (CSI != ContextIdToContextSizeInfos.end()) {
5371 for (
auto &
Info : CSI->second) {
5373 if (TypeI->second == AllocationType::Cold)
5374 TotalCold +=
Info.TotalSize;
5379 AT = AllocationType::Cold;
5381 updateAllocationCall(
Node->Call, AT);
5386 if (!CallsiteToCalleeFuncCloneMap.
count(Node))
5389 auto CalleeFunc = CallsiteToCalleeFuncCloneMap[
Node];
5390 updateCall(
Node->Call, CalleeFunc);
5392 for (
auto &
Call :
Node->MatchingCalls)
5393 updateCall(
Call, CalleeFunc);
5397 if (!UnassignedCallClones.
contains(Node))
5399 DenseSet<unsigned> NodeCallClones;
5400 for (
auto *
C :
Node->Clones)
5401 NodeCallClones.
insert(
C->Call.cloneNo());
5403 auto &ClonedCalls = UnassignedCallClones[
Node];
5404 for (
auto &[CloneNo, CallVector] : ClonedCalls) {
5408 if (NodeCallClones.
contains(CloneNo))
5411 for (
auto &
Call : CallVector)
5412 updateCall(
Call, CalleeFunc);
5421 DenseSet<const ContextNode *> Visited;
5422 for (
auto &Entry : AllocationCallToContextNodeMap)
5423 UpdateCalls(
Entry.second, Visited, UpdateCalls);
5434 for (
auto &SN : FS->callsites()) {
5439 SN.Clones.size() >
I &&
5440 "Callsite summary has fewer entries than other summaries in function");
5441 if (SN.Clones.size() <=
I || !SN.Clones[
I])
5448 for (
auto &AN : FS->allocs()) {
5452 assert(AN.Versions.size() >
I &&
5453 "Alloc summary has fewer entries than other summaries in function");
5454 if (AN.Versions.size() <=
I ||
5471 NewGV->takeName(DeclGV);
5478 auto CloneFuncAliases = [&](
Function *NewF,
unsigned I) {
5479 if (!FuncToAliasMap.count(&
F))
5481 for (
auto *
A : FuncToAliasMap[&
F]) {
5483 auto *PrevA = M.getNamedAlias(AliasName);
5485 A->getType()->getPointerAddressSpace(),
5486 A->getLinkage(), AliasName, NewF);
5487 NewA->copyAttributesFrom(
A);
5489 TakeDeclNameAndReplace(PrevA, NewA);
5498 FunctionsClonedThinBackend++;
5515 for (
unsigned I = 1;
I < NumClones;
I++) {
5516 VMaps.
emplace_back(std::make_unique<ValueToValueMapTy>());
5523 FunctionCloneDuplicatesThinBackend++;
5524 auto *Func = HashToFunc[Hash];
5525 if (Func->hasAvailableExternallyLinkage()) {
5531 auto Decl = M.getOrInsertFunction(Name, Func->getFunctionType());
5533 <<
"created clone decl " <<
ore::NV(
"Decl", Decl.getCallee()));
5536 auto *PrevF = M.getFunction(Name);
5539 TakeDeclNameAndReplace(PrevF, Alias);
5541 <<
"created clone alias " <<
ore::NV(
"Alias", Alias));
5544 CloneFuncAliases(Func,
I);
5548 HashToFunc[Hash] = NewF;
5549 FunctionClonesThinBackend++;
5552 for (
auto &BB : *NewF) {
5553 for (
auto &Inst : BB) {
5554 Inst.setMetadata(LLVMContext::MD_memprof,
nullptr);
5555 Inst.setMetadata(LLVMContext::MD_callsite,
nullptr);
5560 TakeDeclNameAndReplace(PrevF, NewF);
5562 NewF->setName(Name);
5565 <<
"created clone " <<
ore::NV(
"NewFunction", NewF));
5568 CloneFuncAliases(NewF,
I);
5577 const Function *CallingFunc =
nullptr) {
5596 auto SrcFileMD =
F.getMetadata(
"thinlto_src_file");
5602 if (!SrcFileMD &&
F.isDeclaration()) {
5606 SrcFileMD = CallingFunc->getMetadata(
"thinlto_src_file");
5611 assert(SrcFileMD || OrigName ==
F.getName());
5613 StringRef SrcFile = M.getSourceFileName();
5625 if (!TheFnVI && OrigName ==
F.getName() &&
F.hasLocalLinkage() &&
5626 F.getName().contains(
'.')) {
5627 OrigName =
F.getName().rsplit(
'.').first;
5636 assert(TheFnVI ||
F.isDeclaration());
5640bool MemProfContextDisambiguation::initializeIndirectCallPromotionInfo(
5642 ICallAnalysis = std::make_unique<ICallPromotionAnalysis>();
5643 Symtab = std::make_unique<InstrProfSymtab>();
5654 if (
Error E = Symtab->create(M,
true,
false)) {
5655 std::string SymtabFailure =
toString(std::move(
E));
5656 M.getContext().emitError(
"Failed to create symtab: " + SymtabFailure);
5669 auto MIBIter = AllocNode.
MIBs.begin();
5670 for (
auto &MDOp : MemProfMD->
operands()) {
5672 auto StackIdIndexIter = MIBIter->StackIdIndices.begin();
5677 auto ContextIterBegin =
5681 (ContextIterBegin != StackContext.
end() && *ContextIterBegin == 0) ? 1
5683 for (
auto ContextIter = ContextIterBegin; ContextIter != StackContext.
end();
5688 if (LastStackContextId == *ContextIter)
5690 LastStackContextId = *ContextIter;
5691 assert(StackIdIndexIter != MIBIter->StackIdIndices.end());
5701bool MemProfContextDisambiguation::applyImport(
Module &M) {
5708 std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
5710 for (
auto &
A :
M.aliases()) {
5711 auto *Aliasee =
A.getAliaseeObject();
5713 FuncToAliasMap[
F].insert(&
A);
5716 if (!initializeIndirectCallPromotionInfo(M))
5723 OptimizationRemarkEmitter ORE(&
F);
5726 bool ClonesCreated =
false;
5727 unsigned NumClonesCreated = 0;
5728 auto CloneFuncIfNeeded = [&](
unsigned NumClones, FunctionSummary *
FS) {
5738 if (ClonesCreated) {
5739 assert(NumClonesCreated == NumClones);
5746 ClonesCreated =
true;
5747 NumClonesCreated = NumClones;
5750 auto CloneCallsite = [&](
const CallsiteInfo &StackNode, CallBase *CB,
5751 Function *CalledFunction, FunctionSummary *
FS) {
5753 CloneFuncIfNeeded(StackNode.
Clones.
size(), FS);
5765 if (CalledFunction != CB->getCalledOperand() &&
5766 (!GA || CalledFunction != GA->getAliaseeObject())) {
5767 SkippedCallsCloning++;
5773 auto CalleeOrigName = CalledFunction->getName();
5774 for (
unsigned J = 0; J < StackNode.
Clones.
size(); J++) {
5777 if (J > 0 && VMaps[J - 1]->
empty())
5781 if (!StackNode.
Clones[J])
5783 auto NewF =
M.getOrInsertFunction(
5785 CalledFunction->getFunctionType());
5799 ORE.emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofCall", CBClone)
5800 <<
ore::NV(
"Call", CBClone) <<
" in clone "
5802 <<
" assigned to call function clone "
5803 <<
ore::NV(
"Callee", NewF.getCallee()));
5817 ImportSummary->findSummaryInModule(TheFnVI,
M.getModuleIdentifier());
5821 auto SrcModuleMD =
F.getMetadata(
"thinlto_src_module");
5823 "enable-import-metadata is needed to emit thinlto_src_module");
5824 StringRef SrcModule =
5827 if (GVS->modulePath() == SrcModule) {
5828 GVSummary = GVS.get();
5832 assert(GVSummary && GVSummary->modulePath() == SrcModule);
5842 if (
FS->allocs().empty() &&
FS->callsites().empty())
5845 auto SI =
FS->callsites().begin();
5846 auto AI =
FS->allocs().begin();
5851 DenseMap<ValueInfo, CallsiteInfo> MapTailCallCalleeVIToCallsite;
5854 for (
auto CallsiteIt =
FS->callsites().rbegin();
5855 CallsiteIt !=
FS->callsites().rend(); CallsiteIt++) {
5856 auto &Callsite = *CallsiteIt;
5860 if (!Callsite.StackIdIndices.empty())
5862 MapTailCallCalleeVIToCallsite.
insert({Callsite.Callee, Callsite});
5871 for (
auto &BB :
F) {
5872 for (
auto &
I : BB) {
5878 auto *CalledValue = CB->getCalledOperand();
5879 auto *CalledFunction = CB->getCalledFunction();
5880 if (CalledValue && !CalledFunction) {
5881 CalledValue = CalledValue->stripPointerCasts();
5888 assert(!CalledFunction &&
5889 "Expected null called function in callsite for alias");
5893 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
5894 I.getMetadata(LLVMContext::MD_callsite));
5895 auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof);
5901 if (CB->getAttributes().hasFnAttr(
"memprof") && !MemProfMD) {
5902 CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
5903 ? AllocTypeColdThinBackend++
5904 : AllocTypeNotColdThinBackend++;
5905 OrigAllocsThinBackend++;
5906 AllocVersionsThinBackend++;
5907 if (!MaxAllocVersionsThinBackend)
5908 MaxAllocVersionsThinBackend = 1;
5915 auto &AllocNode = *(AI++);
5923 CloneFuncIfNeeded(AllocNode.Versions.size(), FS);
5925 OrigAllocsThinBackend++;
5926 AllocVersionsThinBackend += AllocNode.Versions.size();
5927 if (MaxAllocVersionsThinBackend < AllocNode.Versions.size())
5928 MaxAllocVersionsThinBackend = AllocNode.Versions.size();
5938 if (AllocNode.Versions.size() == 1 &&
5941 AllocationType::NotCold ||
5943 AllocationType::None);
5944 UnclonableAllocsThinBackend++;
5950 return Type == ((uint8_t)AllocationType::NotCold |
5951 (uint8_t)AllocationType::Cold);
5955 for (
unsigned J = 0; J < AllocNode.Versions.size(); J++) {
5958 if (J > 0 && VMaps[J - 1]->
empty())
5961 if (AllocNode.Versions[J] == (uint8_t)AllocationType::None)
5964 AllocTy == AllocationType::Cold ? AllocTypeColdThinBackend++
5965 : AllocTypeNotColdThinBackend++;
5980 ORE.emit(OptimizationRemark(
DEBUG_TYPE,
"MemprofAttribute", CBClone)
5981 <<
ore::NV(
"AllocationCall", CBClone) <<
" in clone "
5983 <<
" marked with memprof allocation attribute "
5984 <<
ore::NV(
"Attribute", AllocTypeString));
5986 }
else if (!CallsiteContext.empty()) {
5987 if (!CalledFunction) {
5991 assert(!CI || !CI->isInlineAsm());
6001 recordICPInfo(CB,
FS->callsites(), SI, ICallAnalysisInfo);
6007 CloneFuncIfNeeded(NumClones, FS);
6012 assert(SI !=
FS->callsites().end());
6013 auto &StackNode = *(
SI++);
6019 for (
auto StackId : CallsiteContext) {
6021 assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) ==
6027 CloneCallsite(StackNode, CB, CalledFunction, FS);
6029 }
else if (CB->isTailCall() && CalledFunction) {
6032 ValueInfo CalleeVI =
6034 if (CalleeVI && MapTailCallCalleeVIToCallsite.
count(CalleeVI)) {
6035 auto Callsite = MapTailCallCalleeVIToCallsite.
find(CalleeVI);
6036 assert(Callsite != MapTailCallCalleeVIToCallsite.
end());
6037 CloneCallsite(Callsite->second, CB, CalledFunction, FS);
6044 performICP(M,
FS->callsites(), VMaps, ICallAnalysisInfo, ORE);
6054 for (
auto &BB :
F) {
6055 for (
auto &
I : BB) {
6058 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
6059 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
6067unsigned MemProfContextDisambiguation::recordICPInfo(
6072 uint32_t NumCandidates;
6073 uint64_t TotalCount;
6074 auto CandidateProfileData =
6075 ICallAnalysis->getPromotionCandidatesForInstruction(
6077 if (CandidateProfileData.empty())
6083 bool ICPNeeded =
false;
6084 unsigned NumClones = 0;
6085 size_t CallsiteInfoStartIndex = std::distance(AllCallsites.
begin(), SI);
6086 for (
const auto &Candidate : CandidateProfileData) {
6088 auto CalleeValueInfo =
6090 ImportSummary->getValueInfo(Candidate.Value);
6093 assert(!CalleeValueInfo ||
SI->Callee == CalleeValueInfo);
6095 auto &StackNode = *(
SI++);
6100 [](
unsigned CloneNo) { return CloneNo != 0; });
6110 ICallAnalysisInfo.
push_back({CB, CandidateProfileData.vec(), NumCandidates,
6111 TotalCount, CallsiteInfoStartIndex});
6115void MemProfContextDisambiguation::performICP(
6117 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
6119 OptimizationRemarkEmitter &ORE) {
6126 for (
auto &
Info : ICallAnalysisInfo) {
6129 auto TotalCount =
Info.TotalCount;
6130 unsigned NumPromoted = 0;
6131 unsigned NumClones = 0;
6133 for (
auto &Candidate :
Info.CandidateProfileData) {
6144 Function *TargetFunction = Symtab->getFunction(Candidate.Value);
6145 if (TargetFunction ==
nullptr ||
6153 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnableToFindTarget", CB)
6154 <<
"Memprof cannot promote indirect call: target with md5sum "
6155 <<
ore::NV(
"target md5sum", Candidate.Value) <<
" not found";
6164 const char *Reason =
nullptr;
6167 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnableToPromote", CB)
6168 <<
"Memprof cannot promote indirect call to "
6169 <<
ore::NV(
"TargetFunction", TargetFunction)
6170 <<
" with count of " <<
ore::NV(
"TotalCount", TotalCount)
6181 CallBase *CBClone = CB;
6182 for (
unsigned J = 0; J < NumClones; J++) {
6185 if (J > 0 && VMaps[J - 1]->
empty())
6195 TotalCount, isSamplePGO, &ORE);
6196 auto *TargetToUse = TargetFunction;
6199 if (StackNode.
Clones[J]) {
6218 <<
ore::NV(
"Call", CBClone) <<
" in clone "
6220 <<
" promoted and assigned to call function clone "
6221 <<
ore::NV(
"Callee", TargetToUse));
6225 TotalCount -= Candidate.Count;
6230 CallBase *CBClone = CB;
6231 for (
unsigned J = 0; J < NumClones; J++) {
6234 if (J > 0 && VMaps[J - 1]->
empty())
6240 CBClone->
setMetadata(LLVMContext::MD_prof,
nullptr);
6243 if (TotalCount != 0)
6245 M, *CBClone,
ArrayRef(
Info.CandidateProfileData).slice(NumPromoted),
6246 TotalCount, IPVK_IndirectCallTarget,
Info.NumCandidates);
6251template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
6252bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
6254 dbgs() <<
"CCG before cloning:\n";
6258 exportToDot(
"postbuild");
6271 dbgs() <<
"CCG after cloning:\n";
6275 exportToDot(
"cloned");
6277 bool Changed = assignFunctions();
6280 dbgs() <<
"CCG after assigning function clones:\n";
6284 exportToDot(
"clonefuncassign");
6287 printTotalSizes(
errs());
6292bool MemProfContextDisambiguation::processModule(
6294 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) {
6299 return applyImport(M);
6312 ModuleCallsiteContextGraph CCG(M, OREGetter);
6313 return CCG.process();
6318 : ImportSummary(Summary), isSamplePGO(isSamplePGO) {
6323 "-memprof-dot-scope=alloc requires -memprof-dot-alloc-id");
6327 "-memprof-dot-scope=context requires -memprof-dot-context-id");
6331 "-memprof-dot-scope=all can't have both -memprof-dot-alloc-id and "
6332 "-memprof-dot-context-id");
6333 if (ImportSummary) {
6343 auto ReadSummaryFile =
6345 if (!ReadSummaryFile) {
6352 if (!ImportSummaryForTestingOrErr) {
6358 ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr);
6359 ImportSummary = ImportSummaryForTesting.get();
6368 if (!processModule(M, OREGetter))
6385 IndexCallsiteContextGraph CCG(Index, isPrevailing);
6401 for (
auto &BB :
F) {
6402 for (
auto &
I : BB) {
6406 if (CI->hasFnAttr(
"memprof")) {
6407 CI->removeFnAttr(
"memprof");
6410 if (!CI->hasMetadata(LLVMContext::MD_callsite)) {
6411 assert(!CI->hasMetadata(LLVMContext::MD_memprof));
6417 CI->setMetadata(LLVMContext::MD_memprof,
nullptr);
6418 CI->setMetadata(LLVMContext::MD_callsite,
nullptr);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Prepare AGPR Alloc
Unify divergent function exit nodes
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
Module.h This file contains the declarations for the Module class.
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
static cl::opt< unsigned > TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5), cl::Hidden, cl::desc("Max depth to recursively search for missing " "frames through tail calls."))
uint64_t ComputeHash(const FunctionSummary *FS, unsigned I)
static cl::opt< DotScope > DotGraphScope("memprof-dot-scope", cl::desc("Scope of graph to export to dot"), cl::Hidden, cl::init(DotScope::All), cl::values(clEnumValN(DotScope::All, "all", "Export full callsite graph"), clEnumValN(DotScope::Alloc, "alloc", "Export only nodes with contexts feeding given " "-memprof-dot-alloc-id"), clEnumValN(DotScope::Context, "context", "Export only nodes with given -memprof-dot-context-id")))
static cl::opt< bool > DoMergeIteration("memprof-merge-iteration", cl::init(true), cl::Hidden, cl::desc("Iteratively apply merging on a node to catch new callers"))
static bool isMemProfClone(const Function &F)
static cl::opt< unsigned > AllocIdForDot("memprof-dot-alloc-id", cl::init(0), cl::Hidden, cl::desc("Id of alloc to export if -memprof-dot-scope=alloc " "or to highlight if -memprof-dot-scope=all"))
static cl::opt< unsigned > ContextIdForDot("memprof-dot-context-id", cl::init(0), cl::Hidden, cl::desc("Id of context to export if -memprof-dot-scope=context or to " "highlight otherwise"))
static cl::opt< bool > ExportToDot("memprof-export-to-dot", cl::init(false), cl::Hidden, cl::desc("Export graph to dot files."))
static void checkEdge(const std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > &Edge)
static cl::opt< bool > AllowRecursiveCallsites("memprof-allow-recursive-callsites", cl::init(true), cl::Hidden, cl::desc("Allow cloning of callsites involved in recursive cycles"))
bool checkColdOrNotCold(uint8_t AllocType)
static ValueInfo findValueInfoForFunc(const Function &F, const Module &M, const ModuleSummaryIndex *ImportSummary, const Function *CallingFunc=nullptr)
static cl::opt< bool > CloneRecursiveContexts("memprof-clone-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts through recursive cycles"))
static std::string getAllocTypeString(uint8_t AllocTypes)
static cl::opt< unsigned > MemProfICPNoInlineThreshold("memprof-icp-noinline-threshold", cl::init(2), cl::Hidden, cl::desc("Minimum absolute count for promoted target to be inlinable"))
bool DOTGraphTraits< constCallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * >::DoHighlight
static unsigned getMemProfCloneNum(const Function &F)
static SmallVector< std::unique_ptr< ValueToValueMapTy >, 4 > createFunctionClones(Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, std::map< const Function *, SmallPtrSet< const GlobalAlias *, 1 > > &FuncToAliasMap, FunctionSummary *FS)
static cl::opt< bool > VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, cl::desc("Perform verification checks on CallingContextGraph."))
static void checkNode(const ContextNode< DerivedCCG, FuncTy, CallTy > *Node, bool CheckEdges=true)
static cl::opt< bool > MergeClones("memprof-merge-clones", cl::init(true), cl::Hidden, cl::desc("Merge clones before assigning functions"))
static std::string getMemProfFuncName(Twine Base, unsigned CloneNo)
static cl::opt< std::string > MemProfImportSummary("memprof-import-summary", cl::desc("Import summary to use for testing the ThinLTO backend via opt"), cl::Hidden)
static const std::string MemProfCloneSuffix
static void updateSubprogramLinkageName(Function *NewFunc, StringRef Name)
static cl::opt< bool > AllowRecursiveContexts("memprof-allow-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts having recursive cycles"))
static cl::opt< std::string > DotFilePathPrefix("memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path prefix of the MemProf dot files."))
static cl::opt< bool > VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, cl::desc("Perform frequent verification checks on nodes."))
static void checkAllocContextIds(const AllocInfo &AllocNode, const MDNode *MemProfMD, const CallStack< MDNode, MDNode::op_iterator > &CallsiteContext, const ModuleSummaryIndex *ImportSummary)
static cl::opt< bool > DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, cl::desc("Dump CallingContextGraph to stdout after each stage."))
This is the interface to build a ModuleSummaryIndex for a module.
ModuleSummaryIndex.h This file contains the declarations the classes that hold the module index and s...
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
FunctionAnalysisManager FAM
std::pair< BasicBlock *, BasicBlock * > Edge
This file defines generic set operations that may be used on set's of different types,...
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
void print(OutputBuffer &OB) const
ValueInfo getAliaseeVI() const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool empty() const
empty - Check if the array is empty.
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
void setCalledOperand(Value *V)
Subprogram description. Uses SubclassData1.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Implements a dense probed hash-table based set.
Function summary information to aid decisions and implementation of importing.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
DISubprogram * getSubprogram() const
Get the attached subprogram.
const Function & getFunction() const
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
static LLVM_ABI GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Function and variable summary information to aid decisions and implementation of importing.
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
static bool isLocalLinkage(LinkageTypes Linkage)
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI void eraseFromParent()
This method unlinks 'this' from the containing module and deletes it.
static LLVM_ABI std::string getGlobalIdentifier(StringRef Name, GlobalValue::LinkageTypes Linkage, StringRef FileName)
Return the modified name for a global value suitable to be used as the key for a global lookup (e....
@ InternalLinkage
Rename collisions when linking (static functions).
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
LLVM_ABI TempMDNode clone() const
Create a (temporary) clone of this.
static std::enable_if_t< std::is_base_of< MDNode, T >::value, T * > replaceWithUniqued(std::unique_ptr< T, TempMDNodeDeleter > N)
Replace a temporary node with a uniqued one.
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
size_type count(const KeyT &Key) const
MemProfContextDisambiguation(const ModuleSummaryIndex *Summary=nullptr, bool isSamplePGO=false)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Class to hold module path string table and global value map, and encapsulate methods for operating on...
static StringRef getOriginalNameBeforePromote(StringRef Name)
Helper to obtain the unpromoted name for a global value (or the original name if not promoted).
ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const
Return a ValueInfo for the index value_type (convenient when iterating index).
uint64_t getStackIdAtIndex(unsigned Index) const
A Module instance is used to store all the information related to an LLVM module.
LLVMContext & getContext() const
Get the global data context.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
A discriminated union of two or more pointer types, with the discriminator in the low bit of the poin...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
A class that wrap the SHA1 algorithm.
LLVM_ABI void update(ArrayRef< uint8_t > Data)
Digest more data.
LLVM_ABI std::array< uint8_t, 20 > result()
Return the current raw 160-bits SHA1 for the digested data since the last call to init().
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
void insert_range(Range &&R)
void swap(DenseSetImpl &RHS)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
bool erase(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
An efficient, type-erasing, non-owning reference to a callable.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
CallStackIterator beginAfterSharedPrefix(const CallStack &Other)
CallStackIterator end() const
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
@ CE
Windows NT (Windows on ARM)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
LLVM_ABI AllocationType getMIBAllocType(const MDNode *MIB)
Returns the allocation type from an MIB metadata node.
LLVM_ABI bool metadataMayIncludeContextSizeInfo()
Whether the alloc memprof metadata may include context size info for some MIBs (but possibly not all)...
LLVM_ABI bool hasSingleAllocType(uint8_t AllocTypes)
True if the AllocTypes bitmask contains just a single type.
LLVM_ABI std::string getAllocTypeAttributeString(AllocationType Type)
Returns the string to use in attributes with the given type.
LLVM_ABI MDNode * getMIBStackNode(const MDNode *MIB)
Returns the stack node from an MIB metadata node.
LLVM_ABI void removeAnyExistingAmbiguousAttribute(CallBase *CB)
Removes any existing "ambiguous" memprof attribute.
DiagnosticInfoOptimizationBase::Argument NV
LLVM_ABI CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< NodeBase * > Node
NodeAddr< FuncNode * > Func
friend class Instruction
Iterator for Instructions in a `BasicBlock.
uint64_t read64le(const void *P)
void write32le(void *P, uint32_t V)
This is an optimization pass for GlobalISel generic memory operations.
cl::opt< unsigned > MinClonedColdBytePercent("memprof-cloning-cold-threshold", cl::init(100), cl::Hidden, cl::desc("Min percent of cold bytes to hint alloc cold during cloning"))
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
LLVM_ABI void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner={})
Log all errors (if any) in E to OS.
FunctionAddr VTableAddr Value
void stable_sort(R &&Range)
cl::opt< bool > MemProfReportHintedSizes("memprof-report-hinted-sizes", cl::init(false), cl::Hidden, cl::desc("Report total allocation sizes of hinted allocations"))
LLVM_ABI bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI bool mayHaveMemprofSummary(const CallBase *CB)
Returns true if the instruction could have memprof metadata, used to ensure consistency between summa...
constexpr from_range_t from_range
static cl::opt< bool > MemProfRequireDefinitionForPromotion("memprof-require-definition-for-promotion", cl::init(false), cl::Hidden, cl::desc("Require target function definition when promoting indirect calls"))
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
cl::opt< unsigned > MemProfTopNImportant("memprof-top-n-important", cl::init(10), cl::Hidden, cl::desc("Number of largest cold contexts to consider important"))
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
void set_subtract(S1Ty &S1, const S2Ty &S2)
set_subtract(A, B) - Compute A := A - B
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
bool set_intersects(const S1Ty &S1, const S2Ty &S2)
set_intersects(A, B) - Return true iff A ^ B is non empty
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
LLVM_ABI Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
cl::opt< unsigned > MaxSummaryIndirectEdges("module-summary-max-indirect-edges", cl::init(0), cl::Hidden, cl::desc("Max number of summary edges added from " "indirect call profile metadata"))
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
FunctionAddr VTableAddr Count
bool set_union(S1Ty &S1, const S2Ty &S2)
set_union(A, B) - Compute A := A u B, return whether A changed.
cl::opt< bool > SupportsHotColdNew
Indicate we are linking with an allocator that supports hot/cold operator new interfaces.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
S1Ty set_intersection(const S1Ty &S1, const S2Ty &S2)
set_intersection(A, B) - Return A ^ B
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
S1Ty set_difference(const S1Ty &S1, const S2Ty &S2)
set_difference(A, B) - Return A - B
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Expected< T > errorOrToExpected(ErrorOr< T > &&EO)
Convert an ErrorOr<T> to an Expected<T>.
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
LLVM_ABI Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
cl::opt< bool > MemProfFixupImportant("memprof-fixup-important", cl::init(true), cl::Hidden, cl::desc("Enables edge fixup for important contexts"))
DOTGraphTraits(bool IsSimple=false)
typename GTraits::NodeRef NodeRef
static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter, GraphType G)
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
typename GTraits::ChildIteratorType ChildIteratorType
static std::string getNodeAttributes(NodeRef Node, GraphType G)
static bool isNodeHidden(NodeRef Node, GraphType G)
static std::string getNodeLabel(NodeRef Node, GraphType G)
GraphTraits< GraphType > GTraits
static NodeRef getNode(const NodePtrTy &P)
static const ContextNode< DerivedCCG, FuncTy, CallTy > * GetCallee(const EdgePtrTy &P)
static ChildIteratorType child_end(NodeRef N)
std::unique_ptr< ContextNode< DerivedCCG, FuncTy, CallTy > > NodePtrTy
mapped_iterator< typename std::vector< std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > >::const_iterator, decltype(&GetCallee)> ChildIteratorType
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
const ContextNode< DerivedCCG, FuncTy, CallTy > * NodeRef
mapped_iterator< typename std::vector< NodePtrTy >::const_iterator, decltype(&getNode)> nodes_iterator
static ChildIteratorType child_begin(NodeRef N)
static NodeRef getEntryNode(GraphType G)
static nodes_iterator nodes_begin(GraphType G)
static nodes_iterator nodes_end(GraphType G)
std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > EdgePtrTy
Summary of memprof metadata on allocations.
std::vector< MIBInfo > MIBs
SmallVector< unsigned > StackIdIndices
SmallVector< unsigned > Clones
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits(bool simple=false)
An information struct used to provide DenseMap with the various necessary components for a given valu...
typename GraphType::UnknownGraphTypeError NodeRef
Struct that holds a reference to a particular GUID in a global value summary.
ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const
GlobalValue::GUID getGUID() const
PointerUnion< CallsiteInfo *, AllocInfo * > SimpleType
static SimpleType getSimplifiedValue(IndexCall &Val)
const PointerUnion< CallsiteInfo *, AllocInfo * > SimpleType
static SimpleType getSimplifiedValue(const IndexCall &Val)
Define a template that can be specialized by smart pointers to reflect the fact that they are automat...