LLVM 23.0.0git
PGOInstrumentation.cpp
Go to the documentation of this file.
1//===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements PGO instrumentation using a minimum spanning tree based
10// on the following paper:
11// [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
12// for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
13// Issue 3, pp 313-322
14// The idea of the algorithm is based on the fact that for each node (except
15// for the entry and exit), the sum of incoming edge counts equals the sum of
16// outgoing edge counts. The count of an edge on the spanning tree can be
17// derived from those edges not on the spanning tree. Knuth proves this method
18// instruments the minimum number of edges.
19//
20// The minimal spanning tree here is actually a maximum weight tree -- on-tree
21// edges have higher frequencies (more likely to execute). The idea is to
22// instrument those less frequently executed edges to reduce the runtime
23// overhead of instrumented binaries.
24//
25// This file contains two passes:
26// (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
27// count profile, and generates the instrumentation for indirect call
28// profiling.
29// (2) Pass PGOInstrumentationUse which reads the edge count profile and
30// annotates the branch weights. It also reads the indirect call value
31// profiling records and annotates the indirect call instructions.
32//
33// To get the precise counter information, these two passes need to be invoked
34// at the same compilation point (so they see the same IR). For pass
35// PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For
36// pass PGOInstrumentationUse, the real work is done in class PGOUseFunc and
37// the profile is opened at module level and passed to each PGOUseFunc instance.
38// The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put
39// in class FuncPGOInstrumentation.
40//
41// Class PGOEdge represents a CFG edge and some auxiliary information. Class
42// BBInfo contains auxiliary information for each BB. These two classes are used
43// in pass PGOInstrumentationGen. Classes PGOUseEdge and UseBBInfo are derived
44// from PGOEdge and BBInfo, respectively. They contain extra data structures
45// used in populating profile counters.
46// The MST implementation is in Class CFGMST (CFGMST.h).
47//
48//===----------------------------------------------------------------------===//
49
52#include "llvm/ADT/APInt.h"
53#include "llvm/ADT/ArrayRef.h"
54#include "llvm/ADT/STLExtras.h"
56#include "llvm/ADT/Statistic.h"
57#include "llvm/ADT/StringRef.h"
58#include "llvm/ADT/StringSet.h"
59#include "llvm/ADT/Twine.h"
60#include "llvm/ADT/iterator.h"
64#include "llvm/Analysis/CFG.h"
69#include "llvm/IR/Attributes.h"
70#include "llvm/IR/BasicBlock.h"
71#include "llvm/IR/CFG.h"
72#include "llvm/IR/Comdat.h"
73#include "llvm/IR/Constant.h"
74#include "llvm/IR/Constants.h"
76#include "llvm/IR/Dominators.h"
78#include "llvm/IR/Function.h"
79#include "llvm/IR/GlobalAlias.h"
80#include "llvm/IR/GlobalValue.h"
82#include "llvm/IR/IRBuilder.h"
83#include "llvm/IR/InstVisitor.h"
84#include "llvm/IR/InstrTypes.h"
85#include "llvm/IR/Instruction.h"
88#include "llvm/IR/Intrinsics.h"
89#include "llvm/IR/LLVMContext.h"
90#include "llvm/IR/MDBuilder.h"
91#include "llvm/IR/Module.h"
92#include "llvm/IR/PassManager.h"
95#include "llvm/IR/Type.h"
96#include "llvm/IR/Value.h"
100#include "llvm/Support/CRC.h"
101#include "llvm/Support/Casting.h"
105#include "llvm/Support/Debug.h"
106#include "llvm/Support/Error.h"
118#include <algorithm>
119#include <cassert>
120#include <cstdint>
121#include <memory>
122#include <numeric>
123#include <optional>
124#include <stack>
125#include <string>
126#include <unordered_map>
127#include <utility>
128#include <vector>
129
130using namespace llvm;
133
134#define DEBUG_TYPE "pgo-instrumentation"
135
// Pass statistics. Counters without "CS" in their name belong to regular
// IR-level PGO; for the ones updated by FuncPGOInstrumentation, the matching
// NumOfCSPGO* counter further down is bumped instead when the function is
// processed as context-sensitive PGO (IsCS is true).
136STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
137STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");
138STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented.");
139STATISTIC(NumOfPGOEdge, "Number of edges.");
140STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
141STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
142STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
143STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
144STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
145STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
// Context-sensitive (CSPGO) counterparts of the counters above.
146STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
147STATISTIC(NumOfCSPGOSelectInsts,
148 "Number of select instruction instrumented in CSPGO.");
149STATISTIC(NumOfCSPGOMemIntrinsics,
150 "Number of mem intrinsics instrumented in CSPGO.");
151STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO.");
152STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO.");
153STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO.");
154STATISTIC(NumOfCSPGOFunc,
155 "Number of functions having valid profile counts in CSPGO.");
156STATISTIC(NumOfCSPGOMismatch,
157 "Number of functions having mismatch profile in CSPGO.");
158STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");
// This counter has no separate CSPGO variant in this list.
159STATISTIC(NumCoveredBlocks, "Number of basic blocks that were executed");
160
161// Command line option to specify the file to read profile from. This is
162// mainly used for testing.
164 "pgo-test-profile-file", cl::init(""), cl::Hidden,
165 cl::value_desc("filename"),
166 cl::desc("Specify the path of profile data file. This is "
167 "mainly for test purpose."));
169 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden,
170 cl::value_desc("filename"),
171 cl::desc("Specify the path of profile remapping file. This is mainly for "
172 "test purpose."));
173
174// Command line option to disable value profiling. The default is false:
175// i.e. value profiling is enabled by default. This is for debug purpose.
176static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false),
178 cl::desc("Disable Value Profiling"));
179
180// Command line option to set the maximum number of VP annotations to write to
181// the metadata for a single indirect call callsite.
183 "icp-max-annotations", cl::init(3), cl::Hidden,
184 cl::desc("Max number of annotations for a single indirect "
185 "call callsite"));
186
187// Command line option to set the maximum number of value annotations
188// to write to the metadata for a single memop intrinsic.
// NOTE: the two adjacent string literals below are concatenated by the
// compiler, so the first one must end in a space to keep the words apart in
// the option's help text.
190 "memop-max-annotations", cl::init(4), cl::Hidden,
191 cl::desc("Max number of precise value annotations for a single memop "
192 "intrinsic"));
193
194// Command line option to control appending FunctionHash to the name of a COMDAT
195// function. This is to avoid the hash mismatch caused by the preinliner.
197 "do-comdat-renaming", cl::init(false), cl::Hidden,
198 cl::desc("Append function hash to the name of COMDAT function to avoid "
199 "function hash mismatch due to the preinliner"));
200
201namespace llvm {
202// Command line option to enable/disable the warning about missing profile
203// information.
204cl::opt<bool> PGOWarnMissing("pgo-warn-missing-function", cl::init(false),
206 cl::desc("Use this option to turn on/off "
207 "warnings about missing profile data for "
208 "functions."));
209
210// Command line option to enable/disable the warning about a hash mismatch in
211// the profile data.
213 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden,
214 cl::desc("Use this option to turn off/on "
215 "warnings about profile cfg mismatch."));
216
217// Command line option to enable/disable the warning about a hash mismatch in
218// the profile data for Comdat functions, which often turns out to be false
219// positive due to the pre-instrumentation inline.
221 "no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden,
222 cl::desc("The option is used to turn on/off "
223 "warnings about hash mismatch for comdat "
224 "or weak functions."));
225
226// Command line option to enable/disable select instruction instrumentation.
227static cl::opt<bool>
228 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden,
229 cl::desc("Use this option to turn on/off SELECT "
230 "instruction instrumentation. "));
231
232// Command line option to turn on CFG dot or text dump of raw profile counts
234 "pgo-view-raw-counts", cl::Hidden,
235 cl::desc("A boolean option to show CFG dag or text "
236 "with raw profile counts from "
237 "profile data. See also option "
238 "-pgo-view-counts. To limit graph "
239 "display to only one function, use "
240 "filtering option -view-bfi-func-name."),
241 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."),
242 clEnumValN(PGOVCT_Graph, "graph", "show a graph."),
243 clEnumValN(PGOVCT_Text, "text", "show in text.")));
244
245// Command line option to enable/disable memop intrinsic call.size profiling.
246static cl::opt<bool>
247 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden,
248 cl::desc("Use this option to turn on/off "
249 "memory intrinsic size profiling."));
250
251// Emit branch probability as optimization remarks.
252static cl::opt<bool>
253 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden,
254 cl::desc("When this option is on, the annotated "
255 "branch probability will be emitted as "
256 "optimization remarks: -{Rpass|"
257 "pass-remarks}=pgo-instrumentation"));
258
260 "pgo-instrument-entry", cl::init(false), cl::Hidden,
261 cl::desc("Force to instrument function entry basicblock."));
262
263static cl::opt<bool>
264 PGOInstrumentLoopEntries("pgo-instrument-loop-entries", cl::init(false),
266 cl::desc("Force to instrument loop entries."));
267
269 "pgo-function-entry-coverage", cl::Hidden,
270 cl::desc(
271 "Use this option to enable function entry coverage instrumentation."));
272
274 "pgo-block-coverage",
275 cl::desc("Use this option to enable basic block coverage instrumentation"));
276
277static cl::opt<bool>
278 PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph",
279 cl::desc("Create a dot file of CFGs with block "
280 "coverage inference information"));
281
283 "pgo-temporal-instrumentation",
284 cl::desc("Use this option to enable temporal instrumentation"));
285
286static cl::opt<bool>
287 PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden,
288 cl::desc("Fix function entry count in profile use."));
289
291 "pgo-verify-hot-bfi", cl::init(false), cl::Hidden,
292 cl::desc("Print out the non-match BFI count if a hot raw profile count "
293 "becomes non-hot, or a cold raw profile count becomes hot. "
294 "The print is enabled under -Rpass-analysis=pgo, or "
295 "internal option -pass-remarks-analysis=pgo."));
296
298 "pgo-verify-bfi", cl::init(false), cl::Hidden,
299 cl::desc("Print out mismatched BFI counts after setting profile metadata "
300 "The print is enabled under -Rpass-analysis=pgo, or "
301 "internal option -pass-remarks-analysis=pgo."));
302
304 "pgo-verify-bfi-ratio", cl::init(2), cl::Hidden,
305 cl::desc("Set the threshold for pgo-verify-bfi: only print out "
306 "mismatched BFI if the difference percentage is greater than "
307 "this value (in percentage)."));
308
310 "pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden,
311 cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose "
312 "profile count value is below."));
313
315 "pgo-trace-func-hash", cl::init("-"), cl::Hidden,
316 cl::value_desc("function name"),
317 cl::desc("Trace the hash of the function with this name."));
318
320 "pgo-function-size-threshold", cl::Hidden,
321 cl::desc("Do not instrument functions smaller than this threshold."));
322
324 "pgo-critical-edge-threshold", cl::init(20000), cl::Hidden,
325 cl::desc("Do not instrument functions with the number of critical edges "
326 " greater than this threshold."));
327
329 "pgo-cold-instrument-entry-threshold", cl::init(0), cl::Hidden,
330 cl::desc("For cold function instrumentation, skip instrumenting functions "
331 "whose entry count is above the given value."));
332
334 "pgo-treat-unknown-as-cold", cl::init(false), cl::Hidden,
335 cl::desc("For cold function instrumentation, treat count unknown(e.g. "
336 "unprofiled) functions as cold."));
337
339 "pgo-instrument-cold-function-only", cl::init(false), cl::Hidden,
340 cl::desc("Enable cold function only instrumentation."));
341
343 "ctx-prof-skip-callsite-instr", cl::Hidden,
344 cl::desc("Do not instrument callsites to functions in this list. Intended "
345 "for testing."));
346
348
349// Command line option to turn on CFG dot dump after profile annotation.
350// Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts
352
353// Command line option to specify the name of the function for CFG dump
354// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
356
357// Command line option to enable vtable value profiling. Defined in
358// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling=
363} // namespace llvm
364
365namespace {
366class FunctionInstrumenter final {
367 Module &M;
368 Function &F;
370 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
371 BranchProbabilityInfo *const BPI;
372 BlockFrequencyInfo *const BFI;
373 LoopInfo *const LI;
374
375 const PGOInstrumentationType InstrumentationType;
376
377 // FIXME(mtrofin): re-enable this for ctx profiling, for non-indirect calls.
378 // Ctx profiling implicitly captures indirect call cases, but not other
379 // values. Supporting other values is relatively straight-forward - just
380 // another counter range within the context.
381 bool isValueProfilingDisabled() const {
382 return DisableValueProfiling ||
383 InstrumentationType == PGOInstrumentationType::CTXPROF;
384 }
385
386 bool shouldInstrumentEntryBB() const {
387 return PGOInstrumentEntry ||
388 InstrumentationType == PGOInstrumentationType::CTXPROF;
389 }
390
391 bool shouldInstrumentLoopEntries() const { return PGOInstrumentLoopEntries; }
392
393public:
394 FunctionInstrumenter(
396 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
397 BranchProbabilityInfo *BPI = nullptr, BlockFrequencyInfo *BFI = nullptr,
398 LoopInfo *LI = nullptr,
400 : M(M), F(F), TLI(TLI), ComdatMembers(ComdatMembers), BPI(BPI), BFI(BFI),
401 LI(LI), InstrumentationType(InstrumentationType) {}
402
403 void instrument();
404};
405} // namespace
406
407// Return a string describing the branch condition that can be
408// used in static branch probability heuristics:
409static std::string getBranchCondString(Instruction *TI) {
411 if (!BI)
412 return std::string();
413
414 Value *Cond = BI->getCondition();
416 if (!CI)
417 return std::string();
418
419 std::string result;
420 raw_string_ostream OS(result);
421 OS << CI->getPredicate() << "_";
422 CI->getOperand(0)->getType()->print(OS, true);
423
424 Value *RHS = CI->getOperand(1);
426 if (CV) {
427 if (CV->isZero())
428 OS << "_Zero";
429 else if (CV->isOne())
430 OS << "_One";
431 else if (CV->isMinusOne())
432 OS << "_MinusOne";
433 else
434 OS << "_Const";
435 }
436 return result;
437}
438
439static const char *ValueProfKindDescr[] = {
440#define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
442};
443
444// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
445// aware this is an ir_level profile so it can set the version flag.
446static GlobalVariable *
448 PGOInstrumentationType InstrumentationType) {
450 Type *IntTy64 = Type::getInt64Ty(M.getContext());
452 if (InstrumentationType == PGOInstrumentationType::CSFDO)
453 ProfileVersion |= VARIANT_MASK_CSIR_PROF;
454 if (PGOInstrumentEntry ||
455 InstrumentationType == PGOInstrumentationType::CTXPROF)
456 ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
458 ProfileVersion |= VARIANT_MASK_INSTR_LOOP_ENTRIES;
460 ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
462 ProfileVersion |=
465 ProfileVersion |= VARIANT_MASK_BYTE_COVERAGE;
467 ProfileVersion |= VARIANT_MASK_TEMPORAL_PROF;
468 auto IRLevelVersionVariable = new GlobalVariable(
469 M, IntTy64, true, GlobalValue::WeakAnyLinkage,
470 Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
471 IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility);
472
473 Triple TT(M.getTargetTriple());
474 if (TT.supportsCOMDAT()) {
475 IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
476 IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
477 }
478 return IRLevelVersionVariable;
479}
480
481namespace {
482
483/// The select instruction visitor plays three roles specified
484/// by the mode. In \c VM_counting mode, it simply counts the number of
485/// select instructions. In \c VM_instrument mode, it inserts code to count
486/// the number of times TrueValue of select is taken. In \c VM_annotate mode,
487/// it reads the profile data and annotates the select instruction with metadata.
488enum VisitMode { VM_counting, VM_instrument, VM_annotate };
489class PGOUseFunc;
490
491/// Instruction Visitor class to visit select instructions.
492struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
493 Function &F;
494 unsigned NSIs = 0; // Number of select instructions instrumented.
495 VisitMode Mode = VM_counting; // Visiting mode.
496 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index.
497 unsigned TotalNumCtrs = 0; // Total number of counters
498 GlobalValue *FuncNameVar = nullptr;
499 uint64_t FuncHash = 0;
500 PGOUseFunc *UseFunc = nullptr;
501 bool HasSingleByteCoverage;
502
503 SelectInstVisitor(Function &Func, bool HasSingleByteCoverage)
504 : F(Func), HasSingleByteCoverage(HasSingleByteCoverage) {}
505
506 void countSelects() {
507 NSIs = 0;
508 Mode = VM_counting;
509 visit(F);
510 }
511
512 // Visit the IR stream and instrument all select instructions. \p
513 // Ind is a pointer to the counter index variable; \p TotalNC
514 // is the total number of counters; \p FNV is the pointer to the
515 // PGO function name var; \p FHash is the function hash.
516 void instrumentSelects(unsigned *Ind, unsigned TotalNC, GlobalValue *FNV,
517 uint64_t FHash) {
518 Mode = VM_instrument;
519 CurCtrIdx = Ind;
520 TotalNumCtrs = TotalNC;
521 FuncHash = FHash;
522 FuncNameVar = FNV;
523 visit(F);
524 }
525
526 // Visit the IR stream and annotate all select instructions.
527 void annotateSelects(PGOUseFunc *UF, unsigned *Ind) {
528 Mode = VM_annotate;
529 UseFunc = UF;
530 CurCtrIdx = Ind;
531 visit(F);
532 }
533
534 void instrumentOneSelectInst(SelectInst &SI);
535 void annotateOneSelectInst(SelectInst &SI);
536
537 // Visit \p SI instruction and perform tasks according to visit mode.
538 void visitSelectInst(SelectInst &SI);
539
540 // Return the number of select instructions. This needs be called after
541 // countSelects().
542 unsigned getNumOfSelectInsts() const { return NSIs; }
543};
544
545/// This class implements the CFG edges for the Minimum Spanning Tree (MST)
546/// based instrumentation.
547/// Note that the CFG can be a multi-graph. So there might be multiple edges
548/// with the same SrcBB and DestBB.
549struct PGOEdge {
550 BasicBlock *SrcBB;
551 BasicBlock *DestBB;
552 uint64_t Weight;
553 bool InMST = false;
554 bool Removed = false;
555 bool IsCritical = false;
556
557 PGOEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W = 1)
558 : SrcBB(Src), DestBB(Dest), Weight(W) {}
559
560 /// Return the information string of an edge.
561 std::string infoString() const {
562 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
563 (IsCritical ? "c" : " ") + " W=" + Twine(Weight))
564 .str();
565 }
566};
567
/// Auxiliary per-basic-block information kept for the MST.
struct PGOBBInfo {
  PGOBBInfo *Group;
  uint32_t Index;
  uint32_t Rank = 0;

  /// A freshly created info object is its own group.
  PGOBBInfo(unsigned IX) : Group(this), Index(IX) {}

  /// Return the information string of this object ("Index=<decimal index>").
  std::string infoString() const {
    std::string Info = "Index=";
    Info += std::to_string(Index);
    return Info;
  }
};
581
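582// This class holds the per-function state shared by PGO instrumentation
// generation and use: the CFG MST, the CFG hash, the value sites and the
// select instruction visitor.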
583template <class Edge, class BBInfo> class FuncPGOInstrumentation {
584private:
585 Function &F;
586
587 // Whether this is context-sensitive instrumentation.
588 bool IsCS;
589
590 // A map that stores the Comdat group in function F.
591 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
592
593 ValueProfileCollector VPC;
594
595 void computeCFGHash();
596 void renameComdatFunction();
597
598public:
599 const TargetLibraryInfo &TLI;
600 std::vector<std::vector<VPCandidateInfo>> ValueSites;
601 SelectInstVisitor SIVisitor;
602 std::string FuncName;
603 std::string DeprecatedFuncName;
604 GlobalVariable *FuncNameVar;
605
606 // CFG hash value for this function.
607 uint64_t FunctionHash = 0;
608
609 // The Minimum Spanning Tree of function CFG.
610 CFGMST<Edge, BBInfo> MST;
611
612 const std::optional<BlockCoverageInference> BCI;
613
614 static std::optional<BlockCoverageInference>
615 constructBCI(Function &Func, bool HasSingleByteCoverage,
616 bool InstrumentFuncEntry) {
617 if (HasSingleByteCoverage)
618 return BlockCoverageInference(Func, InstrumentFuncEntry);
619 return {};
620 }
621
622 // Collect all the BBs that will be instrumented, and store them in
623 // InstrumentBBs.
624 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
625
626 // Given an edge, find the BB that will be instrumented.
627 // Return nullptr if there is no BB to be instrumented.
629
630 // Return the auxiliary BB information.
631 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
632
633 // Return the auxiliary BB information if available.
634 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }
635
636 // Dump edges and BB information.
637 void dumpInfo(StringRef Str = "") const {
638 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName +
639 " Hash: " + Twine(FunctionHash) + "\t" + Str);
640 }
641
642 FuncPGOInstrumentation(
643 Function &Func, TargetLibraryInfo &TLI,
644 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
645 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
646 BlockFrequencyInfo *BFI = nullptr, LoopInfo *LI = nullptr,
647 bool IsCS = false, bool InstrumentFuncEntry = true,
648 bool InstrumentLoopEntries = false, bool HasSingleByteCoverage = false)
649 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
650 TLI(TLI), ValueSites(IPVK_Last + 1),
651 SIVisitor(Func, HasSingleByteCoverage),
652 MST(F, InstrumentFuncEntry, InstrumentLoopEntries, BPI, BFI, LI),
653 BCI(constructBCI(Func, HasSingleByteCoverage, InstrumentFuncEntry)) {
654 if (BCI && PGOViewBlockCoverageGraph)
655 BCI->viewBlockCoverageGraph();
656 // This should be done before CFG hash computation.
657 SIVisitor.countSelects();
658 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize);
659 if (!IsCS) {
660 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
661 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
662 NumOfPGOBB += MST.bbInfoSize();
663 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget);
665 ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget);
666 } else {
667 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
668 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
669 NumOfCSPGOBB += MST.bbInfoSize();
670 }
671
672 FuncName = getIRPGOFuncName(F);
673 DeprecatedFuncName = getPGOFuncName(F);
674 computeCFGHash();
675 if (!ComdatMembers.empty())
676 renameComdatFunction();
677 LLVM_DEBUG(dumpInfo("after CFGMST"));
678
679 for (const auto &E : MST.allEdges()) {
680 if (E->Removed)
681 continue;
682 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
683 if (!E->InMST)
684 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;
685 }
686
687 if (CreateGlobalVar)
688 FuncNameVar = createPGOFuncNameVar(F, FuncName);
689 }
690};
691
692} // end anonymous namespace
693
694// Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
695// value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers
696// of selects, indirect calls, mem ops and edges.
697template <class Edge, class BBInfo>
698void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
699 std::vector<uint8_t> Indexes;
700 JamCRC JC;
701 for (auto &BB : F) {
702 for (BasicBlock *Succ : successors(&BB)) {
703 auto BI = findBBInfo(Succ);
704 if (BI == nullptr)
705 continue;
706 uint32_t Index = BI->Index;
707 for (int J = 0; J < 4; J++)
708 Indexes.push_back((uint8_t)(Index >> (J * 8)));
709 }
710 }
711 JC.update(Indexes);
712
713 JamCRC JCH;
714 // The higher 32 bits.
715 auto updateJCH = [&JCH](uint64_t Num) {
716 uint8_t Data[8];
718 JCH.update(Data);
719 };
720 updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts());
721 updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size());
722 updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size());
723 if (BCI) {
724 updateJCH(BCI->getInstrumentedBlocksHash());
725 } else {
726 updateJCH((uint64_t)MST.numEdges());
727 }
728
729 // Hash format for context sensitive profile. Reserve 4 bits for other
730 // information.
731 FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC();
732
733 // Reserve bit 60-63 for other information purpose.
735 if (IsCS)
737 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
738 << " CRC = " << JC.getCRC()
739 << ", Selects = " << SIVisitor.getNumOfSelectInsts()
740 << ", Edges = " << MST.numEdges() << ", ICSites = "
741 << ValueSites[IPVK_IndirectCallTarget].size()
742 << ", Memops = " << ValueSites[IPVK_MemOPSize].size()
743 << ", High32 CRC = " << JCH.getCRC()
744 << ", Hash = " << FunctionHash << "\n";);
745
746 if (PGOTraceFuncHash != "-" && F.getName().contains(PGOTraceFuncHash))
747 dbgs() << "Funcname=" << F.getName() << ", Hash=" << FunctionHash
748 << " in building " << F.getParent()->getSourceFileName() << "\n";
749}
750
751// Check if we can safely rename this Comdat function.
752static bool canRenameComdat(
753 Function &F,
754 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
755 if (!DoComdatRenaming || !canRenameComdatFunc(F, true))
756 return false;
757
758 // FIXME: Current only handle those Comdat groups that only containing one
759 // function.
760 // (1) For a Comdat group containing multiple functions, we need to have a
761 // unique postfix based on the hashes for each function. There is a
762 // non-trivial code refactoring to do this efficiently.
763 // (2) Variables can not be renamed, so we can not rename Comdat function in a
764 // group including global vars.
765 Comdat *C = F.getComdat();
766 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
767 assert(!isa<GlobalAlias>(CM.second));
768 Function *FM = dyn_cast<Function>(CM.second);
769 if (FM != &F)
770 return false;
771 }
772 return true;
773}
774
775// Append the CFGHash to the Comdat function name.
776template <class Edge, class BBInfo>
777void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
778 if (!canRenameComdat(F, ComdatMembers))
779 return;
780 std::string OrigName = F.getName().str();
781 std::string NewFuncName =
782 Twine(F.getName() + "." + Twine(FunctionHash)).str();
783 F.setName(Twine(NewFuncName));
785 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str();
786 Comdat *NewComdat;
787 Module *M = F.getParent();
788 // For AvailableExternallyLinkage functions, change the linkage to
789 // LinkOnceODR and put them into comdat. This is because after renaming, there
790 // is no backup external copy available for the function.
791 if (!F.hasComdat()) {
793 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName));
795 F.setComdat(NewComdat);
796 return;
797 }
798
799 // This function belongs to a single function Comdat group.
800 Comdat *OrigComdat = F.getComdat();
801 std::string NewComdatName =
802 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str();
803 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName));
804 NewComdat->setSelectionKind(OrigComdat->getSelectionKind());
805
806 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {
807 // Must be a function.
808 cast<Function>(CM.second)->setComdat(NewComdat);
809 }
810}
811
812/// Collect all the BBs that will be instruments and add them to
813/// `InstrumentBBs`.
814template <class Edge, class BBInfo>
815void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
816 std::vector<BasicBlock *> &InstrumentBBs) {
817 if (BCI) {
818 for (auto &BB : F)
819 if (BCI->shouldInstrumentBlock(BB))
820 InstrumentBBs.push_back(&BB);
821 return;
822 }
823
824 // Use a worklist as we will update the vector during the iteration.
825 std::vector<Edge *> EdgeList;
826 EdgeList.reserve(MST.numEdges());
827 for (const auto &E : MST.allEdges())
828 EdgeList.push_back(E.get());
829
830 for (auto &E : EdgeList) {
831 BasicBlock *InstrBB = getInstrBB(E);
832 if (InstrBB)
833 InstrumentBBs.push_back(InstrBB);
834 }
835}
836
837// Given a CFG edge E to be instrumented, find the BB in which to place the
838// instrumentation code. The function will split the critical edge if necessary.
839template <class Edge, class BBInfo>
840BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
841 if (E->InMST || E->Removed)
842 return nullptr;
843
844 BasicBlock *SrcBB = E->SrcBB;
845 BasicBlock *DestBB = E->DestBB;
846 // For a fake edge, instrument the real BB.
847 if (SrcBB == nullptr)
848 return DestBB;
849 if (DestBB == nullptr)
850 return SrcBB;
851
852 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
853 // There are basic blocks (such as catchswitch) cannot be instrumented.
854 // If the returned first insertion point is the end of BB, skip this BB.
855 if (BB->getFirstNonPHIOrDbgOrAlloca() == BB->end())
856 return nullptr;
857 return BB;
858 };
859
860 // Instrument the SrcBB if it has a single successor,
861 // otherwise, the DestBB if this is not a critical edge.
862 Instruction *TI = SrcBB->getTerminator();
863 if (TI->getNumSuccessors() <= 1)
864 return canInstrument(SrcBB);
865 if (!E->IsCritical)
866 return canInstrument(DestBB);
867
868 // Some IndirectBr critical edges cannot be split by the previous
869 // SplitIndirectBrCriticalEdges call. Bail out.
870 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
871 BasicBlock *InstrBB =
872 isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);
873 if (!InstrBB) {
875 dbgs() << "Fail to split critical edge: not instrument this edge.\n");
876 return nullptr;
877 }
878 // For a critical edge, we have to split. Instrument the newly
879 // created BB.
880 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
881 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
882 << " --> " << getBBInfo(DestBB).Index << "\n");
883 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
884 MST.addEdge(SrcBB, InstrBB, 0);
885 // Second one: Add new edge of InstrBB->DestBB.
886 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
887 NewEdge1.InMST = true;
888 E->Removed = true;
889
890 return canInstrument(InstrBB);
891}
892
893// When generating value profiling calls on Windows routines that make use of
894// handler funclets for exception processing, an operand bundle needs to be
895// attached to the called function. This routine will set \p OpBundles to
896// contain the funclet information, if any is needed, that should be placed on
897// the generated value profiling call for the value profile candidate call.
898static void
902 auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst);
903 if (!OrigCall)
904 return;
905
906 if (!isa<IntrinsicInst>(OrigCall)) {
907 // The instrumentation call should belong to the same funclet as a
908 // non-intrinsic call, so just copy the operand bundle, if any exists.
909 std::optional<OperandBundleUse> ParentFunclet =
910 OrigCall->getOperandBundle(LLVMContext::OB_funclet);
911 if (ParentFunclet)
912 OpBundles.emplace_back(OperandBundleDef(*ParentFunclet));
913 } else {
914 // Intrinsics or other instructions do not get funclet information from the
915 // front-end. Need to use the BlockColors that was computed by the routine
916 // colorEHFunclets to determine whether a funclet is needed.
917 if (!BlockColors.empty()) {
918 const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second;
919 assert(CV.size() == 1 && "non-unique color for block!");
921 if (EHPadIt->isEHPad())
922 OpBundles.emplace_back("funclet", &*EHPadIt);
923 }
924 }
925}
926
927// Visit all edges and instrument the edges not in MST, and do value profiling.
928// Critical edges will be split.
//
// In order, this routine: builds the per-function PGO info, emits the counter
// intrinsics for the blocks returned by getInstrumentBBs(), instruments select
// instructions, and finally (unless value profiling is disabled) emits
// llvm.instrprof.value.profile calls for every value-profiling candidate.
// NOTE(review): several listing lines are missing from this copy of the
// function (the embedded numbering skips 941, 950, 983, 1007, 1076 and 1079);
// each gap is flagged below and must be restored from upstream.
929void FunctionInstrumenter::instrument() {
930 if (!PGOBlockCoverage) {
931 // Split indirectbr critical edges here before computing the MST rather than
932 // later in getInstrBB() to avoid invalidating it.
933 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
934 }
935
936 const bool IsCtxProf = InstrumentationType == PGOInstrumentationType::CTXPROF;
// No function-name global variable is created for contextual profiling; the
// function itself is used as the name operand instead (see Name below).
937 FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo(
938 F, TLI, ComdatMembers, /*CreateGlobalVar=*/!IsCtxProf, BPI, BFI, LI,
939 InstrumentationType == PGOInstrumentationType::CSFDO,
940 shouldInstrumentEntryBB(), shouldInstrumentLoopEntries(),
// NOTE(review): listing line 941 (the remaining constructor argument(s) and
// the closing parenthesis) is missing from this copy.
942
943 auto *const Name = IsCtxProf ? cast<GlobalValue>(&F) : FuncInfo.FuncNameVar;
944 auto *const CFGHash =
945 ConstantInt::get(Type::getInt64Ty(M.getContext()), FuncInfo.FunctionHash);
946 // Make sure that pointer to global is passed in with zero addrspace
947 // This is relevant during GPU profiling
948 auto *NormalizedNamePtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
949 Name, PointerType::get(M.getContext(), 0));
// NOTE(review): listing line 950 is missing from this copy. The block below is
// closed by the '}' on listing line 959 and returns early after emitting a
// single llvm.instrprof.cover in the entry block, so the missing line is
// presumably the guarding 'if' for function-entry coverage — confirm.
951 auto &EntryBB = F.getEntryBlock();
952 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstNonPHIOrDbgOrAlloca());
953 // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>,
954 // i32 <index>)
955 Builder.CreateIntrinsic(
956 Intrinsic::instrprof_cover,
957 {NormalizedNamePtr, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
958 return;
959 }
960
// One counter per instrumented block plus one per counted select instruction.
961 std::vector<BasicBlock *> InstrumentBBs;
962 FuncInfo.getInstrumentBBs(InstrumentBBs);
963 unsigned NumCounters =
964 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
965
966 if (IsCtxProf) {
// Callees named in CtxPGOSkipCallsiteInstrument are not instrumented.
967 StringSet<> SkipCSInstr(llvm::from_range, CtxPGOSkipCallsiteInstrument);
968
969 auto *CSIntrinsic =
970 Intrinsic::getOrInsertDeclaration(&M, Intrinsic::instrprof_callsite);
971 // We want to count the instrumentable callsites, then instrument them. This
972 // is because the llvm.instrprof.callsite intrinsic has an argument (like
973 // the other instrprof intrinsics) capturing the total number of
974 // instrumented objects (counters, or callsites, in this case). In this
975 // case, we want that value so we can readily pass it to the compiler-rt
976 // APIs that may have to allocate memory based on the nr of callsites.
977 // The traversal logic is the same for both counting and instrumentation,
978 // just needs to be done in succession.
979 auto Visit = [&](llvm::function_ref<void(CallBase * CB)> Visitor) {
980 for (auto &BB : F)
981 for (auto &Instr : BB)
982 if (auto *CS = dyn_cast<CallBase>(&Instr)) {
// NOTE(review): listing line 983 is missing from this copy; it holds the
// condition that guards the 'continue' on the next line.
984 continue;
985 if (CS->getCalledFunction() &&
986 SkipCSInstr.contains(CS->getCalledFunction()->getName()))
987 continue;
988 Visitor(CS);
989 }
990 };
991 // First, count callsites.
992 uint32_t TotalNumCallsites = 0;
993 Visit([&TotalNumCallsites](auto *) { ++TotalNumCallsites; });
994
995 // Now instrument.
996 uint32_t CallsiteIndex = 0;
997 Visit([&](auto *CB) {
// The intrinsic call is inserted immediately before the callsite.
998 IRBuilder<> Builder(CB);
999 Builder.CreateCall(CSIntrinsic,
1000 {Name, CFGHash, Builder.getInt32(TotalNumCallsites),
1001 Builder.getInt32(CallsiteIndex++),
1002 CB->getCalledOperand()});
1003 });
1004 }
1005
// I is the index of the next counter to hand out.
1006 uint32_t I = 0;
// NOTE(review): listing line 1007 is missing from this copy. The block below
// is closed by the '}' on listing line 1018 and emits
// llvm.instrprof.timestamp, so the missing line is presumably the 'if' that
// enables temporal instrumentation — confirm.
// The timestamp reserves extra counter slots at the front: 8 when
// PGOBlockCoverage is set, otherwise 1 (presumably because coverage counters
// are a single byte each — TODO confirm).
1008 NumCounters += PGOBlockCoverage ? 8 : 1;
1009 auto &EntryBB = F.getEntryBlock();
1010 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstNonPHIOrDbgOrAlloca());
1011 // llvm.instrprof.timestamp(i8* <name>, i64 <hash>, i32 <num-counters>,
1012 // i32 <index>)
1013 Builder.CreateIntrinsic(Intrinsic::instrprof_timestamp,
1014 {NormalizedNamePtr, CFGHash,
1015 Builder.getInt32(NumCounters),
1016 Builder.getInt32(I)});
1017 I += PGOBlockCoverage ? 8 : 1;
1018 }
1019
1020 for (auto *InstrBB : InstrumentBBs) {
1021 IRBuilder<> Builder(InstrBB, InstrBB->getFirstNonPHIOrDbgOrAlloca());
1022 assert(Builder.GetInsertPoint() != InstrBB->end() &&
1023 "Cannot get the Instrumentation point");
1024 // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>,
1025 // i32 <index>)
// llvm.instrprof.cover is emitted instead when PGOBlockCoverage is set.
1026 Builder.CreateIntrinsic(PGOBlockCoverage ? Intrinsic::instrprof_cover
1027 : Intrinsic::instrprof_increment,
1028 {NormalizedNamePtr, CFGHash,
1029 Builder.getInt32(NumCounters),
1030 Builder.getInt32(I++)});
1031 }
1032
1033 // Now instrument select instructions:
1034 FuncInfo.SIVisitor.instrumentSelects(&I, NumCounters, Name,
1035 FuncInfo.FunctionHash);
// Every counter index must have been handed out exactly once.
1036 assert(I == NumCounters);
1037
1038 if (isValueProfilingDisabled())
1039 return;
1040
1041 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
1042
1043 // Intrinsic function calls do not have funclet operand bundles needed for
1044 // Windows exception handling attached to them. However, if value profiling is
1045 // inserted for one of these calls, then a funclet value will need to be set
1046 // on the instrumentation call based on the funclet coloring.
1047 DenseMap<BasicBlock *, ColorVector> BlockColors;
1048 if (F.hasPersonalityFn() &&
1049 isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
1050 BlockColors = colorEHFunclets(F);
1051
1052 // For each VP Kind, walk the VP candidates and instrument each one.
1053 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
// Site indices restart at zero for every value kind.
1054 unsigned SiteIndex = 0;
1055 if (Kind == IPVK_MemOPSize && !PGOInstrMemOP)
1056 continue;
1057
1058 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) {
1059 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind]
1060 << " site: CallSite Index = " << SiteIndex << "\n");
1061
1062 IRBuilder<> Builder(Cand.InsertPt);
1063 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() &&
1064 "Cannot get the Instrumentation point");
1065
// The profiled value is passed as an i64: integers are zero-extended or
// truncated, pointers are converted with ptrtoint.
1066 Value *ToProfile = nullptr;
1067 if (Cand.V->getType()->isIntegerTy())
1068 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty());
1069 else if (Cand.V->getType()->isPointerTy())
1070 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty());
1071 assert(ToProfile && "value profiling Value is of unexpected type");
1072
// This shadows the function-level NormalizedNamePtr with the same expression.
1073 auto *NormalizedNamePtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
1074 Name, PointerType::get(M.getContext(), 0));
1075
// NOTE(review): listing line 1076 is missing from this copy; OpBundles is used
// below but its declaration is not visible.
1077 populateEHOperandBundle(Cand, BlockColors, OpBundles);
1078 Builder.CreateCall(
// NOTE(review): listing line 1079 (the start of the callee expression for
// Intrinsic::instrprof_value_profile) is missing from this copy.
1080 Intrinsic::instrprof_value_profile),
1081 {NormalizedNamePtr, Builder.getInt64(FuncInfo.FunctionHash),
1082 ToProfile, Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},
1083 OpBundles);
1084 }
1085 } // IPVK_First <= Kind <= IPVK_Last
1086}
1087
1088namespace {
1089
1090// This class represents a CFG edge in profile use compilation.
1091struct PGOUseEdge : public PGOEdge {
1092 using PGOEdge::PGOEdge;
1093
1094 std::optional<uint64_t> Count;
1095
1096 // Set edge count value
1097 void setEdgeCount(uint64_t Value) { Count = Value; }
1098
1099 // Return the information string for this object.
1100 std::string infoString() const {
1101 if (!Count)
1102 return PGOEdge::infoString();
1103 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(*Count)).str();
1104 }
1105};
1106
1107using DirectEdges = SmallVector<PGOUseEdge *, 2>;
1108
1109// This class stores the auxiliary information for each BB.
1110struct PGOUseBBInfo : public PGOBBInfo {
1111 std::optional<uint64_t> Count;
1112 int32_t UnknownCountInEdge = 0;
1113 int32_t UnknownCountOutEdge = 0;
1114 DirectEdges InEdges;
1115 DirectEdges OutEdges;
1116
1117 PGOUseBBInfo(unsigned IX) : PGOBBInfo(IX) {}
1118
1119 // Set the profile count value for this BB.
1120 void setBBInfoCount(uint64_t Value) { Count = Value; }
1121
1122 // Return the information string of this object.
1123 std::string infoString() const {
1124 if (!Count)
1125 return PGOBBInfo::infoString();
1126 return (Twine(PGOBBInfo::infoString()) + " Count=" + Twine(*Count)).str();
1127 }
1128
1129 // Add an OutEdge and update the edge count.
1130 void addOutEdge(PGOUseEdge *E) {
1131 OutEdges.push_back(E);
1132 UnknownCountOutEdge++;
1133 }
1134
1135 // Add an InEdge and update the edge count.
1136 void addInEdge(PGOUseEdge *E) {
1137 InEdges.push_back(E);
1138 UnknownCountInEdge++;
1139 }
1140};
1141
1142} // end anonymous namespace
1143
1144// Sum up the count values for all the edges.
// Removed edges and edges whose count has not been set contribute nothing to
// the total.
// NOTE(review): listing line 1145 (the function signature) is missing from this
// copy. The body iterates a range named Edges and returns Total; restore the
// signature from upstream before compiling.
1146 uint64_t Total = 0;
1147 for (const auto &E : Edges) {
1148 if (E->Removed)
1149 continue;
1150 if (E->Count)
1151 Total += *E->Count;
1152 }
1153 return Total;
1154}
1155
1156namespace {
1157
// Holds the state needed to apply a profile to one function: the function's
// FuncPGOInstrumentation info, the profile record read for it, and the
// hot/cold classification derived from the counts.
1158class PGOUseFunc {
1159public:
// Builds the FuncPGOInstrumentation info for \p Func. The literal 'false'
// passed as its fourth argument sits in the position that instrument() labels
// /*CreateGlobalVar=*/.
1160 PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI,
1161 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
1162 BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin,
1163 LoopInfo *LI, ProfileSummaryInfo *PSI, bool IsCS,
1164 bool InstrumentFuncEntry, bool InstrumentLoopEntries,
1165 bool HasSingleByteCoverage)
1166 : F(Func), M(Modu), BFI(BFIin), PSI(PSI),
1167 FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, LI, IsCS,
1168 InstrumentFuncEntry, InstrumentLoopEntries,
1169 HasSingleByteCoverage),
1170 FreqAttr(FFA_Normal), IsCS(IsCS), VPC(Func, TLI) {}
1171
// Update statistics and, unless suppressed, emit a warning for a failure to
// read this function's profile record.
1172 void handleInstrProfError(Error Err, uint64_t MismatchedFuncSum);
1173
1174 /// Get the profile record, assign it to \p ProfileRecord, handle errors if
1175 /// necessary, and assign \p ProgramMaxCount. \returns true if there are no
1176 /// errors.
1177 bool getRecord(IndexedInstrProfReader *PGOReader);
1178
1179 // Read counts for the instrumented BB from profile.
// NOTE(review): listing line 1181 (the remaining parameter(s) and the end of
// this declaration) is missing from this copy.
1180 bool readCounters(bool &AllZeros,
1182
1183 // Populate the counts for all BBs.
1184 void populateCounters();
1185
1186 // Set block coverage based on profile coverage values.
1187 void populateCoverage();
1188
1189 // Set the branch weights based on the count values.
1190 void setBranchWeights();
1191
1192 // Annotate the value profile call sites for all value kind.
1193 void annotateValueSites();
1194
1195 // Annotate the value profile call sites for one value kind.
1196 void annotateValueSites(uint32_t Kind);
1197
1198 // Annotate the irreducible loop header weights.
1199 void annotateIrrLoopHeaderWeights();
1200
1201 // The hotness of the function from the profile count.
1202 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };
1203
1204 // Return the function hotness from the profile.
1205 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; }
1206
1207 // Return the function hash.
1208 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; }
1209
1210 // Return the profile record for this function;
1211 NamedInstrProfRecord &getProfileRecord() { return ProfileRecord; }
1212
1213 // Return the auxiliary BB information.
1214 PGOUseBBInfo &getBBInfo(const BasicBlock *BB) const {
1215 return FuncInfo.getBBInfo(BB);
1216 }
1217
1218 // Return the auxiliary BB information if available.
1219 PGOUseBBInfo *findBBInfo(const BasicBlock *BB) const {
1220 return FuncInfo.findBBInfo(BB);
1221 }
1222
1223 Function &getFunc() const { return F; }
1224
1225 void dumpInfo(StringRef Str = "") const { FuncInfo.dumpInfo(Str); }
1226
1227 uint64_t getProgramMaxCount() const { return ProgramMaxCount; }
1228
1229private:
1230 Function &F;
1231 Module *M;
1232 BlockFrequencyInfo *BFI;
1233 ProfileSummaryInfo *PSI;
1234
1235 // This member stores the shared information with class PGOGenFunc.
1236 FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> FuncInfo;
1237
1238 // The maximum count value in the profile. This is only used in PGO use
1239 // compilation.
// NOTE(review): this member has no default initializer and is not set by the
// constructor above; it is only assigned in getRecord() on success, so
// getProgramMaxCount() should not be called before that.
1240 uint64_t ProgramMaxCount;
1241
1242 // Position of counter that remains to be read.
1243 uint32_t CountPosition = 0;
1244
1245 // Total size of the profile count for this function.
1246 uint32_t ProfileCountSize = 0;
1247
1248 // ProfileRecord for this function.
1249 NamedInstrProfRecord ProfileRecord;
1250
1251 // Function hotness info derived from profile.
1252 FuncFreqAttr FreqAttr;
1253
1254 // Is to use the context sensitive profile.
1255 bool IsCS;
1256
1257 ValueProfileCollector VPC;
1258
1259 // Find the Instrumented BB and set the value. Return false on error.
1260 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
1261
1262 // Set the edge counter value for the unknown edge -- there should be only
1263 // one unknown edge.
1264 void setEdgeCount(DirectEdges &Edges, uint64_t Value);
1265
1266 // Set the hot/cold inline hints based on the count values.
1267 // FIXME: This function should be removed once the functionality in
1268 // the inliner is implemented.
// A hot entry count takes precedence; otherwise the function is marked cold
// only when even its maximum count is cold. FreqAttr is left unchanged in all
// other cases.
1269 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
1270 if (PSI->isHotCount(EntryCount))
1271 FreqAttr = FFA_Hot;
1272 else if (PSI->isColdCount(MaxCount))
1273 FreqAttr = FFA_Cold;
1274 }
1275};
1276
1277} // end anonymous namespace
1278
1279/// Set up InEdges/OutEdges for all BBs in the MST.
/// Every non-removed edge is added as an out-edge of its source block and an
/// in-edge of its destination block.
// NOTE(review): listing line 1280 (the start of the function signature) is
// missing from this copy; only the parameter on the next line is visible.
1281 const FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> &FuncInfo) {
1282 // This is not required when there is block coverage inference.
1283 if (FuncInfo.BCI)
1284 return;
1285 for (const auto &E : FuncInfo.MST.allEdges()) {
1286 if (E->Removed)
1287 continue;
1288 const BasicBlock *SrcBB = E->SrcBB;
1289 const BasicBlock *DestBB = E->DestBB;
1290 PGOUseBBInfo &SrcInfo = FuncInfo.getBBInfo(SrcBB);
1291 PGOUseBBInfo &DestInfo = FuncInfo.getBBInfo(DestBB);
1292 SrcInfo.addOutEdge(E.get());
1293 DestInfo.addInEdge(E.get());
1294 }
1295}
1296
1297// Visit all the edges and assign the count value for the instrumented
1298// edges and the BB. Return false on error.
1299bool PGOUseFunc::setInstrumentedCounts(
1300 const std::vector<uint64_t> &CountFromProfile) {
1301
1302 std::vector<BasicBlock *> InstrumentBBs;
1303 FuncInfo.getInstrumentBBs(InstrumentBBs);
1304
1305 setupBBInfoEdges(FuncInfo);
1306
1307 unsigned NumCounters =
1308 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
1309 // The number of counters here should match the number of counters
1310 // in profile. Return if they mismatch.
1311 if (NumCounters != CountFromProfile.size()) {
1312 return false;
1313 }
1314 auto *FuncEntry = &*F.begin();
1315
1316 // Set the profile count to the Instrumented BBs.
1317 uint32_t I = 0;
1318 for (BasicBlock *InstrBB : InstrumentBBs) {
1319 uint64_t CountValue = CountFromProfile[I++];
1320 PGOUseBBInfo &Info = getBBInfo(InstrBB);
1321 // If we reach here, we know that we have some nonzero count
1322 // values in this function. The entry count should not be 0.
1323 // Fix it if necessary.
1324 if (InstrBB == FuncEntry && CountValue == 0)
1325 CountValue = 1;
1326 Info.setBBInfoCount(CountValue);
1327 }
1328 ProfileCountSize = CountFromProfile.size();
1329 CountPosition = I;
1330
1331 // Set the edge count and update the count of unknown edges for BBs.
1332 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void {
1333 E->setEdgeCount(Value);
1334 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1335 this->getBBInfo(E->DestBB).UnknownCountInEdge--;
1336 };
1337
1338 // Set the profile count the Instrumented edges. There are BBs that not in
1339 // MST but not instrumented. Need to set the edge count value so that we can
1340 // populate the profile counts later.
1341 for (const auto &E : FuncInfo.MST.allEdges()) {
1342 if (E->Removed || E->InMST)
1343 continue;
1344 const BasicBlock *SrcBB = E->SrcBB;
1345 PGOUseBBInfo &SrcInfo = getBBInfo(SrcBB);
1346
1347 // If only one out-edge, the edge profile count should be the same as BB
1348 // profile count.
1349 if (SrcInfo.Count && SrcInfo.OutEdges.size() == 1)
1350 setEdgeCount(E.get(), *SrcInfo.Count);
1351 else {
1352 const BasicBlock *DestBB = E->DestBB;
1353 PGOUseBBInfo &DestInfo = getBBInfo(DestBB);
1354 // If only one in-edge, the edge profile count should be the same as BB
1355 // profile count.
1356 if (DestInfo.Count && DestInfo.InEdges.size() == 1)
1357 setEdgeCount(E.get(), *DestInfo.Count);
1358 }
1359 if (E->Count)
1360 continue;
1361 // E's count should have been set from profile. If not, this meenas E skips
1362 // the instrumentation. We set the count to 0.
1363 setEdgeCount(E.get(), 0);
1364 }
1365 return true;
1366}
1367
1368// Set the count value for the unknown edge. There should be one and only one
1369// unknown edge in Edges vector.
1370void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
1371 for (auto &E : Edges) {
1372 if (E->Count)
1373 continue;
1374 E->setEdgeCount(Value);
1375
1376 getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1377 getBBInfo(E->DestBB).UnknownCountInEdge--;
1378 return;
1379 }
1380 llvm_unreachable("Cannot find the unknown count edge");
1381}
1382
1383// Emit function metadata indicating PGO profile mismatch.
// The marker string is appended to the function's existing MD_annotation
// tuple, if any; nothing is changed when the marker is already present.
// NOTE(review): listing line 1384 (the function signature) is missing from this
// copy; the body uses a function F and a context ctx.
1385 const char MetadataName[] = "instr_prof_hash_mismatch";
// NOTE(review): listing line 1386 is missing from this copy; Names is used
// below but its declaration is not visible.
1387 // If this metadata already exists, ignore.
1388 auto *Existing = F.getMetadata(LLVMContext::MD_annotation);
1389 if (Existing) {
1390 MDTuple *Tuple = cast<MDTuple>(Existing);
1391 for (const auto &N : Tuple->operands()) {
1392 if (N.equalsStr(MetadataName))
1393 return;
// Carry over the existing annotations so they survive the replacement below.
1394 Names.push_back(N.get());
1395 }
1396 }
1397
1398 MDBuilder MDB(ctx);
1399 Names.push_back(MDB.createString(MetadataName));
1400 MDNode *MD = MDTuple::get(ctx, Names);
1401 F.setMetadata(LLVMContext::MD_annotation, MD);
1402}
1403
// Handle an error from reading this function's profile record: bump the
// matching statistic, tag the function with hash-mismatch metadata where
// applicable, and emit a DS_Warning diagnostic unless it is suppressed.
// \p MismatchedFuncSum is only used in the warning text.
1404void PGOUseFunc::handleInstrProfError(Error Err, uint64_t MismatchedFuncSum) {
1405 handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
1406 auto &Ctx = M->getContext();
// This Err shadows the function parameter and holds the error code of IPE.
1407 auto Err = IPE.get();
1408 bool SkipWarning = false;
1409 LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
1410 << FuncInfo.FuncName << ": ");
1411 if (Err == instrprof_error::unknown_function) {
1412 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
1413 SkipWarning = !PGOWarnMissing;
1414 LLVM_DEBUG(dbgs() << "unknown function");
1415 } else if (Err == instrprof_error::hash_mismatch ||
1416 Err == instrprof_error::malformed) {
1417 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
// NOTE(review): listing lines 1419, 1420 and 1422 are missing from this copy,
// so the full SkipWarning condition is not visible. Only the comdat /
// WeakAnyLinkage part of it remains.
1418 SkipWarning =
1421 (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage ||
1423 LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash
1424 << " skip=" << SkipWarning << ")");
1425 // Emit function metadata indicating PGO profile mismatch.
1426 annotateFunctionWithHashMismatch(F, M->getContext());
1427 }
1428
1429 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
1430 if (SkipWarning)
1431 return;
1432
1433 std::string Msg =
1434 IPE.message() + std::string(" ") + F.getName().str() +
1435 std::string(" Hash = ") + std::to_string(FuncInfo.FunctionHash) +
1436 std::string(" up to ") + std::to_string(MismatchedFuncSum) +
1437 std::string(" count discarded");
1438
1439 Ctx.diagnose(
1440 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
1441 });
1442}
1443
1444bool PGOUseFunc::getRecord(IndexedInstrProfReader *PGOReader) {
1445 uint64_t MismatchedFuncSum = 0;
1446 auto Result = PGOReader->getInstrProfRecord(
1447 FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1448 &MismatchedFuncSum);
1449 if (Error E = Result.takeError()) {
1450 handleInstrProfError(std::move(E), MismatchedFuncSum);
1451 return false;
1452 }
1453 ProfileRecord = std::move(Result.get());
1454 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS);
1455 return true;
1456}
1457
1458// Read the profile from ProfileFileName and assign the value to the
1459// instrumented BB and the edges. Return true if the profile are successfully
1460// read, and false on errors.
// \p AllZeros is set to whether all counters in the record sum to zero. It is
// left untouched when the record is a pseudo record, in which case the function
// returns true without assigning any counts.
// NOTE(review): listing line 1462 (the remaining parameter(s) and the opening
// brace) is missing from this copy; the body assigns to a PseudoKind that is
// not declared locally.
1461bool PGOUseFunc::readCounters(bool &AllZeros,
1463 auto &Ctx = M->getContext();
1464 PseudoKind = ProfileRecord.getCountPseudoKind();
1465 if (PseudoKind != InstrProfRecord::NotPseudo) {
1466 return true;
1467 }
1468 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts;
1469
1470 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1471 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
1472
1473 uint64_t ValueSum = 0;
1474 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
1475 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n");
1476 ValueSum += CountFromProfile[I];
1477 }
1478 AllZeros = (ValueSum == 0);
1479
1480 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n");
1481
// The info entry keyed by a null BB starts with two unknown edges in each
// direction. NOTE(review): presumably this keeps that entry from ever being
// treated as having a single unknown edge during propagation — confirm.
1482 getBBInfo(nullptr).UnknownCountOutEdge = 2;
1483 getBBInfo(nullptr).UnknownCountInEdge = 2;
1484
1485 if (!setInstrumentedCounts(CountFromProfile)) {
1486 LLVM_DEBUG(
1487 dbgs() << "Inconsistent number of counts, skipping this function");
1488 Ctx.diagnose(DiagnosticInfoPGOProfile(
1489 M->getName().data(),
1490 Twine("Inconsistent number of counts in ") + F.getName().str() +
1491 Twine(": the profile may be stale or there is a function name "
1492 "collision."),
1493 DS_Warning));
1494 return false;
1495 }
1496 return true;
1497}
1498
// Apply a block-coverage profile to the function: read one covered/uncovered
// flag per instrumented block, infer coverage for the remaining blocks from
// the BCI dependencies, then encode the result as an entry count plus 0/1
// branch weights. Finally, cross-check the result against a freshly computed
// BFI.
1499void PGOUseFunc::populateCoverage() {
1500 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1501
1502 ArrayRef<uint64_t> CountsFromProfile = ProfileRecord.Counts;
1503 DenseMap<const BasicBlock *, bool> Coverage;
1504 unsigned Index = 0;
// Instrumented blocks consume the profile values in function block order.
// NOTE(review): the index is only validated by the assert after the loop, so
// a record with too few values is not caught in builds without assertions.
1505 for (auto &BB : F)
1506 if (FuncInfo.BCI->shouldInstrumentBlock(BB))
1507 Coverage[&BB] = (CountsFromProfile[Index++] != 0);
1508 assert(Index == CountsFromProfile.size());
1509
1510 // For each B in InverseDependencies[A], if A is covered then B is covered.
1511 DenseMap<const BasicBlock *, DenseSet<const BasicBlock *>>
1512 InverseDependencies;
1513 for (auto &BB : F) {
1514 for (auto *Dep : FuncInfo.BCI->getDependencies(BB)) {
1515 // If Dep is covered then BB is covered.
1516 InverseDependencies[Dep].insert(&BB);
1517 }
1518 }
1519
1520 // Infer coverage of the non-instrumented blocks using a flood-fill algorithm.
1521 std::stack<const BasicBlock *> CoveredBlocksToProcess;
1522 for (auto &[BB, IsCovered] : Coverage)
1523 if (IsCovered)
1524 CoveredBlocksToProcess.push(BB);
1525
1526 while (!CoveredBlocksToProcess.empty()) {
1527 auto *CoveredBlock = CoveredBlocksToProcess.top();
1528 assert(Coverage[CoveredBlock]);
1529 CoveredBlocksToProcess.pop();
1530 for (auto *BB : InverseDependencies[CoveredBlock]) {
1531 // If CoveredBlock is covered then BB is covered.
// operator[] default-inserts 'false' for blocks not seen before, so each
// block is pushed at most once.
1532 bool &Cov = Coverage[BB];
1533 if (Cov)
1534 continue;
1535 Cov = true;
1536 CoveredBlocksToProcess.push(BB);
1537 }
1538 }
1539
1540 // Annotate block coverage.
1541 MDBuilder MDB(F.getContext());
1542 // We set the entry count to 10000 if the entry block is covered so that BFI
1543 // can propagate a fraction of this count to the other covered blocks.
1544 F.setEntryCount(Coverage[&F.getEntryBlock()] ? 10000 : 0);
1545 for (auto &BB : F) {
1546 // For a block A and its successor B, we set the edge weight as follows:
1547 // If A is covered and B is covered, set weight=1.
1548 // If A is covered and B is uncovered, set weight=0.
1549 // If A is uncovered, set weight=1.
1550 // This setup will allow BFI to give nonzero profile counts to only covered
1551 // blocks.
1552 SmallVector<uint32_t, 4> Weights;
1553 for (auto *Succ : successors(&BB))
1554 Weights.push_back((Coverage[Succ] || !Coverage[&BB]) ? 1 : 0);
// Weights are only attached to terminators with at least two successors.
1555 if (Weights.size() >= 2)
1556 llvm::setBranchWeights(*BB.getTerminator(), Weights,
1557 /*IsExpected=*/false);
1558 }
1559
// Recompute the analyses locally so that they see the weights set above. These
// locals shadow the BFI member.
1560 unsigned NumCorruptCoverage = 0;
1561 DominatorTree DT(F);
1562 LoopInfo LI(DT);
1563 BranchProbabilityInfo BPI(F, LI);
1564 BlockFrequencyInfo BFI(F, BPI, LI);
// Returns whether BFI assigns a zero profile count to BB, or an empty optional
// when BFI has no profile count for it.
1565 auto IsBlockDead = [&](const BasicBlock &BB) -> std::optional<bool> {
1566 if (auto C = BFI.getBlockProfileCount(&BB))
1567 return C == 0;
1568 return {};
1569 };
1570 LLVM_DEBUG(dbgs() << "Block Coverage: (Instrumented=*, Covered=X)\n");
1571 for (auto &BB : F) {
1572 LLVM_DEBUG(dbgs() << (FuncInfo.BCI->shouldInstrumentBlock(BB) ? "* " : " ")
1573 << (Coverage[&BB] ? "X " : " ") << " " << BB.getName()
1574 << "\n");
1575 // In some cases it is possible to find a covered block that has no covered
1576 // successors, e.g., when a block calls a function that may call exit(). In
1577 // those cases, BFI could find its successor to be covered while BCI could
1578 // find its successor to be dead.
// A block is inconsistent when it is covered and BFI says dead, or uncovered
// and BFI says live. A missing BFI count is treated as "not dead".
1579 const bool &Cov = Coverage[&BB];
1580 if (Cov == IsBlockDead(BB).value_or(false)) {
1581 LLVM_DEBUG(
1582 dbgs() << "Found inconsistent block covearge for " << BB.getName()
1583 << ": BCI=" << (Cov ? "Covered" : "Dead") << " BFI="
1584 << (IsBlockDead(BB).value() ? "Dead" : "Covered") << "\n");
1585 ++NumCorruptCoverage;
1586 }
1587 if (Cov)
1588 ++NumCoveredBlocks;
1589 }
// The inconsistency warning is emitted only when PGOVerifyBFI is set.
1590 if (PGOVerifyBFI && NumCorruptCoverage) {
1591 auto &Ctx = M->getContext();
1592 Ctx.diagnose(DiagnosticInfoPGOProfile(
1593 M->getName().data(),
1594 Twine("Found inconsistent block coverage for function ") + F.getName() +
1595 " in " + Twine(NumCorruptCoverage) + " blocks.",
1596 DS_Warning));
1597 }
// NOTE(review): listing line 1598 is missing from this copy; presumably it is
// a condition guarding the graph-view call below — confirm against upstream.
1599 FuncInfo.BCI->viewBlockCoverageGraph(&Coverage);
1600}
1601
1602// Populate the counters from instrumented BBs to all BBs.
1603// In the end of this operation, all BBs should have a valid count value.
// The propagation repeats over all blocks until a full pass makes no change.
// It uses two rules: a block with no unknown in-edges (or out-edges) gets the
// sum of those edges, and a block with a known count and exactly one unknown
// edge on a side gives that edge the remainder.
1604void PGOUseFunc::populateCounters() {
1605 bool Changes = true;
1606 unsigned NumPasses = 0;
1607 while (Changes) {
1608 NumPasses++;
1609 Changes = false;
1610
1611 // For efficient traversal, it's better to start from the end as most
1612 // of the instrumented edges are at the end.
1613 for (auto &BB : reverse(F)) {
1614 PGOUseBBInfo *UseBBInfo = findBBInfo(&BB);
// Blocks without auxiliary info are skipped.
1615 if (UseBBInfo == nullptr)
1616 continue;
1617 if (!UseBBInfo->Count) {
1618 if (UseBBInfo->UnknownCountOutEdge == 0) {
1619 UseBBInfo->Count = sumEdgeCount(UseBBInfo->OutEdges);
1620 Changes = true;
1621 } else if (UseBBInfo->UnknownCountInEdge == 0) {
1622 UseBBInfo->Count = sumEdgeCount(UseBBInfo->InEdges);
1623 Changes = true;
1624 }
1625 }
1626 if (UseBBInfo->Count) {
1627 if (UseBBInfo->UnknownCountOutEdge == 1) {
1628 uint64_t Total = 0;
1629 uint64_t OutSum = sumEdgeCount(UseBBInfo->OutEdges);
1630 // If the one of the successor block can early terminate (no-return),
1631 // we can end up with situation where out edge sum count is larger as
1632 // the source BB's count is collected by a post-dominated block.
// The remainder is clamped at zero instead of wrapping around.
1633 if (*UseBBInfo->Count > OutSum)
1634 Total = *UseBBInfo->Count - OutSum;
1635 setEdgeCount(UseBBInfo->OutEdges, Total);
1636 Changes = true;
1637 }
1638 if (UseBBInfo->UnknownCountInEdge == 1) {
1639 uint64_t Total = 0;
1640 uint64_t InSum = sumEdgeCount(UseBBInfo->InEdges);
1641 if (*UseBBInfo->Count > InSum)
1642 Total = *UseBBInfo->Count - InSum;
1643 setEdgeCount(UseBBInfo->InEdges, Total);
1644 Changes = true;
1645 }
1646 }
1647 }
1648 }
1649
1650 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
// The cast silences the unused-variable warning when LLVM_DEBUG compiles out.
1651 (void)NumPasses;
1652#ifndef NDEBUG
1653 // Assert every BB has a valid counter.
1654 for (auto &BB : F) {
1655 auto BI = findBBInfo(&BB);
1656 if (BI == nullptr)
1657 continue;
1658 assert(BI->Count && "BB count is not valid");
1659 }
1660#endif
1661 // Now annotate select instructions. This may fixup impossible block counts.
1662 FuncInfo.SIVisitor.annotateSelects(this, &CountPosition);
// All counters from the profile record must have been consumed by now.
1663 assert(CountPosition == ProfileCountSize);
1664
1665 uint64_t FuncEntryCount = *getBBInfo(&*F.begin()).Count;
1666 uint64_t FuncMaxCount = FuncEntryCount;
1667 for (auto &BB : F) {
1668 auto BI = findBBInfo(&BB);
1669 if (BI == nullptr)
1670 continue;
1671 FuncMaxCount = std::max(FuncMaxCount, *BI->Count);
1672 }
1673
1674 // Fix the obviously inconsistent entry count.
1675 if (FuncMaxCount > 0 && FuncEntryCount == 0)
1676 FuncEntryCount = 1;
// NOTE(review): listing line 1677 is missing from this copy; presumably it
// records FuncEntryCount on the function — confirm against upstream.
1678 markFunctionAttributes(FuncEntryCount, FuncMaxCount);
1679
1680 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile."));
1681}
1682
1683// Assign the scaled count values to the BB with multiple out edges.
// Terminators with fewer than two successors, terminators of kinds not listed
// below, and blocks with a zero count are skipped.
1684void PGOUseFunc::setBranchWeights() {
1685 // Generate MD_prof metadata for every branch instruction.
1686 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName()
1687 << " IsCS=" << IsCS << "\n");
1688 for (auto &BB : F) {
1689 Instruction *TI = BB.getTerminator();
1690 if (TI->getNumSuccessors() < 2)
1691 continue;
// NOTE(review): listing line 1693 is missing from this copy, so the list of
// accepted terminator kinds below is incomplete.
1692 if (!(isa<CondBrInst>(TI) || isa<SwitchInst>(TI) ||
1694 isa<CallBrInst>(TI)))
1695 continue;
1696
1697 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
// Count is dereferenced unconditionally, so it must have been populated for
// every block that reaches this point.
1698 if (!*BBCountInfo.Count)
1699 continue;
1700
1701 // We have a non-zero Branch BB.
1702
1703 // SuccessorCount can be greater than OutEdgesCount, because
1704 // removed edges don't appear in OutEdges.
1705 unsigned OutEdgesCount = BBCountInfo.OutEdges.size();
1706 unsigned SuccessorCount = BB.getTerminator()->getNumSuccessors();
1707 assert(OutEdgesCount <= SuccessorCount);
1708
// Successors without a matching out-edge keep a count of zero.
1709 SmallVector<uint64_t, 2> EdgeCounts(SuccessorCount, 0);
1710 uint64_t MaxCount = 0;
1711 for (unsigned It = 0; It < OutEdgesCount; It++) {
1712 const PGOUseEdge *E = BBCountInfo.OutEdges[It];
1713 const BasicBlock *SrcBB = E->SrcBB;
1714 const BasicBlock *DestBB = E->DestBB;
// Edges with a null destination do not correspond to a successor slot.
1715 if (DestBB == nullptr)
1716 continue;
1717 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
1718 uint64_t EdgeCount = *E->Count;
1719 if (EdgeCount > MaxCount)
1720 MaxCount = EdgeCount;
1721 EdgeCounts[SuccNum] = EdgeCount;
1722 }
1723
1724 if (MaxCount)
1725 setProfMetadata(TI, EdgeCounts, MaxCount);
1726 else {
1727 // A zero MaxCount can come about when we have a BB with a positive
1728 // count, and whose successor blocks all have 0 count. This can happen
1729 // when there is no exit block and the code exits via a noreturn function.
1730 auto &Ctx = M->getContext();
1731 Ctx.diagnose(DiagnosticInfoPGOProfile(
1732 M->getName().data(),
1733 Twine("Profile in ") + F.getName().str() +
1734 Twine(" partially ignored") +
1735 Twine(", possibly due to the lack of a return path."),
1736 DS_Warning));
1737 }
1738 }
1739}
1740
// Return true if any predecessor of BB ends in an indirectbr instruction.
// NOTE(review): listing line 1741 (the function signature) is missing from this
// copy; the body iterates the predecessors of a block named BB.
1742 for (BasicBlock *Pred : predecessors(BB)) {
1743 if (isa<IndirectBrInst>(Pred->getTerminator()))
1744 return true;
1745 }
1746 return false;
1747}
1748
1749void PGOUseFunc::annotateIrrLoopHeaderWeights() {
1750 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n");
1751 // Find irr loop headers
1752 for (auto &BB : F) {
1753 // As a heuristic also annotate indrectbr targets as they have a high chance
1754 // to become an irreducible loop header after the indirectbr tail
1755 // duplication.
1756 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) {
1757 Instruction *TI = BB.getTerminator();
1758 const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1759 setIrrLoopHeaderMetadata(M, TI, *BBCountInfo.Count);
1760 }
1761 }
1762}
1763
// Instrument one select instruction: immediately before \p SI, emit an
// llvm.instrprof.increment.step call whose step is the select condition
// zero-extended to i64, then advance the current counter index.
1764void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
1765 Module *M = F.getParent();
1766 IRBuilder<> Builder(&SI);
1767 Type *Int64Ty = Builder.getInt64Ty();
1768 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
// NOTE(review): listing line 1770 (the start of the initializer expression) is
// missing from this copy. The visible arguments show FuncNameVar being
// converted to a pointer type in address space 0.
1769 auto *NormalizedFuncNameVarPtr =
1771 FuncNameVar, PointerType::get(M->getContext(), 0));
1772 Builder.CreateIntrinsic(Intrinsic::instrprof_increment_step,
1773 {NormalizedFuncNameVarPtr, Builder.getInt64(FuncHash),
1774 Builder.getInt32(TotalNumCtrs),
1775 Builder.getInt32(*CurCtrIdx), Step});
1776 ++(*CurCtrIdx);
1777}
1778
1779void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) {
1780 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
1781 assert(*CurCtrIdx < CountFromProfile.size() &&
1782 "Out of bound access of counters");
1783 uint64_t SCounts[2];
1784 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count
1785 ++(*CurCtrIdx);
1786 uint64_t TotalCount = 0;
1787 auto BI = UseFunc->findBBInfo(SI.getParent());
1788 if (BI != nullptr) {
1789 TotalCount = *BI->Count;
1790
1791 // Fix the block count if it is impossible.
1792 if (TotalCount < SCounts[0])
1793 BI->Count = SCounts[0];
1794 }
1795 // False Count
1796 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
1797 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
1798 if (MaxCount)
1799 setProfMetadata(&SI, SCounts, MaxCount);
1800}
1801
1802void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
1803 if (!PGOInstrSelect || PGOFunctionEntryCoverage || HasSingleByteCoverage)
1804 return;
1805 // FIXME: do not handle this yet.
1806 if (SI.getCondition()->getType()->isVectorTy())
1807 return;
1808
1809 switch (Mode) {
1810 case VM_counting:
1811 NSIs++;
1812 return;
1813 case VM_instrument:
1814 instrumentOneSelectInst(SI);
1815 return;
1816 case VM_annotate:
1817 annotateOneSelectInst(SI);
1818 return;
1819 }
1820
1821 llvm_unreachable("Unknown visiting mode");
1822}
1823
1825 if (ValueProfKind == IPVK_MemOPSize)
1827 if (ValueProfKind == llvm::IPVK_VTableTarget)
1829 return MaxNumAnnotations;
1830}
1831
1832// Traverse all valuesites and annotate the instructions for all value kind.
1833void PGOUseFunc::annotateValueSites() {
1835 return;
1836
1837 // Create the PGOFuncName meta data.
1838 createPGOFuncNameMetadata(F, FuncInfo.FuncName);
1839
1840 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1841 annotateValueSites(Kind);
1842}
1843
1844// Annotate the instructions for a specific value kind.
1845void PGOUseFunc::annotateValueSites(uint32_t Kind) {
1846 assert(Kind <= IPVK_Last);
1847 unsigned ValueSiteIndex = 0;
1848
1849 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind);
1850
1851 // Since there isn't a reliable or fast way for profile reader to tell if a
1852 // profile is generated with `-enable-vtable-value-profiling` on, we run the
1853 // value profile collector over the function IR to find the instrumented sites
1854 // iff function profile records shows the number of instrumented vtable sites
1855 // is not zero. Function cfg already takes the number of instrumented
1856 // indirect call sites into account so it doesn't hash the number of
1857 // instrumented vtables; as a side effect it makes it easier to enable
1858 // profiling and profile use in two steps if needed.
1859 // TODO: Remove this if/when -enable-vtable-value-profiling is on by default.
1860 if (NumValueSites > 0 && Kind == IPVK_VTableTarget &&
1861 NumValueSites != FuncInfo.ValueSites[IPVK_VTableTarget].size() &&
1863 FuncInfo.ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget);
1864 auto &ValueSites = FuncInfo.ValueSites[Kind];
1865 if (NumValueSites != ValueSites.size()) {
1866 auto &Ctx = M->getContext();
1867 Ctx.diagnose(DiagnosticInfoPGOProfile(
1868 M->getName().data(),
1869 Twine("Inconsistent number of value sites for ") +
1870 Twine(ValueProfKindDescr[Kind]) + Twine(" profiling in \"") +
1871 F.getName().str() +
1872 Twine("\", possibly due to the use of a stale profile."),
1873 DS_Warning));
1874 return;
1875 }
1876
1877 for (VPCandidateInfo &I : ValueSites) {
1878 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind
1879 << "): Index = " << ValueSiteIndex << " out of "
1880 << NumValueSites << "\n");
1882 *M, *I.AnnotatedInst, ProfileRecord,
1883 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex,
1884 getMaxNumAnnotations(static_cast<InstrProfValueKind>(Kind)));
1885 ValueSiteIndex++;
1886 }
1887}
1888
1889// Collect the set of members for each Comdat in module M and store
1890// in ComdatMembers.
1892 Module &M,
1893 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
1894 if (!DoComdatRenaming)
1895 return;
1896 for (Function &F : M)
1897 if (Comdat *C = F.getComdat())
1898 ComdatMembers.insert(std::make_pair(C, &F));
1899 for (GlobalVariable &GV : M.globals())
1900 if (Comdat *C = GV.getComdat())
1901 ComdatMembers.insert(std::make_pair(C, &GV));
1902 for (GlobalAlias &GA : M.aliases())
1903 if (Comdat *C = GA.getComdat())
1904 ComdatMembers.insert(std::make_pair(C, &GA));
1905}
1906
1907// Return true if we should not find instrumentation data for this function
1908static bool skipPGOUse(const Function &F) {
1909 if (F.isDeclaration())
1910 return true;
1911 // If there are too many critical edges, PGO might cause
1912 // compiler time problem. Skip PGO if the number of
1913 // critical edges execeed the threshold.
1914 unsigned NumCriticalEdges = 0;
1915 for (auto &BB : F) {
1916 const Instruction *TI = BB.getTerminator();
1917 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
1918 if (isCriticalEdge(TI, I))
1919 NumCriticalEdges++;
1920 }
1921 }
1922 if (NumCriticalEdges > PGOFunctionCriticalEdgeThreshold) {
1923 LLVM_DEBUG(dbgs() << "In func " << F.getName()
1924 << ", NumCriticalEdges=" << NumCriticalEdges
1925 << " exceed the threshold. Skip PGO.\n");
1926 return true;
1927 }
1928 return false;
1929}
1930
1931// Return true if we should not instrument this function
1932static bool skipPGOGen(const Function &F) {
1933 if (skipPGOUse(F))
1934 return true;
1935 if (F.hasFnAttribute(llvm::Attribute::Naked))
1936 return true;
1937 if (F.hasFnAttribute(llvm::Attribute::NoProfile))
1938 return true;
1939 if (F.hasFnAttribute(llvm::Attribute::SkipProfile))
1940 return true;
1941 if (F.getInstructionCount() < PGOFunctionSizeThreshold)
1942 return true;
1944 if (auto EntryCount = F.getEntryCount())
1945 return EntryCount->getCount() > PGOColdInstrumentEntryThreshold;
1946 return !PGOTreatUnknownAsCold;
1947 }
1948 return false;
1949}
1950
1952 Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
1955 function_ref<LoopInfo *(Function &)> LookupLI,
1956 PGOInstrumentationType InstrumentationType) {
1957 // For the context-sensitive instrumentation, we should have a separated pass
1958 // (before LTO/ThinLTO linking) to create these variables.
1959 if (InstrumentationType == PGOInstrumentationType::FDO)
1960 createIRLevelProfileFlagVar(M, InstrumentationType);
1961
1962 Triple TT(M.getTargetTriple());
1963 LLVMContext &Ctx = M.getContext();
1964 if (!TT.isOSBinFormatELF() && EnableVTableValueProfiling)
1966 M.getName().data(),
1967 Twine("VTable value profiling is presently not "
1968 "supported for non-ELF object formats"),
1969 DS_Warning));
1970 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
1971 collectComdatMembers(M, ComdatMembers);
1972
1973 for (auto &F : M) {
1974 if (skipPGOGen(F))
1975 continue;
1976 TargetLibraryInfo &TLI = LookupTLI(F);
1977 BranchProbabilityInfo *BPI = LookupBPI(F);
1978 BlockFrequencyInfo *BFI = LookupBFI(F);
1979 LoopInfo *LI = LookupLI(F);
1980 FunctionInstrumenter FI(M, F, TLI, ComdatMembers, BPI, BFI, LI,
1981 InstrumentationType);
1982 FI.instrument();
1983 }
1984 return true;
1985}
1986
1987PreservedAnalyses
1989 createProfileFileNameVar(M, CSInstrName);
1990 // The variable in a comdat may be discarded by LTO. Ensure the declaration
1991 // will be retained.
1994 if (ProfileSampling)
1999 return PA;
2000}
2001
2004 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2005 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
2006 return FAM.getResult<TargetLibraryAnalysis>(F);
2007 };
2008 auto LookupBPI = [&FAM](Function &F) {
2009 return &FAM.getResult<BranchProbabilityAnalysis>(F);
2010 };
2011 auto LookupBFI = [&FAM](Function &F) {
2012 return &FAM.getResult<BlockFrequencyAnalysis>(F);
2013 };
2014 auto LookupLI = [&FAM](Function &F) {
2015 return &FAM.getResult<LoopAnalysis>(F);
2016 };
2017
2018 if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, LookupLI,
2019 InstrumentationType))
2020 return PreservedAnalyses::all();
2021
2022 return PreservedAnalyses::none();
2023}
2024
2025// Using the ratio b/w sums of profile count values and BFI count values to
2026// adjust the func entry count.
2027static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI,
2028 BranchProbabilityInfo &NBPI) {
2029 Function &F = Func.getFunc();
2030 BlockFrequencyInfo NBFI(F, NBPI, LI);
2031#ifndef NDEBUG
2032 auto BFIEntryCount = F.getEntryCount();
2033 assert(BFIEntryCount && (BFIEntryCount->getCount() > 0) &&
2034 "Invalid BFI Entrycount");
2035#endif
2036 auto SumCount = APFloat::getZero(APFloat::IEEEdouble());
2037 auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble());
2038 for (auto &BBI : F) {
2039 uint64_t CountValue = 0;
2040 uint64_t BFICountValue = 0;
2041 if (!Func.findBBInfo(&BBI))
2042 continue;
2043 auto BFICount = NBFI.getBlockProfileCount(&BBI);
2044 CountValue = *Func.getBBInfo(&BBI).Count;
2045 BFICountValue = *BFICount;
2046 SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven);
2047 SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven);
2048 }
2049 if (SumCount.isZero())
2050 return;
2051
2052 assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan &&
2053 "Incorrect sum of BFI counts");
2054 if (SumBFICount.compare(SumCount) == APFloat::cmpEqual)
2055 return;
2056 double Scale = (SumCount / SumBFICount).convertToDouble();
2057 if (Scale < 1.001 && Scale > 0.999)
2058 return;
2059
2060 uint64_t FuncEntryCount = *Func.getBBInfo(&*F.begin()).Count;
2061 uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale;
2062 if (NewEntryCount == 0)
2063 NewEntryCount = 1;
2064 if (NewEntryCount != FuncEntryCount) {
2065 F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real));
2066 LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName()
2067 << ", entry_count " << FuncEntryCount << " --> "
2068 << NewEntryCount << "\n");
2069 }
2070}
2071
2072// Compare the profile count values with BFI count values, and print out
2073// the non-matching ones.
2074static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI,
2076 uint64_t HotCountThreshold,
2078 Function &F = Func.getFunc();
2079 BlockFrequencyInfo NBFI(F, NBPI, LI);
2080 // bool PrintFunc = false;
2081 bool HotBBOnly = PGOVerifyHotBFI;
2082 StringRef Msg;
2084
2085 unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0;
2086 for (auto &BBI : F) {
2087 PGOUseBBInfo *BBInfo = Func.findBBInfo(&BBI);
2088 if (!BBInfo)
2089 continue;
2090
2091 uint64_t CountValue = BBInfo->Count.value_or(CountValue);
2092 uint64_t BFICountValue = 0;
2093
2094 BBNum++;
2095 if (CountValue)
2096 NonZeroBBNum++;
2097 auto BFICount = NBFI.getBlockProfileCount(&BBI);
2098 if (BFICount)
2099 BFICountValue = *BFICount;
2100
2101 if (HotBBOnly) {
2102 bool rawIsHot = CountValue >= HotCountThreshold;
2103 bool BFIIsHot = BFICountValue >= HotCountThreshold;
2104 bool rawIsCold = CountValue <= ColdCountThreshold;
2105 bool ShowCount = false;
2106 if (rawIsHot && !BFIIsHot) {
2107 Msg = "raw-Hot to BFI-nonHot";
2108 ShowCount = true;
2109 } else if (rawIsCold && BFIIsHot) {
2110 Msg = "raw-Cold to BFI-Hot";
2111 ShowCount = true;
2112 }
2113 if (!ShowCount)
2114 continue;
2115 } else {
2116 if ((CountValue < PGOVerifyBFICutoff) &&
2117 (BFICountValue < PGOVerifyBFICutoff))
2118 continue;
2119 uint64_t Diff = (BFICountValue >= CountValue)
2120 ? BFICountValue - CountValue
2121 : CountValue - BFICountValue;
2122 if (Diff <= CountValue / 100 * PGOVerifyBFIRatio)
2123 continue;
2124 }
2125 BBMisMatchNum++;
2126
2127 ORE.emit([&]() {
2129 F.getSubprogram(), &BBI);
2130 Remark << "BB " << ore::NV("Block", BBI.getName())
2131 << " Count=" << ore::NV("Count", CountValue)
2132 << " BFI_Count=" << ore::NV("Count", BFICountValue);
2133 if (!Msg.empty())
2134 Remark << " (" << Msg << ")";
2135 return Remark;
2136 });
2137 }
2138 if (BBMisMatchNum)
2139 ORE.emit([&]() {
2140 return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify",
2141 F.getSubprogram(), &F.getEntryBlock())
2142 << "In Func " << ore::NV("Function", F.getName())
2143 << ": Num_of_BB=" << ore::NV("Count", BBNum)
2144 << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum)
2145 << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum);
2146 });
2147}
2148
2150 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
2151 vfs::FileSystem &FS,
2152 function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
2155 function_ref<LoopInfo *(Function &)> LookupLI, ProfileSummaryInfo *PSI,
2156 bool IsCS) {
2157 LLVM_DEBUG(dbgs() << "Read in profile counters: ");
2158 auto &Ctx = M.getContext();
2159 // Read the counter array from file.
2160 auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName, FS,
2161 ProfileRemappingFileName);
2162 if (Error E = ReaderOrErr.takeError()) {
2163 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
2164 Ctx.diagnose(
2165 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message()));
2166 });
2167 return false;
2168 }
2169
2170 std::unique_ptr<IndexedInstrProfReader> PGOReader =
2171 std::move(ReaderOrErr.get());
2172 if (!PGOReader) {
2173 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(),
2174 StringRef("Cannot get PGOReader")));
2175 return false;
2176 }
2177 if (!PGOReader->hasCSIRLevelProfile() && IsCS)
2178 return false;
2179
2180 // TODO: might need to change the warning once the clang option is finalized.
2181 if (!PGOReader->isIRLevelProfile()) {
2182 Ctx.diagnose(DiagnosticInfoPGOProfile(
2183 ProfileFileName.data(), "Not an IR level instrumentation profile"));
2184 return false;
2185 }
2186 if (PGOReader->functionEntryOnly()) {
2187 Ctx.diagnose(DiagnosticInfoPGOProfile(
2188 ProfileFileName.data(),
2189 "Function entry profiles are not yet supported for optimization"));
2190 return false;
2191 }
2192
2194 for (GlobalVariable &G : M.globals()) {
2195 if (!G.hasName() || !G.hasMetadata(LLVMContext::MD_type))
2196 continue;
2197
2198 // Create the PGOFuncName meta data.
2199 createPGONameMetadata(G, getPGOName(G, false /* InLTO*/));
2200 }
2201 }
2202
2203 // Add the profile summary (read from the header of the indexed summary) here
2204 // so that we can use it below when reading counters (which checks if the
2205 // function should be marked with a cold or inlinehint attribute).
2206 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
2209 PSI->refresh();
2210
2211 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
2212 collectComdatMembers(M, ComdatMembers);
2213 std::vector<Function *> HotFunctions;
2214 std::vector<Function *> ColdFunctions;
2215
2216 // If the profile marked as always instrument the entry BB, do the
2217 // same. Note this can be overwritten by the internal option in CFGMST.h
2218 bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
2219 if (PGOInstrumentEntry.getNumOccurrences() > 0)
2220 InstrumentFuncEntry = PGOInstrumentEntry;
2221 bool InstrumentLoopEntries = PGOReader->instrLoopEntriesEnabled();
2222 if (PGOInstrumentLoopEntries.getNumOccurrences() > 0)
2223 InstrumentLoopEntries = PGOInstrumentLoopEntries;
2224
2225 bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
2226 for (auto &F : M) {
2227 if (skipPGOUse(F))
2228 continue;
2229 TargetLibraryInfo &TLI = LookupTLI(F);
2230 BranchProbabilityInfo *BPI = LookupBPI(F);
2231 BlockFrequencyInfo *BFI = LookupBFI(F);
2232 LoopInfo *LI = LookupLI(F);
2233 if (!HasSingleByteCoverage) {
2234 // Split indirectbr critical edges here before computing the MST rather
2235 // than later in getInstrBB() to avoid invalidating it.
2236 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI,
2237 BFI);
2238 }
2239 PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, LI, PSI, IsCS,
2240 InstrumentFuncEntry, InstrumentLoopEntries,
2241 HasSingleByteCoverage);
2242 if (!Func.getRecord(PGOReader.get()))
2243 continue;
2244 if (HasSingleByteCoverage) {
2245 Func.populateCoverage();
2246 continue;
2247 }
2248 // When PseudoKind is set to a value other than InstrProfRecord::NotPseudo,
2249 // it means the profile for the function is unrepresentative and this
2250 // function is actually hot / warm. We will reset the function hot / cold
2251 // attribute and drop all the profile counters.
2253 bool AllZeros = false;
2254 if (!Func.readCounters(AllZeros, PseudoKind))
2255 continue;
2256 if (AllZeros) {
2257 F.setEntryCount(ProfileCount(0, Function::PCT_Real));
2258 if (Func.getProgramMaxCount() != 0)
2259 ColdFunctions.push_back(&F);
2260 continue;
2261 }
2262 if (PseudoKind != InstrProfRecord::NotPseudo) {
2263 // Clear function attribute cold.
2264 if (F.hasFnAttribute(Attribute::Cold))
2265 F.removeFnAttr(Attribute::Cold);
2266 // Set function attribute as hot.
2267 if (PseudoKind == InstrProfRecord::PseudoHot)
2268 F.addFnAttr(Attribute::Hot);
2269 continue;
2270 }
2271 Func.populateCounters();
2272 Func.setBranchWeights();
2273 Func.annotateValueSites();
2274 Func.annotateIrrLoopHeaderWeights();
2275 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
2276 if (FreqAttr == PGOUseFunc::FFA_Cold)
2277 ColdFunctions.push_back(&F);
2278 else if (FreqAttr == PGOUseFunc::FFA_Hot)
2279 HotFunctions.push_back(&F);
2280 if (PGOViewCounts != PGOVCT_None &&
2281 (ViewBlockFreqFuncName.empty() ||
2282 F.getName() == ViewBlockFreqFuncName)) {
2284 std::unique_ptr<BranchProbabilityInfo> NewBPI =
2285 std::make_unique<BranchProbabilityInfo>(F, LI);
2286 std::unique_ptr<BlockFrequencyInfo> NewBFI =
2287 std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI);
2289 NewBFI->view();
2290 else if (PGOViewCounts == PGOVCT_Text) {
2291 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n";
2292 NewBFI->print(dbgs());
2293 }
2294 }
2296 (ViewBlockFreqFuncName.empty() ||
2297 F.getName() == ViewBlockFreqFuncName)) {
2299 if (ViewBlockFreqFuncName.empty())
2300 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2301 else
2302 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2303 else if (PGOViewRawCounts == PGOVCT_Text) {
2304 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n";
2305 Func.dumpInfo();
2306 }
2307 }
2308
2311 BranchProbabilityInfo NBPI(F, LI);
2312
2313 // Fix func entry count.
2314 if (PGOFixEntryCount)
2315 fixFuncEntryCount(Func, LI, NBPI);
2316
2317 // Verify BlockFrequency information.
2318 uint64_t HotCountThreshold = 0, ColdCountThreshold = 0;
2319 if (PGOVerifyHotBFI) {
2320 HotCountThreshold = PSI->getOrCompHotCountThreshold();
2322 }
2323 verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold);
2324 }
2325 }
2326
2327 // Set function hotness attribute from the profile.
2328 // We have to apply these attributes at the end because their presence
2329 // can affect the BranchProbabilityInfo of any callers, resulting in an
2330 // inconsistent MST between prof-gen and prof-use.
2331 for (auto &F : HotFunctions) {
2332 F->addFnAttr(Attribute::InlineHint);
2333 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName()
2334 << "\n");
2335 }
2336 for (auto &F : ColdFunctions) {
2337 // Only set when there is no Attribute::Hot set by the user. For Hot
2338 // attribute, user's annotation has the precedence over the profile.
2339 if (F->hasFnAttribute(Attribute::Hot)) {
2340 auto &Ctx = M.getContext();
2341 std::string Msg = std::string("Function ") + F->getName().str() +
2342 std::string(" is annotated as a hot function but"
2343 " the profile is cold");
2344 Ctx.diagnose(
2345 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
2346 continue;
2347 }
2348 F->addFnAttr(Attribute::Cold);
2349 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName()
2350 << "\n");
2351 }
2352 return true;
2353}
2354
2356 std::string Filename, std::string RemappingFilename, bool IsCS,
2358 : ProfileFileName(std::move(Filename)),
2359 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS),
2360 FS(std::move(VFS)) {
2361 if (!PGOTestProfileFile.empty())
2362 ProfileFileName = PGOTestProfileFile;
2364 ProfileRemappingFileName = PGOTestProfileRemappingFile;
2365 if (!FS)
2367}
2368
2371
2372 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2373 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
2374 return FAM.getResult<TargetLibraryAnalysis>(F);
2375 };
2376 auto LookupBPI = [&FAM](Function &F) {
2377 return &FAM.getResult<BranchProbabilityAnalysis>(F);
2378 };
2379 auto LookupBFI = [&FAM](Function &F) {
2380 return &FAM.getResult<BlockFrequencyAnalysis>(F);
2381 };
2382 auto LookupLI = [&FAM](Function &F) {
2383 return &FAM.getResult<LoopAnalysis>(F);
2384 };
2385
2386 auto *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M);
2387 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, *FS,
2388 LookupTLI, LookupBPI, LookupBFI, LookupLI, PSI,
2389 IsCS))
2390 return PreservedAnalyses::all();
2391
2392 return PreservedAnalyses::none();
2393}
2394
2395static std::string getSimpleNodeName(const BasicBlock *Node) {
2396 if (!Node->getName().empty())
2397 return Node->getName().str();
2398
2399 std::string SimpleNodeName;
2400 raw_string_ostream OS(SimpleNodeName);
2401 Node->printAsOperand(OS, false);
2402 return SimpleNodeName;
2403}
2404
2406 uint64_t MaxCount) {
2407 auto Weights = downscaleWeights(EdgeCounts, MaxCount);
2408
2409 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W
2410 : Weights) {
2411 dbgs() << W << " ";
2412 } dbgs() << "\n";);
2413
2414 misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false);
2415
2416 setBranchWeights(*TI, Weights, /*IsExpected=*/false);
2418 std::string BrCondStr = getBranchCondString(TI);
2419 if (BrCondStr.empty())
2420 return;
2421
2422 uint64_t WSum =
2423 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0,
2424 [](uint64_t w1, uint64_t w2) { return w1 + w2; });
2425 uint64_t TotalCount =
2426 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0,
2427 [](uint64_t c1, uint64_t c2) { return c1 + c2; });
2428 uint64_t Scale = calculateCountScale(WSum);
2429 BranchProbability BP(scaleBranchCount(Weights[0], Scale),
2430 scaleBranchCount(WSum, Scale));
2431 std::string BranchProbStr;
2432 raw_string_ostream OS(BranchProbStr);
2433 OS << BP;
2434 OS << " (total count : " << TotalCount << ")";
2435 Function *F = TI->getParent()->getParent();
2437 ORE.emit([&]() {
2438 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI)
2439 << BrCondStr << " is true with probability : " << BranchProbStr;
2440 });
2441 }
2442}
2443
2444namespace llvm {
2445
2447 MDBuilder MDB(M->getContext());
2448 TI->setMetadata(llvm::LLVMContext::MD_irr_loop,
2450}
2451
2452template <> struct GraphTraits<PGOUseFunc *> {
2453 using NodeRef = const BasicBlock *;
2456
2457 static NodeRef getEntryNode(const PGOUseFunc *G) {
2458 return &G->getFunc().front();
2459 }
2460
2462 return succ_begin(N);
2463 }
2464
2465 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); }
2466
2467 static nodes_iterator nodes_begin(const PGOUseFunc *G) {
2468 return nodes_iterator(G->getFunc().begin());
2469 }
2470
2471 static nodes_iterator nodes_end(const PGOUseFunc *G) {
2472 return nodes_iterator(G->getFunc().end());
2473 }
2474};
2475
2476template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits {
2477 explicit DOTGraphTraits(bool isSimple = false)
2479
2480 static std::string getGraphName(const PGOUseFunc *G) {
2481 return std::string(G->getFunc().getName());
2482 }
2483
2484 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) {
2485 std::string Result;
2486 raw_string_ostream OS(Result);
2487
2488 OS << getSimpleNodeName(Node) << ":\\l";
2489 PGOUseBBInfo *BI = Graph->findBBInfo(Node);
2490 OS << "Count : ";
2491 if (BI && BI->Count)
2492 OS << *BI->Count << "\\l";
2493 else
2494 OS << "Unknown\\l";
2495
2496 if (!PGOInstrSelect)
2497 return Result;
2498
2499 for (const Instruction &I : *Node) {
2500 if (!isa<SelectInst>(&I))
2501 continue;
2502 // Display scaled counts for SELECT instruction:
2503 OS << "SELECT : { T = ";
2504 uint64_t TC, FC;
2505 bool HasProf = extractBranchWeights(I, TC, FC);
2506 if (!HasProf)
2507 OS << "Unknown, F = Unknown }\\l";
2508 else
2509 OS << TC << ", F = " << FC << " }\\l";
2510 }
2511 return Result;
2512 }
2513};
2514
2515} // end namespace llvm
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
Function Alias Analysis false
This file contains the simple types necessary to represent the attributes associated with functions a...
This file finds the minimum set of blocks on a CFG that must be instrumented to infer execution cover...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define LLVM_ABI
Definition Compiler.h:213
This file contains the declarations for the subclasses of Constant, which represent the different fla...
post inline ee instrument
static BasicBlock * getInstrBB(CFGMST< Edge, BBInfo > &MST, Edge &E, const DenseSet< const BasicBlock * > &ExecBlocks)
#define DEBUG_TYPE
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
#define INSTR_PROF_QUOTE(x)
#define VARIANT_MASK_CSIR_PROF
#define VARIANT_MASK_DBG_CORRELATE
#define INSTR_PROF_RAW_VERSION
#define INSTR_PROF_RAW_VERSION_VAR
#define VARIANT_MASK_TEMPORAL_PROF
#define VARIANT_MASK_IR_PROF
#define VARIANT_MASK_BYTE_COVERAGE
#define VARIANT_MASK_INSTR_ENTRY
#define VARIANT_MASK_FUNCTION_ENTRY_ONLY
#define VARIANT_MASK_INSTR_LOOP_ENTRIES
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
Machine Check Debug Module
static cl::opt< unsigned > ColdCountThreshold("mfs-count-threshold", cl::desc("Minimum number of times a block must be executed to be retained."), cl::init(1), cl::Hidden)
static constexpr StringLiteral Filename
static GlobalVariable * createIRLevelProfileFlagVar(Module &M, PGOInstrumentationType InstrumentationType)
static cl::opt< std::string > PGOTestProfileRemappingFile("pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile remapping file. This is mainly for " "test purpose."))
static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI)
static void annotateFunctionWithHashMismatch(Function &F, LLVMContext &ctx)
static cl::opt< unsigned > MaxNumMemOPAnnotations("memop-max-annotations", cl::init(4), cl::Hidden, cl::desc("Max number of precise value annotations for a single memop" "intrinsic"))
static cl::opt< unsigned > MaxNumAnnotations("icp-max-annotations", cl::init(3), cl::Hidden, cl::desc("Max number of annotations for a single indirect " "call callsite"))
static bool skipPGOGen(const Function &F)
static void collectComdatMembers(Module &M, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
static void populateEHOperandBundle(VPCandidateInfo &Cand, DenseMap< BasicBlock *, ColorVector > &BlockColors, SmallVectorImpl< OperandBundleDef > &OpBundles)
static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, BranchProbabilityInfo &NBPI, uint64_t HotCountThreshold, uint64_t ColdCountThreshold)
static cl::opt< bool > DoComdatRenaming("do-comdat-renaming", cl::init(false), cl::Hidden, cl::desc("Append function hash to the name of COMDAT function to avoid " "function hash mismatch due to the preinliner"))
static bool annotateAllFunctions(Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, vfs::FileSystem &FS, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, function_ref< LoopInfo *(Function &)> LookupLI, ProfileSummaryInfo *PSI, bool IsCS)
static void setupBBInfoEdges(const FuncPGOInstrumentation< PGOUseEdge, PGOUseBBInfo > &FuncInfo)
Set up InEdges/OutEdges for all BBs in the MST.
static bool skipPGOUse(const Function &F)
static bool canRenameComdat(Function &F, std::unordered_multimap< Comdat *, GlobalValue * > &ComdatMembers)
ValueProfileCollector::CandidateInfo VPCandidateInfo
static bool InstrumentAllFunctions(Module &M, function_ref< TargetLibraryInfo &(Function &)> LookupTLI, function_ref< BranchProbabilityInfo *(Function &)> LookupBPI, function_ref< BlockFrequencyInfo *(Function &)> LookupBFI, function_ref< LoopInfo *(Function &)> LookupLI, PGOInstrumentationType InstrumentationType)
static uint64_t sumEdgeCount(const ArrayRef< PGOUseEdge * > Edges)
static uint32_t getMaxNumAnnotations(InstrProfValueKind ValueProfKind)
static cl::opt< bool > DisableValueProfiling("disable-vp", cl::init(false), cl::Hidden, cl::desc("Disable Value Profiling"))
static std::string getSimpleNodeName(const BasicBlock *Node)
static bool isIndirectBrTarget(BasicBlock *BB)
static cl::opt< std::string > PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path of profile data file. This is " "mainly for test purpose."))
static std::string getBranchCondString(Instruction *TI)
static const char * ValueProfKindDescr[]
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
FunctionAnalysisManager FAM
ModuleAnalysisManager MAM
if(PassOpts->AAPipeline)
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
static void visit(BasicBlock &Start, std::function< bool(BasicBlock *)> op)
std::pair< BasicBlock *, BasicBlock * > Edge
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
StringSet - A set-like wrapper for the StringMap.
#define LLVM_DEBUG(...)
Definition Debug.h:114
Defines the virtual file system interface vfs::FileSystem.
Value * RHS
void printAsOperand(OutputBuffer &OB, Prec P=Prec::Default, bool StrictlyWorse=false) const
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Definition APFloat.h:1134
Class for arbitrary precision integers.
Definition APInt.h:78
This templated class represents "all analyses that operate over <aparticular IR unit>" (e....
Definition Analysis.h:50
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
iterator end() const
Definition ArrayRef.h:131
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
iterator begin() const
Definition ArrayRef.h:130
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:474
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI const_iterator getFirstNonPHIOrDbgOrAlloca() const
Returns an iterator to the first instruction in this block that is not a PHINode, a debug intrinsic,...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
LLVM_ABI bool isIrrLoopHeader(const BasicBlock *BB)
Returns true if BB is an irreducible loop header block.
LLVM_ABI std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
Analysis pass which computes BranchProbabilityInfo.
Analysis providing branch probability information.
Edge & addEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W)
Definition CFGMST.h:304
const std::vector< std::unique_ptr< Edge > > & allEdges() const
Definition CFGMST.h:341
size_t numEdges() const
Definition CFGMST.h:347
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:765
LLVM_ABI StringRef getName() const
Definition Comdat.cpp:28
void setSelectionKind(SelectionKind Val)
Definition Comdat.h:48
SelectionKind getSelectionKind() const
Definition Comdat.h:47
Conditional Branch instruction.
Value * getCondition() const
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:231
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:225
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:219
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
bool empty() const
Definition DenseMap.h:109
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
Diagnostic information for the PGO profiler.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
Base class for error info classes.
Definition Error.h:44
virtual std::string message() const
Return the error message as a string.
Definition Error.h:52
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
Class to represent profile counts.
Definition Function.h:299
static LLVM_ABI GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition Globals.cpp:615
@ HiddenVisibility
The GV is hidden.
Definition GlobalValue.h:69
@ ExternalLinkage
Externally visible function.
Definition GlobalValue.h:53
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
Definition GlobalValue.h:57
@ AvailableExternallyLinkage
Available for inspection, not emission.
Definition GlobalValue.h:54
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Definition GlobalValue.h:56
This instruction compares its operands according to the predicate given to the constructor.
static Expected< std::unique_ptr< IndexedInstrProfReader > > create(const Twine &Path, vfs::FileSystem &FS, const Twine &RemappingPath="")
Factory method to create an indexed reader.
uint64_t getMaximumFunctionCount(bool UseCS)
Return the maximum of all known function counts.
Expected< NamedInstrProfRecord > getInstrProfRecord(StringRef FuncName, uint64_t FuncHash, StringRef DeprecatedFuncName="", uint64_t *MismatchedFuncSum=nullptr)
Return the NamedInstrProfRecord associated with FuncName and FuncHash.
Base class for instruction visitors.
Definition InstVisitor.h:78
static bool canInstrumentCallsite(const CallBase &CB)
instrprof_error get() const
Definition InstrProf.h:464
std::string message() const override
Return the error message as a string.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
uint32_t getCRC() const
Definition CRC.h:53
LLVM_ABI void update(ArrayRef< uint8_t > Data)
Definition CRC.cpp:103
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Analysis pass that exposes the LoopInfo for a function.
Definition LoopInfo.h:569
LLVM_ABI MDString * createString(StringRef Str)
Return the given string as metadata.
Definition MDBuilder.cpp:21
LLVM_ABI MDNode * createIrrLoopHeaderWeight(uint64_t Weight)
Return metadata containing an irreducible loop header weight.
Metadata node.
Definition Metadata.h:1080
Tuple of metadata.
Definition Metadata.h:1500
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1529
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
Diagnostic information for optimization analysis remarks.
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for applied optimization remarks.
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
LLVM_ABI PGOInstrumentationUse(std::string Filename="", std::string RemappingFilename="", bool IsCS=false, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr)
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
LLVM_ABI uint64_t getOrCompColdCountThreshold() const
Returns ColdCountThreshold if set.
LLVM_ABI bool isColdCount(uint64_t C) const
Returns true if count C is considered cold.
LLVM_ABI void refresh(std::unique_ptr< ProfileSummary > &&Other=nullptr)
If a summary is provided as argument, use that.
LLVM_ABI bool isHotCount(uint64_t C) const
Returns true if count C is considered hot.
LLVM_ABI uint64_t getOrCompHotCountThreshold() const
Returns HotCountThreshold if set.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:140
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:137
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
EltTy front() const
unsigned size() const
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:314
LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false, bool NoDetails=false) const
Print the current type.
Value * getOperand(unsigned i) const
Definition User.h:207
std::vector< CandidateInfo > get(InstrProfValueKind Kind) const
returns a list of value profiling candidates of the given kind
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
A raw_ostream that writes to an std::string.
The virtual file system interface.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
This file contains the declaration of the Comdat class, which represents a single COMDAT in LLVM.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
uint64_t getFuncHash(const FuncRecordTy *Record)
Return the structural hash associated with the function.
void checkExpectAnnotations(const Instruction &I, ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
void write64le(void *P, uint64_t V)
Definition Endian.h:478
LLVM_ABI IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets a vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
static cl::opt< bool > PGOTreatUnknownAsCold("pgo-treat-unknown-as-cold", cl::init(false), cl::Hidden, cl::desc("For cold function instrumentation, treat count unknown(e.g. " "unprofiled) functions as cold."))
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
static cl::opt< bool > PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off " "memory intrinsic size profiling."))
LLVM_ABI void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count)
LLVM_ABI void setProfMetadata(Instruction *TI, ArrayRef< uint64_t > EdgeCounts, uint64_t MaxCount)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1669
LLVM_ABI std::string getPGOFuncName(const Function &F, bool InLTO=false, uint64_t Version=INSTR_PROF_INDEX_VERSION)
Please use getIRPGOFuncName for LLVM IR instrumentation.
static cl::opt< bool > PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph", cl::desc("Create a dot file of CFGs with block " "coverage inference information"))
LLVM_ABI void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName)
Create the PGOFuncName meta data if PGOFuncName is different from function's raw name.
LLVM_ABI unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ)
Search for the specified successor of basic block BB and return its position in the terminator instru...
Definition CFG.cpp:90
LLVM_ABI std::string getIRPGOFuncName(const Function &F, bool InLTO=false)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto successors(const MachineBasicBlock *BB)
LLVM_ABI void createProfileSamplingVar(Module &M)
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given han...
Definition Error.h:990
constexpr from_range_t from_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
FuncHash
Definition InstrProf.h:78
LLVM_ABI bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr, DomTreeUpdater *DTU=nullptr)
LLVM_ABI DenseMap< BasicBlock *, ColorVector > colorEHFunclets(Function &F)
If an EH funclet personality is in use (see isFuncletEHPersonality), this will recompute which blocks...
LLVM_ABI void createPGONameMetadata(GlobalObject &GO, StringRef PGOName)
Create the PGOName metadata if a global object's PGO name is different from its mangled name.
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
static cl::opt< bool > PGOBlockCoverage("pgo-block-coverage", cl::desc("Use this option to enable basic block coverage instrumentation"))
FunctionAddr NumCounters
Definition InstrProf.h:91
cl::opt< bool > PGOWarnMissing
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
cl::opt< unsigned > MaxNumVTableAnnotations("icp-max-num-vtables", cl::init(6), cl::Hidden, cl::desc("Max number of vtables annotated for a vtable load instruction."))
static cl::opt< bool > PGOTemporalInstrumentation("pgo-temporal-instrumentation", cl::desc("Use this option to enable temporal instrumentation"))
cl::opt< bool > EnableVTableProfileUse("enable-vtable-profile-use", cl::init(false), cl::desc("If ThinLTO and WPD is enabled and this option is true, vtable " "profiles will be used by ICP pass for more efficient indirect " "call sequence. If false, type profiles won't be used."))
bool isScopedEHPersonality(EHPersonality Pers)
Returns true if this personality uses scope-style EH IR instructions: catchswitch,...
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
LLVM_ABI std::string getPGOName(const GlobalVariable &V, bool InLTO=false)
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
LLVM_ABI GlobalVariable * createPGOFuncNameVar(Function &F, StringRef PGOFuncName)
Create and return the global variable for function name used in PGO instrumentation.
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
static cl::opt< bool > EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, cl::desc("When this option is on, the annotated " "branch probability will be emitted as " "optimization remarks: -{Rpass|" "pass-remarks}=pgo-instrumentation"))
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionAddr NumValueSites[IPVK_Last+1]
Definition InstrProf.h:93
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
Function::ProfileCount ProfileCount
static cl::opt< unsigned > PGOVerifyBFIRatio("pgo-verify-bfi-ratio", cl::init(2), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: only print out " "mismatched BFI if the difference percentage is greater than " "this value (in percentage)."))
static cl::opt< bool > PGOInstrumentLoopEntries("pgo-instrument-loop-entries", cl::init(false), cl::Hidden, cl::desc("Force to instrument loop entries."))
static cl::opt< unsigned > PGOFunctionSizeThreshold("pgo-function-size-threshold", cl::Hidden, cl::desc("Do not instrument functions smaller than this threshold."))
LLVM_ABI EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
static cl::opt< bool > PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, cl::desc("Fix function entry count in profile use."))
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
static cl::opt< PGOViewCountsType > PGOViewRawCounts("pgo-view-raw-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text " "with raw profile counts from " "profile data. See also option " "-pgo-view-counts. To limit graph " "display to only one function, use " "filtering option -view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
static cl::opt< bool > PGOVerifyBFI("pgo-verify-bfi", cl::init(false), cl::Hidden, cl::desc("Print out mismatched BFI counts after setting profile metadata " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remarks-analysis=pgo."))
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
cl::opt< bool > NoPGOWarnMismatch
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
static cl::opt< uint64_t > PGOColdInstrumentEntryThreshold("pgo-cold-instrument-entry-threshold", cl::init(0), cl::Hidden, cl::desc("For cold function instrumentation, skip instrumenting functions " "whose entry count is above the given value."))
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:221
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
InstrProfValueKind
Definition InstrProf.h:311
cl::opt< PGOViewCountsType > PGOViewCounts("pgo-view-counts", cl::Hidden, cl::desc("A boolean option to show CFG dag or text with " "block profile counts and branch probabilities " "right after PGO profile annotation step. The " "profile counts are computed using branch " "probabilities from the runtime profile data and " "block frequency propagation algorithm. To view " "the raw counts from the profile, use option " "-pgo-view-raw-counts instead. To limit graph " "display to only one function, use filtering option " "-view-bfi-func-name."), cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), clEnumValN(PGOVCT_Graph, "graph", "show a graph."), clEnumValN(PGOVCT_Text, "text", "show in text.")))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
OperandBundleDefT< Value * > OperandBundleDef
Definition AutoUpgrade.h:34
LLVM_ABI void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
static cl::opt< unsigned > PGOVerifyBFICutoff("pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden, cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " "profile count value is below."))
LLVM_ABI BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")
If this edge is a critical edge, insert a new node to split the critical edge.
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then cleanup.
LLVM_ABI bool isCriticalEdge(const Instruction *TI, unsigned SuccNum, bool AllowIdenticalEdges=false)
Return true if the specified edge is a critical edge.
Definition CFG.cpp:106
cl::opt< bool > PGOInstrumentColdFunctionOnly
cl::list< std::string > CtxPGOSkipCallsiteInstrument("ctx-prof-skip-callsite-instr", cl::Hidden, cl::desc("Do not instrument callsites to functions in this list. Intended " "for testing."))
LLVM_ABI bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken=false)
Check if we can safely rename this Comdat function.
static cl::opt< bool > PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off SELECT " "instruction instrumentation. "))
LLVM_ABI void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput)
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1917
TinyPtrVector< BasicBlock * > ColorVector
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto predecessors(const MachineBasicBlock *BB)
Instruction::const_succ_iterator const_succ_iterator
Definition CFG.h:139
llvm::cl::opt< llvm::InstrProfCorrelator::ProfCorrelatorKind > ProfileCorrelate
static cl::opt< bool > PGOFunctionEntryCoverage("pgo-function-entry-coverage", cl::Hidden, cl::desc("Use this option to enable function entry coverage instrumentation."))
static cl::opt< unsigned > PGOFunctionCriticalEdgeThreshold("pgo-critical-edge-threshold", cl::init(20000), cl::Hidden, cl::desc("Do not instrument functions with the number of critical edges " " greater than this threshold."))
uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale)
Scale an individual branch count.
static cl::opt< bool > PGOVerifyHotBFI("pgo-verify-hot-bfi", cl::init(false), cl::Hidden, cl::desc("Print out the non-match BFI count if a hot raw profile count " "becomes non-hot, or a cold raw profile count becomes hot. " "The print is enabled under -Rpass-analysis=pgo, or " "internal option -pass-remarks-analysis=pgo."))
uint64_t calculateCountScale(uint64_t MaxCount)
Calculate what to divide by to scale counts.
LLVM_ABI SmallVector< uint32_t > downscaleWeights(ArrayRef< uint64_t > Weights, std::optional< uint64_t > KnownMaxCount=std::nullopt)
downscale the given weights preserving the ratio.
cl::opt< bool > EnableVTableValueProfiling("enable-vtable-value-profiling", cl::init(false), cl::desc("If true, the virtual table address will be instrumented to know " "the types of a C++ pointer. The information is used in indirect " "call promotion to do selective vtable-based comparison."))
static cl::opt< bool > PGOInstrumentEntry("pgo-instrument-entry", cl::init(false), cl::Hidden, cl::desc("Force to instrument function entry basicblock."))
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
static cl::opt< std::string > PGOTraceFuncHash("pgo-trace-func-hash", cl::init("-"), cl::Hidden, cl::value_desc("function name"), cl::desc("Trace the hash of the function with this name."))
cl::opt< bool > NoPGOWarnMismatchComdatWeak
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:870
#define N
static std::string getGraphName(const PGOUseFunc *G)
std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph)
DefaultDOTGraphTraits(bool simple=false)
static ChildIteratorType child_end(const NodeRef N)
static NodeRef getEntryNode(const PGOUseFunc *G)
static ChildIteratorType child_begin(const NodeRef N)
static nodes_iterator nodes_end(const PGOUseFunc *G)
static nodes_iterator nodes_begin(const PGOUseFunc *G)
pointer_iterator< Function::const_iterator > nodes_iterator
std::vector< uint64_t > Counts
Definition InstrProf.h:895
CountPseudoKind getCountPseudoKind() const
Definition InstrProf.h:993
uint32_t getNumValueSites(uint32_t ValueKind) const
Return the number of instrumented sites for ValueKind.
Definition InstrProf.h:1097
static void setCSFlagInHash(uint64_t &FuncHash)
Definition InstrProf.h:1078
static constexpr uint64_t FUNC_HASH_MASK
Definition InstrProf.h:1061