LLVM 23.0.0git
InstrProfiling.cpp
Go to the documentation of this file.
1//===-- InstrProfiling.cpp - Frontend instrumentation based profiling -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass lowers instrprof_* intrinsics emitted by an instrumentor.
10// It also builds the data structures and initialization code needed for
11// updating execution counts and emitting the profile at runtime.
12//
13//===----------------------------------------------------------------------===//
14
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/STLExtras.h"
19#include "llvm/ADT/StringRef.h"
20#include "llvm/ADT/Twine.h"
23#include "llvm/Analysis/CFG.h"
27#include "llvm/IR/Attributes.h"
28#include "llvm/IR/BasicBlock.h"
29#include "llvm/IR/CFG.h"
30#include "llvm/IR/Constant.h"
31#include "llvm/IR/Constants.h"
32#include "llvm/IR/DIBuilder.h"
35#include "llvm/IR/Dominators.h"
36#include "llvm/IR/Function.h"
37#include "llvm/IR/GlobalAlias.h"
38#include "llvm/IR/GlobalValue.h"
40#include "llvm/IR/IRBuilder.h"
42#include "llvm/IR/Instruction.h"
45#include "llvm/IR/Intrinsics.h"
46#include "llvm/IR/MDBuilder.h"
47#include "llvm/IR/Module.h"
49#include "llvm/IR/Type.h"
50#include "llvm/Pass.h"
56#include "llvm/Support/Error.h"
64#include <algorithm>
65#include <cassert>
66#include <cstdint>
67#include <string>
68
69using namespace llvm;
70
71#define DEBUG_TYPE "instrprof"
72
73namespace llvm {
74// Command line option to enable vtable value profiling. Defined in
75// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling=
78 "profile-correlate",
79 cl::desc("Use debug info or binary file to correlate profiles."),
82 "No profile correlation"),
84 "Use debug info to correlate"),
86 "Use binary to correlate")));
87} // namespace llvm
88
89namespace {
90
91cl::opt<bool> DoHashBasedCounterSplit(
92 "hash-based-counter-split",
93 cl::desc("Rename counter variable of a comdat function based on cfg hash"),
94 cl::init(true));
95
97 RuntimeCounterRelocation("runtime-counter-relocation",
98 cl::desc("Enable relocating counters at runtime."),
99 cl::init(false));
100
101cl::opt<bool> ValueProfileStaticAlloc(
102 "vp-static-alloc",
103 cl::desc("Do static counter allocation for value profiler"),
104 cl::init(true));
105
106cl::opt<double> NumCountersPerValueSite(
107 "vp-counters-per-site",
108 cl::desc("The average number of profile counters allocated "
109 "per value profiling site."),
110 // This is set to a very small value because in real programs, only
111 // a very small percentage of value sites have non-zero targets, e.g, 1/30.
112 // For those sites with non-zero profile, the average number of targets
113 // is usually smaller than 2.
114 cl::init(1.0));
115
116cl::opt<bool> AtomicCounterUpdateAll(
117 "instrprof-atomic-counter-update-all",
118 cl::desc("Make all profile counter updates atomic (for testing only)"),
119 cl::init(false));
120
121cl::opt<bool> VerifyAtomicPromotion(
122 "verify-atomic-counter-promoted",
123 cl::desc("Check that all profile counter updates were made atomic; no-op "
124 "if atomic updates are not requested (-fprofile-update=atomic)"),
125 cl::init(false));
126
127cl::opt<bool> AtomicCounterUpdatePromoted(
128 "atomic-counter-update-promoted",
129 cl::desc("Do counter update using atomic fetch add "
130 " for promoted counters only"),
131 cl::init(false));
132
133cl::opt<bool> AtomicFirstCounter(
134 "atomic-first-counter",
135 cl::desc("Use atomic fetch add for first counter in a function (usually "
136 "the entry counter)"),
137 cl::init(false));
138
139cl::opt<bool> ConditionalCounterUpdate(
140 "conditional-counter-update",
141 cl::desc("Do conditional counter updates in single byte counters mode)"),
142 cl::init(false));
143
144// If the option is not specified, whether counter promotion is done
145// depends on how the instrumentation lowering pipeline is set up, i.e.,
146// this option's default value (false) does not by itself mean that
147// promotion is disabled by default. Explicitly setting this option
148// overrides the default behavior.
149cl::opt<bool> DoCounterPromotion("do-counter-promotion",
150 cl::desc("Do counter register promotion"),
151 cl::init(false));
152cl::opt<unsigned> MaxNumOfPromotionsPerLoop(
153 "max-counter-promotions-per-loop", cl::init(20),
154 cl::desc("Max number counter promotions per loop to avoid"
155 " increasing register pressure too much"));
156
157// A debug option
159 MaxNumOfPromotions("max-counter-promotions", cl::init(-1),
160 cl::desc("Max number of allowed counter promotions"));
161
162cl::opt<unsigned> SpeculativeCounterPromotionMaxExiting(
163 "speculative-counter-promotion-max-exiting", cl::init(3),
164 cl::desc("The max number of exiting blocks of a loop to allow "
165 " speculative counter promotion"));
166
167cl::opt<bool> SpeculativeCounterPromotionToLoop(
168 "speculative-counter-promotion-to-loop",
169 cl::desc("When the option is false, if the target block is in a loop, "
170 "the promotion will be disallowed unless the promoted counter "
171 " update can be further/iteratively promoted into an acyclic "
172 " region."));
173
174static cl::opt<unsigned> OffloadPGOSampling(
175 "offload-pgo-sampling",
176 cl::desc("Log2 of the sampling period for offload PGO instrumentation. "
177 "Only 1 in every 2^N blocks is instrumented. "
178 "0 = all blocks, 1 = 50%, 2 = 25%, 3 = 12.5% (default). "
179 "Higher values reduce overhead at the cost of sparser profiles."),
180 cl::init(3));
181
182cl::opt<bool> IterativeCounterPromotion(
183 "iterative-counter-promotion", cl::init(true),
184 cl::desc("Allow counter promotion across the whole loop nest."));
185
186cl::opt<bool> SkipRetExitBlock(
187 "skip-ret-exit-block", cl::init(true),
188 cl::desc("Suppress counter promotion if exit blocks contain ret."));
189
190static cl::opt<bool> SampledInstr("sampled-instrumentation",
191 cl::desc("Do PGO instrumentation sampling"));
192
193static cl::opt<unsigned> SampledInstrPeriod(
194 "sampled-instr-period",
195 cl::desc("Set the profile instrumentation sample period. A sample period "
196 "of 0 is invalid. For each sample period, a fixed number of "
197 "consecutive samples will be recorded. The number is controlled "
198 "by 'sampled-instr-burst-duration' flag. The default sample "
199 "period of 65536 is optimized for generating efficient code that "
200 "leverages unsigned short integer wrapping in overflow, but this "
201 "is disabled under simple sampling (burst duration = 1)."),
202 cl::init(USHRT_MAX + 1));
203
204static cl::opt<unsigned> SampledInstrBurstDuration(
205 "sampled-instr-burst-duration",
206 cl::desc("Set the profile instrumentation burst duration, which can range "
207 "from 1 to the value of 'sampled-instr-period' (0 is invalid). "
208 "This number of samples will be recorded for each "
209 "'sampled-instr-period' count update. Setting to 1 enables simple "
210 "sampling, in which case it is recommended to set "
211 "'sampled-instr-period' to a prime number."),
212 cl::init(200));
213
/// Resolved parameters for sampled instrumentation, derived from the
/// 'sampled-instr-period' and 'sampled-instr-burst-duration' options.
///
/// Every member has a default so that a default-constructed object never
/// holds indeterminate values.
struct SampledInstrumentationConfig {
  /// Number of samples recorded in each period.
  unsigned BurstDuration = 0;
  /// Length of one sampling period, in counter updates.
  unsigned Period = 0;
  /// True when a 16-bit sampling variable is used instead of a 32-bit one.
  bool UseShort = false;
  /// True when the burst duration is 1.
  bool IsSimpleSampling = false;
  /// True for burst sampling with a period of exactly USHRT_MAX + 1, where
  /// the 16-bit sampling variable wraps to zero without an explicit reset.
  bool IsFastSampling = false;
};
221
222static SampledInstrumentationConfig getSampledInstrumentationConfig() {
223 SampledInstrumentationConfig config;
224 config.BurstDuration = SampledInstrBurstDuration.getValue();
225 config.Period = SampledInstrPeriod.getValue();
226 if (config.BurstDuration > config.Period)
228 "SampledBurstDuration must be less than or equal to SampledPeriod");
229 if (config.Period == 0 || config.BurstDuration == 0)
231 "SampledPeriod and SampledBurstDuration must be greater than 0");
232 config.IsSimpleSampling = (config.BurstDuration == 1);
233 // If (BurstDuration != 1 && Period == 65536), generate the fast burst
234 // sampling style code.
235 config.IsFastSampling =
236 (!config.IsSimpleSampling && config.Period == USHRT_MAX + 1);
237 config.UseShort = (config.Period <= USHRT_MAX) || config.IsFastSampling;
238 return config;
239}
240
241using LoadStorePair = std::pair<Instruction *, Instruction *>;
242
243static void makeAtomic(Instruction *Load, Instruction *Store) {
244 auto *Addition = dyn_cast<BinaryOperator>(Store->getOperand(0));
245 assert(Addition && Addition->getOpcode() == Instruction::BinaryOps::Add);
246 auto *Addend = Addition->getOperand(1);
247
248 IRBuilder<> Builder(Load);
249 Builder.CreateAtomicRMW(AtomicRMWInst::Add, Store->getOperand(1), Addend,
251 Store->eraseFromParent();
252 Addition->eraseFromParent();
253 Load->eraseFromParent();
254}
255
256static uint64_t getIntModuleFlagOrZero(const Module &M, StringRef Flag) {
257 auto *MD = dyn_cast_or_null<ConstantAsMetadata>(M.getModuleFlag(Flag));
258 if (!MD)
259 return 0;
260
261 // If the flag is a ConstantAsMetadata, it should be an integer representable
262 // in 64-bits.
263 return cast<ConstantInt>(MD->getValue())->getZExtValue();
264}
265
266static bool enablesValueProfiling(const Module &M) {
267 return isIRPGOFlagSet(&M) ||
268 getIntModuleFlagOrZero(M, "EnableValueProfiling") != 0;
269}
270
271// Conservatively returns true if value profiling is enabled.
272static bool profDataReferencedByCode(const Module &M) {
273 return enablesValueProfiling(M);
274}
275
276class InstrLowerer final {
277public:
278 InstrLowerer(Module &M, const InstrProfOptions &Options,
279 std::function<const TargetLibraryInfo &(Function &F)> GetTLI,
280 bool IsCS)
281 : M(M), Options(Options), TT(M.getTargetTriple()), IsCS(IsCS),
282 GetTLI(GetTLI), DataReferencedByCode(profDataReferencedByCode(M)) {}
283
284 bool lower();
285
286private:
287 Module &M;
288 const InstrProfOptions Options;
289 const Triple TT;
290 // Is this lowering for the context-sensitive instrumentation.
291 const bool IsCS;
292
293 std::function<const TargetLibraryInfo &(Function &F)> GetTLI;
294
295 const bool DataReferencedByCode;
296
297 struct PerFunctionProfileData {
298 uint32_t NumValueSites[IPVK_Last + 1] = {};
299 GlobalVariable *RegionCounters = nullptr;
300 GlobalVariable *UniformCounters =
301 nullptr; // Per-block uniform-entry counters
302 GlobalVariable *DataVar = nullptr;
303 GlobalVariable *RegionBitmaps = nullptr;
304 uint32_t NumBitmapBytes = 0;
305
306 PerFunctionProfileData() = default;
307 };
308 DenseMap<GlobalVariable *, PerFunctionProfileData> ProfileDataMap;
309 // Key is virtual table variable, value is 'VTableProfData' in the form of
310 // GlobalVariable.
311 DenseMap<GlobalVariable *, GlobalVariable *> VTableDataMap;
312 /// If runtime relocation is enabled, this maps functions to the load
313 /// instruction that produces the profile relocation bias.
314 DenseMap<const Function *, LoadInst *> FunctionToProfileBiasMap;
315 std::vector<GlobalValue *> CompilerUsedVars;
316 std::vector<GlobalValue *> UsedVars;
317 std::vector<GlobalVariable *> ReferencedNames;
318 // The list of virtual table variables of which the VTableProfData is
319 // collected.
320 std::vector<GlobalVariable *> ReferencedVTables;
321 GlobalVariable *NamesVar = nullptr;
322 size_t NamesSize = 0;
323
324 StructType *ProfileDataTy = nullptr;
325
326 // vector of counter load/store pairs to be register promoted.
327 std::vector<LoadStorePair> PromotionCandidates;
328
329 int64_t TotalCountersPromoted = 0;
330
331 // Per-function cache of invariant values for GPU PGO instrumentation.
332 // Computed once at the function entry and reused across all instrumentation
333 // points to avoid redundant IR and help the optimizer.
334 struct GPUPGOInvariants {
335 Value *Matched = nullptr;
336 bool WaveSizeStored = false;
337 };
338 DenseMap<Function *, GPUPGOInvariants> GPUInvariantsCache;
339
340 /// Emit invariant PGO values at the function entry block and cache them.
341 GPUPGOInvariants &getOrCreateGPUInvariants(Function *F);
342
343 /// Lower instrumentation intrinsics in the function. Returns true if
344 /// any intrinsic was lowered.
345 bool lowerIntrinsics(Function *F);
346
347 /// Register-promote counter loads and stores in loops.
348 void promoteCounterLoadStores(Function *F);
349
350 /// Returns true if relocating counters at runtime is enabled.
351 bool isRuntimeCounterRelocationEnabled() const;
352
353 /// Returns true if profile counter update register promotion is enabled.
354 bool isCounterPromotionEnabled() const;
355
356 /// Returns true if profile counter updates should be atomic.
357 bool isAtomic() const;
358
359 /// Return true if profile sampling is enabled.
360 bool isSamplingEnabled() const;
361
362 /// Count the number of instrumented value sites for the function.
363 void computeNumValueSiteCounts(InstrProfValueProfileInst *Ins);
364
365 /// Replace instrprof.value.profile with a call to runtime library.
366 void lowerValueProfileInst(InstrProfValueProfileInst *Ins);
367
368 /// Replace instrprof.cover with a store instruction to the coverage byte.
369 void lowerCover(InstrProfCoverInst *Inc);
370
371 /// Replace instrprof.timestamp with a call to
372 /// INSTR_PROF_PROFILE_SET_TIMESTAMP.
373 void lowerTimestamp(InstrProfTimestampInst *TimestampInstruction);
374
375 /// Replace instrprof.increment with an increment of the appropriate value.
376 void lowerIncrement(InstrProfIncrementInst *Inc);
377
378 /// Force emitting of name vars for unused functions.
379 void lowerCoverageData(GlobalVariable *CoverageNamesVar);
380
381 /// Replace instrprof.mcdc.tvbitmask.update with a shift and or instruction
382 /// using the index represented by a temp value into a bitmap.
383 void lowerMCDCTestVectorBitmapUpdate(InstrProfMCDCTVBitmapUpdate *Ins);
384
385 /// Get the Bias value for data to access mmap-ed area.
386 /// Create it if it hasn't been seen.
387 GlobalVariable *getOrCreateBiasVar(StringRef VarName);
388
389 /// Compute the address of the counter value that this profiling instruction
390 /// acts on.
391 Value *getCounterAddress(InstrProfCntrInstBase *I);
392
393 /// Lower the incremental instructions under profile sampling predicates.
394 void doSampling(Instruction *I);
395
396 /// Get the region counters for an increment, creating them if necessary.
397 ///
398 /// If the counter array doesn't yet exist, the profile data variables
399 /// referring to them will also be created.
400 GlobalVariable *getOrCreateRegionCounters(InstrProfCntrInstBase *Inc);
401
402 /// Get the uniform entry counters for GPU divergence tracking.
403 /// These counters track how often blocks are entered with all lanes active.
404 GlobalVariable *getOrCreateUniformCounters(InstrProfCntrInstBase *Inc);
405
406 /// Create the region counters.
407 GlobalVariable *createRegionCounters(InstrProfCntrInstBase *Inc,
408 StringRef Name,
410
411 /// Compute the address of the test vector bitmap that this profiling
412 /// instruction acts on.
413 Value *getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I);
414
415 /// Get the region bitmaps for an increment, creating them if necessary.
416 ///
417 /// If the bitmap array doesn't yet exist, the profile data variables
418 /// referring to them will also be created.
419 GlobalVariable *getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc);
420
421 /// Create the MC/DC bitmap as a byte-aligned array of bytes associated with
422 /// an MC/DC Decision region. The number of bytes required is indicated by
423 /// the intrinsic used (type InstrProfMCDCBitmapInstBase). This is called
424 /// as part of setupProfileSection() and is conceptually very similar to
425 /// what is done for profile data counters in createRegionCounters().
426 GlobalVariable *createRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc,
427 StringRef Name,
429
430 /// Set Comdat property of GV, if required.
431 void maybeSetComdat(GlobalVariable *GV, GlobalObject *GO, StringRef VarName);
432
433 /// Setup the sections into which counters and bitmaps are allocated.
434 GlobalVariable *setupProfileSection(InstrProfInstBase *Inc,
435 InstrProfSectKind IPSK);
436
437 /// Create INSTR_PROF_DATA variable for counters and bitmaps.
438 void createDataVariable(InstrProfCntrInstBase *Inc);
439
440 /// Get the counters for virtual table values, creating them if necessary.
441 void getOrCreateVTableProfData(GlobalVariable *GV);
442
443 /// Emit the section with compressed function names.
444 void emitNameData();
445
446 /// Emit the section with compressed vtable names.
447 void emitVTableNames();
448
449 /// Emit value nodes section for value profiling.
450 void emitVNodes();
451
452 /// Emit runtime registration functions for each profile data variable.
453 void emitRegistration();
454
455 /// Emit the necessary plumbing to pull in the runtime initialization.
456 /// Returns true if a change was made.
457 bool emitRuntimeHook();
458
459 /// Add uses of our data variables and runtime hook.
460 void emitUses();
461
462 /// Create a static initializer for our data, on platforms that need it,
463 /// and for any profile output file that was specified.
464 void emitInitialization();
465
466 /// Return the __llvm_profile_data struct type.
467 StructType *getProfileDataTy();
468};
469
470///
471/// A helper class to promote one counter RMW operation in the loop
472/// into register update.
473///
474/// The RMW update for the counter will be sunk out of the loop after
475/// the transformation.
476///
477class PGOCounterPromoterHelper : public LoadAndStorePromoter {
478public:
479 PGOCounterPromoterHelper(
480 Instruction *L, Instruction *S, SSAUpdater &SSA, Value *Init,
481 BasicBlock *PH, ArrayRef<BasicBlock *> ExitBlocks,
482 ArrayRef<Instruction *> InsertPts,
483 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
484 LoopInfo &LI, bool IsAtomic)
485 : LoadAndStorePromoter({L, S}, SSA), Store(S), ExitBlocks(ExitBlocks),
486 InsertPts(InsertPts), LoopToCandidates(LoopToCands), LI(LI),
487 IsAtomic(IsAtomic) {
490 SSA.AddAvailableValue(PH, Init);
491 }
492
493 void doExtraRewritesBeforeFinalDeletion() override {
494 for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
495 BasicBlock *ExitBlock = ExitBlocks[i];
496 Instruction *InsertPos = InsertPts[i];
497 // Get LiveIn value into the ExitBlock. If there are multiple
498 // predecessors, the value is defined by a PHI node in this
499 // block.
500 Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
501 Value *Addr = cast<StoreInst>(Store)->getPointerOperand();
502 Type *Ty = LiveInValue->getType();
503 IRBuilder<> Builder(InsertPos);
504 if (auto *AddrInst = dyn_cast_or_null<IntToPtrInst>(Addr)) {
505 // If isRuntimeCounterRelocationEnabled() is true then the address of
506 // the store instruction is computed with two instructions in
507 // InstrProfiling::getCounterAddress(). We need to copy those
508 // instructions to this block to compute Addr correctly.
509 // %BiasAdd = add i64 ptrtoint <__profc_>, <__llvm_profile_counter_bias>
510 // %Addr = inttoptr i64 %BiasAdd to i64*
511 auto *OrigBiasInst = dyn_cast<BinaryOperator>(AddrInst->getOperand(0));
512 assert(OrigBiasInst->getOpcode() == Instruction::BinaryOps::Add);
513 Value *BiasInst = Builder.Insert(OrigBiasInst->clone());
514 Addr = Builder.CreateIntToPtr(BiasInst,
515 PointerType::getUnqual(Ty->getContext()));
516 }
517 auto *TargetLoop =
518 IterativeCounterPromotion ? LI.getLoopFor(ExitBlock) : nullptr;
519 // Generate the relaxed atomic RMW if we've asked for it and no more
520 // promotion is possible.
521 if ((IsAtomic && !TargetLoop) || AtomicCounterUpdatePromoted)
522 Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue,
523 MaybeAlign(), AtomicOrdering::Monotonic);
524 else {
525 LoadInst *OldVal = Builder.CreateLoad(Ty, Addr, "pgocount.promoted");
526 auto *NewVal = Builder.CreateAdd(OldVal, LiveInValue);
527 auto *NewStore = Builder.CreateStore(NewVal, Addr);
528
529 // Now update the parent loop's candidate list:
530 if (TargetLoop)
531 LoopToCandidates[TargetLoop].emplace_back(OldVal, NewStore);
532 }
533 }
534 }
535
536private:
537 Instruction *Store;
538 ArrayRef<BasicBlock *> ExitBlocks;
539 ArrayRef<Instruction *> InsertPts;
540 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
541 LoopInfo &LI;
542 const bool IsAtomic;
543};
544
545/// A helper class to do register promotion for all profile counter
546/// updates in a loop.
547///
548class PGOCounterPromoter {
549public:
550 PGOCounterPromoter(
551 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
552 Loop &CurLoop, LoopInfo &LI, BlockFrequencyInfo *BFI, bool IsAtomic)
553 : LoopToCandidates(LoopToCands), L(CurLoop), LI(LI), BFI(BFI),
554 IsAtomic(IsAtomic) {
555
556 // Skip collection of ExitBlocks and InsertPts for loops that will not be
557 // able to have counters promoted.
558 SmallVector<BasicBlock *, 8> LoopExitBlocks;
559 SmallPtrSet<BasicBlock *, 8> BlockSet;
560
561 L.getExitBlocks(LoopExitBlocks);
562 if (!isPromotionPossible(&L, LoopExitBlocks))
563 return;
564
565 for (BasicBlock *ExitBlock : LoopExitBlocks) {
566 if (BlockSet.insert(ExitBlock).second &&
567 llvm::none_of(predecessors(ExitBlock), [&](const BasicBlock *Pred) {
568 return llvm::isPresplitCoroSuspendExitEdge(*Pred, *ExitBlock);
569 })) {
570 ExitBlocks.push_back(ExitBlock);
571 InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
572 }
573 }
574 }
575
576 bool run(int64_t *NumPromoted) {
577 bool RC = promoteCandidates(NumPromoted);
578 // In certain case, e.g. with -fprofile-update=atomic, we want to generate
579 // atomic updates of the PGO counters, but also perform promotion of these
580 // updates out of loops to reduce train time. The strategy is:
581 // 1) generate non-atomic load-increment-store sequence of instructions
582 // during lowerIntrinsics phase,
583 // 2) perform the promotion (in promoteCandidates function), then
584 // 3) convert all (promoted and unpromotable) updates to atomicRMW.
585 // This requires that promoted candidates are set to nullptr in the
586 // LoopToCandidates[&L] array by the promoteCandidates() function.
587 if (IsAtomic)
588 for (auto &Cand : LoopToCandidates[&L])
589 if (Cand.first != nullptr && Cand.second != nullptr)
590 makeAtomic(Cand.first, Cand.second);
591 return RC;
592 }
593
594private:
595 bool promoteCandidates(int64_t *NumPromoted) {
596 // Skip 'infinite' loops:
597 if (ExitBlocks.size() == 0)
598 return false;
599
600 // Skip if any of the ExitBlocks contains a ret instruction.
601 // This is to prevent dumping of incomplete profile -- if the
602 // loop is a long running loop and dump is called in the middle
603 // of the loop, the result profile is incomplete.
604 // FIXME: add other heuristics to detect long running loops.
605 if (SkipRetExitBlock) {
606 for (auto *BB : ExitBlocks)
607 if (isa<ReturnInst>(BB->getTerminator()))
608 return false;
609 }
610
611 unsigned MaxProm = getMaxNumOfPromotionsInLoop(&L);
612 if (MaxProm == 0)
613 return false;
614
615 [[maybe_unused]] auto *Ptr = LoopToCandidates.getPointerIntoBucketsArray();
616 unsigned Promoted = 0;
617 for (auto &Cand : LoopToCandidates[&L]) {
619 SSAUpdater SSA(&NewPHIs);
620 Value *InitVal = ConstantInt::get(Cand.first->getType(), 0);
621
622 // If BFI is set, we will use it to guide the promotions.
623 if (BFI) {
624 auto *BB = Cand.first->getParent();
625 auto InstrCount = BFI->getBlockProfileCount(BB);
626 if (!InstrCount)
627 continue;
628 auto PreheaderCount = BFI->getBlockProfileCount(L.getLoopPreheader());
629 // If the average loop trip count is not greater than 1.5, we skip
630 // promotion.
631 if (PreheaderCount && (*PreheaderCount * 3) >= (*InstrCount * 2))
632 continue;
633 }
634
635 PGOCounterPromoterHelper Promoter(
636 Cand.first, Cand.second, SSA, InitVal, L.getLoopPreheader(),
637 ExitBlocks, InsertPts, LoopToCandidates, LI, IsAtomic);
638 Promoter.run(SmallVector<Instruction *, 2>({Cand.first, Cand.second}));
639
640 assert(LoopToCandidates.isPointerIntoBucketsArray(Ptr) &&
641 "References into LoopToCandidates might be invalid");
642 Cand = {nullptr, nullptr};
643
644 Promoted++;
645 if (Promoted >= MaxProm)
646 break;
647
648 (*NumPromoted)++;
649 if (MaxNumOfPromotions != -1 && *NumPromoted >= MaxNumOfPromotions)
650 break;
651 }
652
653 LLVM_DEBUG(dbgs() << Promoted << " counters promoted for loop (depth="
654 << L.getLoopDepth() << ")\n");
655 return Promoted != 0;
656 }
657
658private:
659 bool allowSpeculativeCounterPromotion(Loop *LP) {
660 SmallVector<BasicBlock *, 8> ExitingBlocks;
661 L.getExitingBlocks(ExitingBlocks);
662 // Not considierered speculative.
663 if (ExitingBlocks.size() == 1)
664 return true;
665 if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
666 return false;
667 return true;
668 }
669
670 // Check whether the loop satisfies the basic conditions needed to perform
671 // Counter Promotions.
672 bool
673 isPromotionPossible(Loop *LP,
674 const SmallVectorImpl<BasicBlock *> &LoopExitBlocks) {
675 // We can't insert into a catchswitch.
676 if (llvm::any_of(LoopExitBlocks, [](BasicBlock *Exit) {
677 return isa<CatchSwitchInst>(Exit->getTerminator());
678 }))
679 return false;
680
681 if (!LP->hasDedicatedExits())
682 return false;
683
684 BasicBlock *PH = LP->getLoopPreheader();
685 if (!PH)
686 return false;
687
688 return true;
689 }
690
691 // Returns the max number of Counter Promotions for LP.
692 unsigned getMaxNumOfPromotionsInLoop(Loop *LP) {
693 SmallVector<BasicBlock *, 8> LoopExitBlocks;
694 LP->getExitBlocks(LoopExitBlocks);
695 if (!isPromotionPossible(LP, LoopExitBlocks))
696 return 0;
697
698 SmallVector<BasicBlock *, 8> ExitingBlocks;
699 LP->getExitingBlocks(ExitingBlocks);
700
701 // If BFI is set, we do more aggressive promotions based on BFI.
702 if (BFI)
703 return (unsigned)-1;
704
705 // Not considierered speculative.
706 if (ExitingBlocks.size() == 1)
707 return MaxNumOfPromotionsPerLoop;
708
709 if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
710 return 0;
711
712 // Whether the target block is in a loop does not matter:
713 if (SpeculativeCounterPromotionToLoop)
714 return MaxNumOfPromotionsPerLoop;
715
716 // Now check the target block:
717 unsigned MaxProm = MaxNumOfPromotionsPerLoop;
718 for (auto *TargetBlock : LoopExitBlocks) {
719 auto *TargetLoop = LI.getLoopFor(TargetBlock);
720 if (!TargetLoop)
721 continue;
722 unsigned MaxPromForTarget = getMaxNumOfPromotionsInLoop(TargetLoop);
723 unsigned PendingCandsInTarget = LoopToCandidates[TargetLoop].size();
724 MaxProm =
725 std::min(MaxProm, std::max(MaxPromForTarget, PendingCandsInTarget) -
726 PendingCandsInTarget);
727 }
728 return MaxProm;
729 }
730
731 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
732 SmallVector<BasicBlock *, 8> ExitBlocks;
733 SmallVector<Instruction *, 8> InsertPts;
734 Loop &L;
735 LoopInfo &LI;
736 BlockFrequencyInfo *BFI;
737 const bool IsAtomic; // Whether to convert counter updates to atomics.
738};
739
740enum class ValueProfilingCallType {
741 // Individual values are tracked. Currently used for indirect call target
742 // profiling.
743 Default,
744
745 // MemOp: the memop size value profiling.
746 MemOp
747};
748
749} // end anonymous namespace
750
755 auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
756 return FAM.getResult<TargetLibraryAnalysis>(F);
757 };
758 InstrLowerer Lowerer(M, Options, GetTLI, IsCS);
759 if (!Lowerer.lower())
760 return PreservedAnalyses::all();
761
763}
764
765//
766// Perform instrumentation sampling.
767//
768// There are 3 flavors of sampling:
769// (1) Full burst sampling: We transform:
770// Increment_Instruction;
771// to:
772// if (__llvm_profile_sampling__ <= SampledInstrBurstDuration - 1) {
773// Increment_Instruction;
774// }
775// __llvm_profile_sampling__ += 1;
776// if (__llvm_profile_sampling__ >= SampledInstrPeriod) {
777// __llvm_profile_sampling__ = 0;
778// }
779//
780// "__llvm_profile_sampling__" is a thread-local global shared by all PGO
781// counters (value-instrumentation and edge instrumentation).
782//
783// (2) Fast burst sampling:
784// "__llvm_profile_sampling__" variable is an unsigned type, meaning it will
785// wrap around to zero when overflows. In this case, the second check is
786// unnecessary, so we won't generate check2 when the SampledInstrPeriod is
787// set to 65536 (64K). The code after:
788// if (__llvm_profile_sampling__ <= SampledInstrBurstDuration - 1) {
789// Increment_Instruction;
790// }
791// __llvm_profile_sampling__ += 1;
792//
793// (3) Simple sampling:
794// When SampledInstrBurstDuration is set to 1, we do a simple sampling:
795// __llvm_profile_sampling__ += 1;
796// if (__llvm_profile_sampling__ >= SampledInstrPeriod) {
797// __llvm_profile_sampling__ = 0;
798// Increment_Instruction;
799// }
800//
801// Note that, the code snippet after the transformation can still be counter
802// promoted. However, with sampling enabled, counter updates are expected to
803// be infrequent, making the benefits of counter promotion negligible.
804// Moreover, counter promotion can potentially cause issues in server
805// applications, particularly when the counters are dumped without a clean
806// exit. To mitigate this risk, counter promotion is disabled by default when
807// sampling is enabled. This behavior can be overridden using the internal
808// option.
809void InstrLowerer::doSampling(Instruction *I) {
810 if (!isSamplingEnabled())
811 return;
812
813 SampledInstrumentationConfig config = getSampledInstrumentationConfig();
814 auto GetConstant = [&config](IRBuilder<> &Builder, uint32_t C) {
815 if (config.UseShort)
816 return Builder.getInt16(C);
817 else
818 return Builder.getInt32(C);
819 };
820
821 IntegerType *SamplingVarTy;
822 if (config.UseShort)
823 SamplingVarTy = Type::getInt16Ty(M.getContext());
824 else
825 SamplingVarTy = Type::getInt32Ty(M.getContext());
826 auto *SamplingVar =
828 assert(SamplingVar && "SamplingVar not set properly");
829
830 // Create the condition for checking the burst duration.
831 Instruction *SamplingVarIncr;
832 Value *NewSamplingVarVal;
833 MDBuilder MDB(I->getContext());
834 MDNode *BranchWeight;
835 IRBuilder<> CondBuilder(I);
836 auto *LoadSamplingVar = CondBuilder.CreateLoad(SamplingVarTy, SamplingVar);
837 if (config.IsSimpleSampling) {
838 // For the simple sampling, just create the load and increments.
839 IRBuilder<> IncBuilder(I);
840 NewSamplingVarVal =
841 IncBuilder.CreateAdd(LoadSamplingVar, GetConstant(IncBuilder, 1));
842 SamplingVarIncr = IncBuilder.CreateStore(NewSamplingVarVal, SamplingVar);
843 } else {
844 // For the burst-sampling, create the conditional update.
845 auto *DurationCond = CondBuilder.CreateICmpULE(
846 LoadSamplingVar, GetConstant(CondBuilder, config.BurstDuration - 1));
847 BranchWeight = MDB.createBranchWeights(
848 config.BurstDuration, config.Period - config.BurstDuration);
850 DurationCond, I, /* Unreachable */ false, BranchWeight);
851 IRBuilder<> IncBuilder(I);
852 NewSamplingVarVal =
853 IncBuilder.CreateAdd(LoadSamplingVar, GetConstant(IncBuilder, 1));
854 SamplingVarIncr = IncBuilder.CreateStore(NewSamplingVarVal, SamplingVar);
855 I->moveBefore(ThenTerm->getIterator());
856 }
857
858 if (config.IsFastSampling)
859 return;
860
861 // Create the condition for checking the period.
862 Instruction *ThenTerm, *ElseTerm;
863 IRBuilder<> PeriodCondBuilder(SamplingVarIncr);
864 auto *PeriodCond = PeriodCondBuilder.CreateICmpUGE(
865 NewSamplingVarVal, GetConstant(PeriodCondBuilder, config.Period));
866 BranchWeight = MDB.createBranchWeights(1, config.Period - 1);
867 SplitBlockAndInsertIfThenElse(PeriodCond, SamplingVarIncr, &ThenTerm,
868 &ElseTerm, BranchWeight);
869
870 // For the simple sampling, the counter update happens in sampling var reset.
871 if (config.IsSimpleSampling)
872 I->moveBefore(ThenTerm->getIterator());
873
874 IRBuilder<> ResetBuilder(ThenTerm);
875 ResetBuilder.CreateStore(GetConstant(ResetBuilder, 0), SamplingVar);
876 SamplingVarIncr->moveBefore(ElseTerm->getIterator());
877}
878
/// Lower every instrprof intrinsic found in \p F.
///
/// The intrinsics are first collected into a buffer; each one is then passed
/// to doSampling() and dispatched to the lowering routine for its kind.
/// Counter load/store promotion runs afterwards, but only if something was
/// lowered or erased.
///
/// \returns true if at least one intrinsic was lowered or erased.
879bool InstrLowerer::lowerIntrinsics(Function *F) {
880 bool MadeChange = false;
 // Start from an empty candidate list for this function.
881 PromotionCandidates.clear();
 // NOTE(review): the listing skips line 882 here. InstrProfInsts, which is
 // filled and iterated below, is presumably declared on it -- confirm
 // against the upstream file.
883
884 // To ensure compatibility with sampling, we save the intrinsics into
885 // a buffer to prevent potential breakage of the iterator (as the
886 // intrinsics will be moved to a different BB).
887 for (BasicBlock &BB : *F) {
888 for (Instruction &Instr : llvm::make_early_inc_range(BB)) {
889 if (auto *IP = dyn_cast<InstrProfInstBase>(&Instr))
890 InstrProfInsts.push_back(IP);
891 }
892 }
893
894 for (auto *Instr : InstrProfInsts) {
 // doSampling() is invoked for every collected intrinsic, before the
 // kind-specific lowering below.
895 doSampling(Instr);
 // The step variant is tested before the plain increment; presumably it is
 // a subclass of InstrProfIncrementInst, so the order matters -- confirm.
896 if (auto *IPIS = dyn_cast<InstrProfIncrementInstStep>(Instr)) {
897 lowerIncrement(IPIS);
898 MadeChange = true;
899 } else if (auto *IPI = dyn_cast<InstrProfIncrementInst>(Instr)) {
900 lowerIncrement(IPI);
901 MadeChange = true;
902 } else if (auto *IPC = dyn_cast<InstrProfTimestampInst>(Instr)) {
903 lowerTimestamp(IPC);
904 MadeChange = true;
905 } else if (auto *IPC = dyn_cast<InstrProfCoverInst>(Instr)) {
906 lowerCover(IPC);
907 MadeChange = true;
908 } else if (auto *IPVP = dyn_cast<InstrProfValueProfileInst>(Instr)) {
909 lowerValueProfileInst(IPVP);
910 MadeChange = true;
911 } else if (auto *IPMP = dyn_cast<InstrProfMCDCBitmapParameters>(Instr)) {
 // Nothing to emit for the parameters intrinsic here: it is only erased.
912 IPMP->eraseFromParent();
913 MadeChange = true;
914 } else if (auto *IPBU = dyn_cast<InstrProfMCDCTVBitmapUpdate>(Instr)) {
915 lowerMCDCTestVectorBitmapUpdate(IPBU);
916 MadeChange = true;
917 }
918 }
919
920 if (!MadeChange)
921 return false;
922
 // Promotion consumes the PromotionCandidates gathered during lowering.
923 promoteCounterLoadStores(F);
924 return true;
925}
926
927bool InstrLowerer::isRuntimeCounterRelocationEnabled() const {
928 // Mach-O don't support weak external references.
929 if (TT.isOSBinFormatMachO())
930 return false;
931
932 if (RuntimeCounterRelocation.getNumOccurrences() > 0)
933 return RuntimeCounterRelocation;
934
935 // Fuchsia uses runtime counter relocation by default.
936 return TT.isOSFuchsia();
937}
938
939bool InstrLowerer::isSamplingEnabled() const {
940 if (SampledInstr.getNumOccurrences() > 0)
941 return SampledInstr;
942 return Options.Sampling;
943}
944
945bool InstrLowerer::isCounterPromotionEnabled() const {
946 if (DoCounterPromotion.getNumOccurrences() > 0)
947 return DoCounterPromotion;
948 return Options.DoCounterPromotion;
949}
950
951bool InstrLowerer::isAtomic() const {
952 return Options.Atomic || AtomicCounterUpdateAll;
953}
954
/// Verification helper: walk every instruction of \p F and abort with a fatal
/// error if a plain load or store is found whose address (after stripping
/// in-bounds offsets) has a name starting with a given prefix.
955static void doAtomicCheck(Function *F) {
956 for (const llvm::Instruction &I : llvm::instructions(F)) {
957 const Value *Addr = nullptr;
 // Pick the pointer operand: operand 0 of a load, operand 1 of a store.
958 if (const LoadInst *LI = dyn_cast<LoadInst>(&I))
959 Addr = LI->getOperand(0);
960 else if (const StoreInst *LI = dyn_cast<StoreInst>(&I))
961 Addr = LI->getOperand(1);
962
 // NOTE(review): the listing skips line 964, which holds the argument of
 // starts_with() and the opening brace of this if. Presumably the argument
 // is the counters variable name prefix -- confirm against upstream.
963 if (Addr && Addr->stripInBoundsOffsets()->getName().starts_with(
965 LLVM_DEBUG(dbgs() << "Missed candidate: "; I.dump());
966 report_fatal_error("Candidate load/store not converted to atomic");
967 }
968 }
969}
970
/// Promote the counter load/store pairs recorded in PromotionCandidates.
///
/// Does nothing when counter promotion is disabled. Pairs that are not inside
/// any loop are left in place (and made atomic when isAtomic() holds); the
/// remaining pairs are grouped by their innermost loop and handed to
/// PGOCounterPromoter.
971void InstrLowerer::promoteCounterLoadStores(Function *F) {
972 if (!isCounterPromotionEnabled())
973 return;
974
 // The analyses are computed locally for this function.
975 DominatorTree DT(*F);
976 LoopInfo LI(DT);
977 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> LoopPromotionCandidates;
978
 // BFI stays null unless the options ask for it; the promoter receives
 // BFI.get(), i.e. possibly a null pointer.
979 std::unique_ptr<BlockFrequencyInfo> BFI;
980 if (Options.UseBFIInPromotion) {
 // NOTE(review): BPI is destroyed at the end of this scope while BFI lives
 // on; presumably BFI does not keep a reference to it -- confirm.
981 std::unique_ptr<BranchProbabilityInfo> BPI;
982 BPI.reset(new BranchProbabilityInfo(*F, LI, &GetTLI(*F)));
983 BFI.reset(new BlockFrequencyInfo(*F, *BPI, LI));
984 }
985
986 for (const auto &LoadStore : PromotionCandidates) {
987 auto *CounterLoad = LoadStore.first;
988 auto *CounterStore = LoadStore.second;
989 BasicBlock *BB = CounterLoad->getParent();
990 Loop *ParentLoop = LI.getLoopFor(BB);
 // Updates outside of any loop are never promoted; they are only converted
 // to atomic form when atomic updates are requested.
991 if (!ParentLoop) {
992 if (isAtomic())
993 makeAtomic(CounterLoad, CounterStore);
994 continue;
995 }
996 LoopPromotionCandidates[ParentLoop].emplace_back(CounterLoad, CounterStore);
997 }
998
 // NOTE(review): the listing skips line 999. `Loops`, iterated in reverse
 // below, is presumably declared there -- confirm against upstream.
1000
1001 // Do a post-order traversal of the loops so that counter updates can be
1002 // iteratively hoisted outside the loop nest.
1003 for (auto *Loop : llvm::reverse(Loops)) {
1004 PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI, BFI.get(),
1005 isAtomic());
1006 Promoter.run(&TotalCountersPromoted);
1007 }
1008
 // NOTE(review): the listing skips line 1010, the statement controlled by
 // this if. Presumably it runs a verification over F -- confirm.
1009 if (isAtomic() && VerifyAtomicPromotion)
1011}
1012
// NOTE(review): the listing skips line 1013, which holds this function's
// signature. From the call site in lower() it is presumably
// needsRuntimeHookUnconditionally(TT) -- confirm against upstream.
// The visible body returns false on Fuchsia and true on every other target.
1014 // On Fuchsia, we only need runtime hook if any counters are present.
1015 if (TT.isOSFuchsia())
1016 return false;
1017
1018 return true;
1019}
1020
1021/// Check if the module contains uses of any profiling intrinsics.
///
/// Only the cover, increment, increment_step, timestamp and value_profile
/// intrinsics are checked by the visible body.
// NOTE(review): the listing skips line 1022, which holds the signature.
// From the call site in lower() it is presumably
// containsProfilingIntrinsics(M) -- confirm against upstream.
 // An intrinsic counts as present only if its declaration already exists in
 // the module and has at least one use.
1023 auto containsIntrinsic = [&](int ID) {
1024 if (auto *F = Intrinsic::getDeclarationIfExists(&M, ID))
1025 return !F->use_empty();
1026 return false;
1027 };
1028 return containsIntrinsic(Intrinsic::instrprof_cover) ||
1029 containsIntrinsic(Intrinsic::instrprof_increment) ||
1030 containsIntrinsic(Intrinsic::instrprof_increment_step) ||
1031 containsIntrinsic(Intrinsic::instrprof_timestamp) ||
1032 containsIntrinsic(Intrinsic::instrprof_value_profile);
1033}
1034
/// Entry point of the lowering for the module M.
///
/// Steps, in order: optionally emit the runtime hook, bail out early when the
/// module has neither profiling intrinsics nor the unused-coverage-names
/// variable, pre-scan all functions to size value-profile data and create
/// counters/bitmaps, lower the intrinsics of every function, lower coverage
/// data, and finally emit the supporting data and initialization code.
///
/// \returns true if the module was changed.
1035bool InstrLowerer::lower() {
1036 bool MadeChange = false;
1037 bool NeedsRuntimeHook = needsRuntimeHookUnconditionally(TT);
1038 if (NeedsRuntimeHook)
1039 MadeChange = emitRuntimeHook();
1040
 // NOTE(review): the listing skips line 1042, the statement controlled by
 // this if. Presumably it creates the sampling variable -- confirm.
1041 if (!IsCS && isSamplingEnabled())
1043
1044 bool ContainsProfiling = containsProfilingIntrinsics(M);
1045 GlobalVariable *CoverageNamesVar =
1046 M.getNamedGlobal(getCoverageUnusedNamesVarName());
1047 // Improve compile time by avoiding linear scans when there is no work.
1048 if (!ContainsProfiling && !CoverageNamesVar)
1049 return MadeChange;
1050
1051 // We did not know how many value sites there would be inside
1052 // the instrumented function. This is counting the number of instrumented
1053 // target value sites to enter it as field in the profile data variable.
1054 for (Function &F : M) {
1055 InstrProfCntrInstBase *FirstProfInst = nullptr;
1056 for (BasicBlock &BB : F) {
1057 for (auto I = BB.begin(), E = BB.end(); I != E; I++) {
1058 if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(I))
1059 computeNumValueSiteCounts(Ind);
1060 else {
 // NOTE(review): the listing skips line 1062, the second half of this
 // condition -- confirm against upstream before relying on it.
1061 if (FirstProfInst == nullptr &&
1063 FirstProfInst = dyn_cast<InstrProfCntrInstBase>(I);
1064 // If the MCDCBitmapParameters intrinsic is seen, create the bitmaps.
1065 if (const auto &Params = dyn_cast<InstrProfMCDCBitmapParameters>(I))
1066 static_cast<void>(getOrCreateRegionBitmaps(Params));
1067 }
1068 }
1069 }
1070
1071 // Use a profile intrinsic to create the region counters and data variable.
1072 // Also create the data variable based on the MCDCParams.
1073 if (FirstProfInst != nullptr) {
1074 static_cast<void>(getOrCreateRegionCounters(FirstProfInst));
1075 }
1076 }
1077
 // NOTE(review): the listing skips line 1078; presumably a condition that
 // guards the vtable loop below (e.g. a vtable value profiling flag) --
 // confirm against upstream.
1079 for (GlobalVariable &GV : M.globals())
1080 // Global variables with type metadata are virtual table variables.
1081 if (GV.hasMetadata(LLVMContext::MD_type))
1082 getOrCreateVTableProfData(&GV);
1083
1084 for (Function &F : M)
1085 MadeChange |= lowerIntrinsics(&F);
1086
1087 if (CoverageNamesVar) {
1088 lowerCoverageData(CoverageNamesVar);
1089 MadeChange = true;
1090 }
1091
1092 if (!MadeChange)
1093 return false;
1094
1095 emitVNodes();
1096 emitNameData();
1097 emitVTableNames();
1098
1099 // Emit runtime hook for the cases where the target does not unconditionally
1100 // require pulling in profile runtime, and coverage is enabled on code that is
1101 // not eliminated by the front-end, e.g. unused functions with internal
1102 // linkage.
1103 if (!NeedsRuntimeHook && ContainsProfiling)
1104 emitRuntimeHook();
1105
1106 emitRegistration();
1107 emitUses();
1108 emitInitialization();
1109 return true;
1110}
1111
// NOTE(review): the listing skips line 1112, the first line of this
// signature. From the call sites in lowerValueProfileInst() the function is
// presumably getOrInsertValueProfilingCall -- confirm against upstream.
// The visible body builds a void-returning function type from ParamTypes and
// returns M.getOrInsertFunction() for the name selected by CallType.
1113 Module &M, const TargetLibraryInfo &TLI,
1114 ValueProfilingCallType CallType = ValueProfilingCallType::Default) {
 // Ctx is not used by any visible line; presumably the parameter types
 // expanded by the macro below refer to it -- confirm.
1115 LLVMContext &Ctx = M.getContext();
1116 auto *ReturnTy = Type::getVoidTy(M.getContext());
1117
 // If the target wants an extension attribute on i32 parameters, attach it
 // to parameter index 2.
1118 AttributeList AL;
1119 if (auto AK = TLI.getExtAttrForI32Param(false))
1120 AL = AL.addParamAttribute(M.getContext(), 2, AK);
1121
1122 assert((CallType == ValueProfilingCallType::Default ||
1123 CallType == ValueProfilingCallType::MemOp) &&
1124 "Must be Default or MemOp");
1125 Type *ParamTypes[] = {
 // NOTE(review): the listing skips line 1127; presumably the #include that
 // expands VALUE_PROF_FUNC_PARAM into the array initializer -- confirm.
1126#define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
1128 };
1129 auto *ValueProfilingCallTy =
1130 FunctionType::get(ReturnTy, ArrayRef(ParamTypes), false);
 // NOTE(review): the listing skips lines 1132-1133, the two alternatives of
 // this conditional expression (the runtime function names) -- confirm.
1131 StringRef FuncName = CallType == ValueProfilingCallType::Default
1134 return M.getOrInsertFunction(FuncName, ValueProfilingCallTy, AL);
1135}
1136
1137void InstrLowerer::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) {
1138 GlobalVariable *Name = Ind->getName();
1139 uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
1140 uint64_t Index = Ind->getIndex()->getZExtValue();
1141 auto &PD = ProfileDataMap[Name];
1142 PD.NumValueSites[ValueKind] =
1143 std::max(PD.NumValueSites[ValueKind], (uint32_t)(Index + 1));
1144}
1145
/// Replace the value-profile intrinsic \p Ind with a call to the value
/// profiling runtime function, passing the target value, the function's
/// profile data variable and a flattened value-site index.
1146void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
1147 // TODO: Value profiling heavily depends on the data section which is omitted
1148 // in lightweight mode. We need to move the value profile pointer to the
1149 // Counter struct to get this working.
 // NOTE(review): the listing skips line 1151, the condition of this assert.
1150 assert(
1152 "Value profiling is not yet supported with lightweight instrumentation");
1153 GlobalVariable *Name = Ind->getName();
1154 auto It = ProfileDataMap.find(Name);
1155 assert(It != ProfileDataMap.end() && It->second.DataVar &&
1156 "value profiling detected in function with no counter increment");
1157
1158 GlobalVariable *DataVar = It->second.DataVar;
1159 uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
1160 uint64_t Index = Ind->getIndex()->getZExtValue();
 // Flatten (kind, index) into one index by skipping the sites of all
 // lower-numbered value kinds.
1161 for (uint32_t Kind = IPVK_First; Kind < ValueKind; ++Kind)
1162 Index += It->second.NumValueSites[Kind];
1163
1164 IRBuilder<> Builder(Ind);
1165 bool IsMemOpSize = (Ind->getValueKind()->getZExtValue() ==
1166 llvm::InstrProfValueKind::IPVK_MemOPSize);
1167 CallInst *Call = nullptr;
1168 auto *TLI = &GetTLI(*Ind->getFunction());
 // The data variable is passed as a pointer in address space 0.
1169 auto *NormalizedDataVarPtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
1170 DataVar, PointerType::get(M.getContext(), 0));
1171
1172 // To support value profiling calls within Windows exception handlers, funclet
1173 // information contained within operand bundles needs to be copied over to
1174 // the library call. This is required for the IR to be processed by the
1175 // WinEHPrepare pass.
 // NOTE(review): the listing skips line 1176; OpBundles is presumably
 // declared there -- confirm against upstream.
1177 Ind->getOperandBundlesAsDefs(OpBundles);
 // Both branches pass the same three arguments; only the callee differs.
1178 if (!IsMemOpSize) {
1179 Value *Args[3] = {Ind->getTargetValue(), NormalizedDataVarPtr,
1180 Builder.getInt32(Index)};
1181 Call = Builder.CreateCall(getOrInsertValueProfilingCall(M, *TLI), Args,
1182 OpBundles);
1183 } else {
1184 Value *Args[3] = {Ind->getTargetValue(), NormalizedDataVarPtr,
1185 Builder.getInt32(Index)};
1186 Call = Builder.CreateCall(
1187 getOrInsertValueProfilingCall(M, *TLI, ValueProfilingCallType::MemOp),
1188 Args, OpBundles);
1189 }
 // Mirror on the call site the i32 extension attribute used for argument 2.
1190 if (auto AK = TLI->getExtAttrForI32Param(false))
1191 Call->addParamAttr(2, AK);
 // NOTE(review): the listing skips line 1192 -- confirm what happens to the
 // intrinsic's uses before it is erased.
1193 Ind->eraseFromParent();
1194}
1195
/// Return the module-level global named \p VarName, creating it as a
/// zero-initialized, non-constant i64 with linkonce_odr linkage if it does
/// not exist yet.
1196GlobalVariable *InstrLowerer::getOrCreateBiasVar(StringRef VarName) {
1197 GlobalVariable *Bias = M.getGlobalVariable(VarName);
1198 if (Bias)
1199 return Bias;
1200
1201 Type *Int64Ty = Type::getInt64Ty(M.getContext());
1202
1203 // Compiler must define this variable when runtime counter relocation
1204 // is being used. Runtime has a weak external reference that is used
1205 // to check whether that's the case or not.
1206 Bias = new GlobalVariable(M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage,
1207 Constant::getNullValue(Int64Ty), VarName);
 // NOTE(review): the listing skips line 1208; presumably it sets the
 // variable's visibility -- confirm against upstream.
1209 // A definition that's weak (linkonce_odr) without being in a COMDAT
1210 // section wouldn't lead to link errors, but it would lead to a dead
1211 // data word from every TU but one. Putting it in COMDAT ensures there
1212 // will be exactly one data slot in the link.
1213 if (TT.supportsCOMDAT())
1214 Bias->setComdat(M.getOrInsertComdat(VarName));
1215
1216 return Bias;
1217}
1218
/// Compute the address of the counter slot selected by \p I's index.
///
/// The result is a GEP into the function's region counters. When runtime
/// counter relocation is enabled, the counter bias (loaded once per function
/// in the entry block and cached in FunctionToProfileBiasMap) is added to
/// that address via ptrtoint/add/inttoptr.
1219Value *InstrLowerer::getCounterAddress(InstrProfCntrInstBase *I) {
1220 auto *Counters = getOrCreateRegionCounters(I);
1221 IRBuilder<> Builder(I);
1222
 // NOTE(review): the listing skips line 1223; presumably a condition that
 // guards the alignment change below (e.g. only for timestamp intrinsics)
 // -- confirm against upstream.
1224 Counters->setAlignment(Align(8));
1225
1226 auto *Addr = Builder.CreateConstInBoundsGEP2_32(
1227 Counters->getValueType(), Counters, 0, I->getIndex()->getZExtValue());
1228
1229 if (!isRuntimeCounterRelocationEnabled())
1230 return Addr;
1231
1232 Type *Int64Ty = Type::getInt64Ty(M.getContext());
1233 Function *Fn = I->getParent()->getParent();
 // Reference into the cache: assigning BiasLI below stores the load for
 // later calls on the same function.
1234 LoadInst *&BiasLI = FunctionToProfileBiasMap[Fn];
1235 if (!BiasLI) {
1236 IRBuilder<> EntryBuilder(&Fn->getEntryBlock().front());
1237 auto *Bias = getOrCreateBiasVar(getInstrProfCounterBiasVarName());
1238 BiasLI = EntryBuilder.CreateLoad(Int64Ty, Bias, "profc_bias");
1239 // Bias doesn't change after startup.
1240 BiasLI->setMetadata(LLVMContext::MD_invariant_load,
1241 MDNode::get(M.getContext(), {}));
1242 }
1243 auto *Add = Builder.CreateAdd(Builder.CreatePtrToInt(Addr, Int64Ty), BiasLI);
1244 return Builder.CreateIntToPtr(Add, Addr->getType());
1245}
1246
1247Value *InstrLowerer::getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I) {
1248 auto *Bitmaps = getOrCreateRegionBitmaps(I);
1249 if (!isRuntimeCounterRelocationEnabled())
1250 return Bitmaps;
1251
1252 // Put BiasLI onto the entry block.
1253 Type *Int64Ty = Type::getInt64Ty(M.getContext());
1254 Function *Fn = I->getFunction();
1255 IRBuilder<> EntryBuilder(&Fn->getEntryBlock().front());
1256 auto *Bias = getOrCreateBiasVar(getInstrProfBitmapBiasVarName());
1257 auto *BiasLI = EntryBuilder.CreateLoad(Int64Ty, Bias, "profbm_bias");
1258 // Assume BiasLI invariant (in the function at least)
1259 BiasLI->setMetadata(LLVMContext::MD_invariant_load,
1260 MDNode::get(M.getContext(), {}));
1261
1262 // Add Bias to Bitmaps and put it before the intrinsic.
1263 IRBuilder<> Builder(I);
1264 return Builder.CreatePtrAdd(Bitmaps, BiasLI, "profbm_addr");
1265}
1266
1267void InstrLowerer::lowerCover(InstrProfCoverInst *CoverInstruction) {
1268 auto *Addr = getCounterAddress(CoverInstruction);
1269 IRBuilder<> Builder(CoverInstruction);
1270 if (ConditionalCounterUpdate) {
1271 Instruction *SplitBefore = CoverInstruction->getNextNode();
1272 auto &Ctx = CoverInstruction->getParent()->getContext();
1273 auto *Int8Ty = llvm::Type::getInt8Ty(Ctx);
1274 Value *Load = Builder.CreateLoad(Int8Ty, Addr, "pgocount");
1275 Value *Cmp = Builder.CreateIsNotNull(Load, "pgocount.ifnonzero");
1276 Instruction *ThenBranch =
1277 SplitBlockAndInsertIfThen(Cmp, SplitBefore, false);
1278 Builder.SetInsertPoint(ThenBranch);
1279 }
1280
1281 // We store zero to represent that this block is covered.
1282 Builder.CreateStore(Builder.getInt8(0), Addr);
1283 CoverInstruction->eraseFromParent();
1284}
1285
/// Replace a timestamp intrinsic with a call to a runtime function that
/// receives the address of the counter slot as its only argument.
///
/// The intrinsic's index must be zero (asserted below).
1286void InstrLowerer::lowerTimestamp(
1287 InstrProfTimestampInst *TimestampInstruction) {
1288 assert(TimestampInstruction->getIndex()->isNullValue() &&
1289 "timestamp probes are always the first probe for a function");
1290 auto &Ctx = M.getContext();
1291 auto *TimestampAddr = getCounterAddress(TimestampInstruction);
1292 IRBuilder<> Builder(TimestampInstruction);
 // Callee type: void(<type of the counter address>).
1293 auto *CalleeTy =
1294 FunctionType::get(Type::getVoidTy(Ctx), TimestampAddr->getType(), false);
 // NOTE(review): the listing skips line 1296, the arguments of
 // getOrInsertFunction (callee name and type) -- confirm against upstream.
1295 auto Callee = M.getOrInsertFunction(
1297 Builder.CreateCall(Callee, {TimestampAddr});
1298 TimestampInstruction->eraseFromParent();
1299}
1300
/// Return the cached per-function GPU PGO invariants for \p F, computing and
/// caching them on first request.
///
/// When OffloadPGOSampling is positive, a call to the sampling runtime
/// function is emitted at the entry block's first insertion point and its
/// result, compared against zero, becomes the `Matched` value.
1301InstrLowerer::GPUPGOInvariants &
1302InstrLowerer::getOrCreateGPUInvariants(Function *F) {
1303 auto It = GPUInvariantsCache.find(F);
1304 if (It != GPUInvariantsCache.end())
1305 return It->second;
1306
1307 LLVMContext &Context = M.getContext();
1308 auto *Int32Ty = Type::getInt32Ty(Context);
1309
1310 BasicBlock &EntryBB = F->getEntryBlock();
1311 IRBuilder<> Builder(&*EntryBB.getFirstInsertionPt());
1312
 // NOTE(review): the listing skips line 1313; `Matched` is presumably
 // declared (with its default value) there -- confirm against upstream.
1314 if (OffloadPGOSampling > 0) {
 // NOTE(review): the listing skips line 1316, the start of the expression
 // that obtains the callee -- confirm against upstream.
1315 FunctionCallee IsSampledFn =
1317 RTLIB::impl___llvm_profile_sampling_gpu),
1318 Int32Ty, Int32Ty);
1319 Value *SampledInt = Builder.CreateCall(
1320 IsSampledFn, {ConstantInt::get(Int32Ty, OffloadPGOSampling)},
1321 "pgo.sampled");
1322 Matched = Builder.CreateICmpNE(SampledInt, ConstantInt::get(Int32Ty, 0),
1323 "pgo.matched");
1324 }
1325
 // operator[] creates the cache entry that later calls will find.
1326 auto &Inv = GPUInvariantsCache[F];
1327 Inv.Matched = Matched;
1328 return Inv;
1329}
1330
/// Lower a counter increment intrinsic.
///
/// GPU profiling targets: the increment becomes a call to a runtime function
/// taking the counter address, an optional uniform-counter address and the
/// step as i64; with OffloadPGOSampling > 0 the call is placed in a block
/// that only runs when the per-function `Matched` value is true.
///
/// All other targets: the increment becomes either an atomic RMW add or a
/// plain load/add/store sequence; the latter is recorded as a promotion
/// candidate when counter promotion is enabled.
1331void InstrLowerer::lowerIncrement(InstrProfIncrementInst *Inc) {
1332 IRBuilder<> Builder(Inc);
1333 if (isGPUProfTarget(M)) {
1334 Function *F = Inc->getFunction();
1335 auto &Inv = getOrCreateGPUInvariants(F);
1336
1337 LLVMContext &Context = M.getContext();
1338 auto *Int64Ty = Type::getInt64Ty(Context);
1339 auto *PtrTy = PointerType::getUnqual(Context);
1340
1341 auto *Addr = getCounterAddress(Inc);
1342
1343 // Store the device wave/warp size into the profile data struct once per
1344 // function. AMDGPU folds llvm.amdgcn.wavefrontsize to the subtarget's
1345 // constant; other GPUs use their fixed warp size.
1346 if (!Inv.WaveSizeStored) {
 // The flag is set even when no data variable exists, so this block is
 // entered at most once per function.
1347 Inv.WaveSizeStored = true;
1348 GlobalVariable *NamePtr = Inc->getName();
1349 auto &PD = ProfileDataMap[NamePtr];
1350 if (PD.DataVar) {
1351 IRBuilder<> EntryBuilder(&*F->getEntryBlock().getFirstInsertionPt());
1352 Value *WaveSize16 = nullptr;
1353 // Look the intrinsic up by name so this target-agnostic pass does not
1354 // pull in IntrinsicsAMDGPU.h. AMDGPU folds the intrinsic to the
1355 // subtarget's wavefront size; other GPUs fall back to a 32-lane warp.
1356 if (TT.isAMDGPU()) {
1357 Intrinsic::ID WaveSizeID =
1358 Intrinsic::lookupIntrinsicID("llvm.amdgcn.wavefrontsize");
1359 if (WaveSizeID != Intrinsic::not_intrinsic) {
1360 Function *WaveSizeFn =
1361 Intrinsic::getOrInsertDeclaration(&M, WaveSizeID);
1362 Value *WaveSize = EntryBuilder.CreateCall(WaveSizeFn);
1363 WaveSize16 = EntryBuilder.CreateTrunc(
1364 WaveSize, Type::getInt16Ty(Context), "wavesize.i16");
1365 }
1366 }
1367 if (!WaveSize16)
1368 WaveSize16 = ConstantInt::get(Type::getInt16Ty(Context), 32);
 // NOTE(review): field index 9 is presumably the wave-size member of the
 // profile data struct -- confirm against the struct definition.
1369 Value *WaveSizeAddr = EntryBuilder.CreateStructGEP(
1370 PD.DataVar->getValueType(), PD.DataVar, 9, "profd.wavesize");
1371 EntryBuilder.CreateStore(WaveSize16, WaveSizeAddr);
1372 }
1373 }
1374
 // A null pointer is passed when there are no uniform counters.
1375 GlobalVariable *UniformCounters = getOrCreateUniformCounters(Inc);
1376 Value *UniformAddrArg = ConstantPointerNull::get(PtrTy);
1377 if (UniformCounters) {
1378 Value *UniformIndices[] = {Builder.getInt32(0), Inc->getIndex()};
1379 Value *UniformAddr = Builder.CreateInBoundsGEP(
1380 UniformCounters->getValueType(), UniformCounters, UniformIndices,
1381 "unifctr.addr");
1382 UniformAddrArg =
1383 Builder.CreatePointerBitCastOrAddrSpaceCast(UniformAddr, PtrTy);
1384 }
1385 Value *CastAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, PtrTy);
1386 Value *StepI64 =
1387 Builder.CreateZExtOrTrunc(Inc->getStep(), Int64Ty, "step.i64");
1388
1389 auto *CalleeTy = FunctionType::get(Type::getVoidTy(Context),
1390 {PtrTy, PtrTy, Int64Ty}, false);
 // NOTE(review): the listing skips lines 1391-1392; `Callee`, used below, is
 // presumably declared there -- confirm against upstream.
1393 RTLIB::impl___llvm_profile_instrument_gpu),
1394 CalleeTy);
1395
1396 if (OffloadPGOSampling > 0) {
 // Split the block at the intrinsic and route control through a new
 // "po_then" block that holds the runtime call.
1397 BasicBlock *CurBB = Builder.GetInsertBlock();
1398 BasicBlock *ContBB =
1399 CurBB->splitBasicBlock(BasicBlock::iterator(Inc), "po_cont");
1400 BasicBlock *ThenBB = BasicBlock::Create(Context, "po_then", F);
1401
 // Replace the terminator left in CurBB by the split with a conditional
 // branch on the sampling decision.
1402 CurBB->getTerminator()->eraseFromParent();
1403 IRBuilder<> HeadBuilder(CurBB);
1404 HeadBuilder.CreateCondBr(Inv.Matched, ThenBB, ContBB);
1405
1406 IRBuilder<> ThenBuilder(ThenBB);
1407 ThenBuilder.CreateCall(Callee, {CastAddr, UniformAddrArg, StepI64});
1408 ThenBuilder.CreateBr(ContBB);
1409 } else {
1410 Builder.CreateCall(Callee, {CastAddr, UniformAddrArg, StepI64});
1411 }
1412 Inc->eraseFromParent();
1413 return;
1414 }
1415
1416 auto *Addr = getCounterAddress(Inc);
1417 // If promotion is enabled then delay generating atomic updates until
1418 // after promotion is done.
1419 if ((!isCounterPromotionEnabled() && isAtomic()) ||
1420 (Inc->getIndex()->isNullValue() && AtomicFirstCounter)) {
 // NOTE(review): the listing skips line 1422, the remaining arguments of
 // CreateAtomicRMW (alignment / memory ordering) -- confirm against
 // upstream.
1421 Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, Inc->getStep(),
1423 } else {
1424 Value *IncStep = Inc->getStep();
1425 Value *Load = Builder.CreateLoad(IncStep->getType(), Addr, "pgocount");
1426 auto *Count = Builder.CreateAdd(Load, Inc->getStep());
1427 auto *Store = Builder.CreateStore(Count, Addr);
1428 if (isCounterPromotionEnabled())
1429 PromotionCandidates.emplace_back(cast<Instruction>(Load), Store);
1430 }
1431 Inc->eraseFromParent();
1432}
1433
/// Consume the coverage names variable: every name it references is given
/// private linkage and appended to ReferencedNames, then the variable itself
/// is erased from the module.
///
/// The initializer is expected to be a ConstantArray (enforced by cast<>).
1434void InstrLowerer::lowerCoverageData(GlobalVariable *CoverageNamesVar) {
1435 ConstantArray *Names =
1436 cast<ConstantArray>(CoverageNamesVar->getInitializer());
1437 for (unsigned I = 0, E = Names->getNumOperands(); I < E; ++I) {
1438 Constant *NC = Names->getOperand(I);
1439 Value *V = NC->stripPointerCasts();
1440 assert(isa<GlobalVariable>(V) && "Missing reference to function name");
 // NOTE(review): the listing skips line 1441; `Name`, used below, is
 // presumably declared there as V cast to GlobalVariable -- confirm.
1442
1443 Name->setLinkage(GlobalValue::PrivateLinkage);
1444 ReferencedNames.push_back(Name);
 // Constant expressions wrapping the name drop their operand references.
1445 if (isa<ConstantExpr>(NC))
1446 NC->dropAllReferences();
1447 }
1448 CoverageNamesVar->eraseFromParent();
1449}
1450
/// Lower an MC/DC test-vector bitmap update: compute the bit selected by the
/// loaded condition-bitmap value plus the intrinsic's bitmap index, and set
/// that bit in the function's profile bitmap.
///
/// In atomic mode the bit is set with an atomic OR, executed only when a
/// plain load shows the bit is not set yet; otherwise a plain
/// load/or/store sequence is emitted.
1451void InstrLowerer::lowerMCDCTestVectorBitmapUpdate(
 // NOTE(review): the listing skips line 1452, the parameter declaration;
 // from its uses below the parameter is named `Update` -- confirm its type.
1453 auto &Ctx = M.getContext();
1454 IRBuilder<> Builder(Update);
1455 auto *Int8Ty = Type::getInt8Ty(Ctx);
1456 auto *Int32Ty = Type::getInt32Ty(Ctx);
1457 auto *MCDCCondBitmapAddr = Update->getMCDCCondBitmapAddr();
1458 auto *BitmapAddr = getBitmapAddress(Update);
1459
1460 // Load Temp Val + BitmapIdx.
1461 // %mcdc.temp = load i32, ptr %mcdc.addr, align 4
1462 auto *Temp = Builder.CreateAdd(
1463 Builder.CreateLoad(Int32Ty, MCDCCondBitmapAddr, "mcdc.temp"),
1464 Update->getBitmapIndex());
1465
1466 // Calculate byte offset using div8.
1467 // %1 = lshr i32 %mcdc.temp, 3
1468 auto *BitmapByteOffset = Builder.CreateLShr(Temp, 0x3);
1469
1470 // Add byte offset to section base byte address.
1471 // %4 = getelementptr inbounds i8, ptr @__profbm_test, i32 %1
1472 auto *BitmapByteAddr =
1473 Builder.CreateInBoundsPtrAdd(BitmapAddr, BitmapByteOffset);
1474
1475 // Calculate bit offset into bitmap byte by using div8 remainder (AND 7)
1476 // %5 = and i32 %mcdc.temp, 7
1477 // %6 = trunc i32 %5 to i8
1478 auto *BitToSet = Builder.CreateTrunc(Builder.CreateAnd(Temp, 0x7), Int8Ty);
1479
1480 // Shift bit offset left to form a bitmap.
1481 // %7 = shl i8 1, %6
1482 auto *ShiftedVal = Builder.CreateShl(Builder.getInt8(0x1), BitToSet);
1483
1484 // Load profile bitmap byte.
1485 // %mcdc.bits = load i8, ptr %4, align 1
1486 auto *Bitmap = Builder.CreateLoad(Int8Ty, BitmapByteAddr, "mcdc.bits");
1487
1488 if (isAtomic()) {
1489 // If ((Bitmap & Val) != Val), then execute atomic (Bitmap |= Val).
1490 // Note, just-loaded Bitmap might not be up-to-date. Use it just for
1491 // early testing.
1492 auto *Masked = Builder.CreateAnd(Bitmap, ShiftedVal);
1493 auto *ShouldStore = Builder.CreateICmpNE(Masked, ShiftedVal);
1494
1495 // Assume updating will be rare.
1496 auto *Unlikely = MDBuilder(Ctx).createUnlikelyBranchWeights();
1497 Instruction *ThenBranch =
1498 SplitBlockAndInsertIfThen(ShouldStore, Update, false, Unlikely);
1499
1500 // Execute if (unlikely(ShouldStore)).
1501 Builder.SetInsertPoint(ThenBranch);
 // NOTE(review): the listing skips line 1503, the remaining arguments of
 // CreateAtomicRMW (alignment / memory ordering) -- confirm against
 // upstream.
1502 Builder.CreateAtomicRMW(AtomicRMWInst::Or, BitmapByteAddr, ShiftedVal,
1504 } else {
1505 // Perform logical OR of profile bitmap byte and shifted bit offset.
1506 // %8 = or i8 %mcdc.bits, %7
1507 auto *Result = Builder.CreateOr(Bitmap, ShiftedVal);
1508
1509 // Store the updated profile bitmap byte.
1510 // store i8 %8, ptr %4, align 1
1511 Builder.CreateStore(Result, BitmapByteAddr);
1512 }
1513
1514 Update->eraseFromParent();
1515}
1516
1517/// Get the name of a profiling variable for a particular function.
///
/// The result is \p Prefix followed by the intrinsic's name variable with the
/// name-variable prefix stripped. On the hash-based path a ".<FuncHash>"
/// suffix is appended unless the name already ends with it.
///
/// \param [out] Renamed set to false on the early-return path and to true
///              whenever the hash-based path is taken, even if the name
///              already carried the hash suffix.
1518static std::string getVarName(InstrProfInstBase *Inc, StringRef Prefix,
1519 bool &Renamed) {
1520 StringRef NamePrefix = getInstrProfNameVarPrefix();
 // Drop the leading name-variable prefix from the global's name.
1521 StringRef Name = Inc->getName()->getName().substr(NamePrefix.size());
1522 Function *F = Inc->getParent()->getParent();
1523 Module *M = F->getParent();
 // NOTE(review): the listing skips line 1525, the last operand of this
 // condition -- confirm against upstream.
1524 if (!DoHashBasedCounterSplit || !isIRPGOFlagSet(M) ||
1526 Renamed = false;
1527 return (Prefix + Name).str();
1528 }
1529 Renamed = true;
 // NOTE(review): the listing skips line 1530; `FuncHash` is presumably
 // declared there from the intrinsic's hash operand -- confirm.
1531 SmallVector<char, 24> HashPostfix;
1532 if (Name.ends_with((Twine(".") + Twine(FuncHash)).toStringRef(HashPostfix)))
1533 return (Prefix + Name).str();
1534 return (Prefix + Name + "." + Twine(FuncHash)).str();
1535}
1536
// NOTE(review): the listing skips line 1537, which holds this function's
// signature. From the call `shouldRecordFunctionAddr(Fn)` elsewhere in this
// file it presumably takes a Function pointer `F` and returns bool --
// confirm against upstream.
1538 // Only record function addresses if IR PGO is enabled or if clang value
1539 // profiling is enabled. Recording function addresses greatly increases object
1540 // file size, because it prevents the inliner from deleting functions that
1541 // have been inlined everywhere.
1542 if (!profDataReferencedByCode(*F->getParent()))
1543 return false;
1544
1545 // Check the linkage
1546 bool HasAvailableExternallyLinkage = F->hasAvailableExternallyLinkage();
 // Anything that is not linkonce, local or available_externally always has
 // its address recorded.
1547 if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() &&
1548 !HasAvailableExternallyLinkage)
1549 return true;
1550
1551 // A function marked 'alwaysinline' with available_externally linkage can't
1552 // have its address taken. Doing so would create an undefined external ref to
1553 // the function, which would fail to link.
1554 if (HasAvailableExternallyLinkage &&
1555 F->hasFnAttribute(Attribute::AlwaysInline))
1556 return false;
1557
1558 // Prohibit function address recording if the function is both internal and
1559 // COMDAT. This avoids the profile data variable referencing internal symbols
1560 // in COMDAT.
1561 if (F->hasLocalLinkage() && F->hasComdat())
1562 return false;
1563
1564 // Check uses of this function for other than direct calls or invokes to it.
1565 // Inline virtual functions have linkOnceODR linkage. When a key method
1566 // exists, the vtable will only be emitted in the TU where the key method
1567 // is defined. In a TU where vtable is not available, the function won't
1568 // be 'addresstaken'. If its address is not recorded here, the profile data
1569 // with missing address may be picked by the linker leading to missing
1570 // indirect call target info.
1571 return F->hasAddressTaken() || F->hasLinkOnceLinkage();
1572}
1573
/// Decide whether profile data must reference \p Fn through its own symbol
/// (returns true) or may reference it through an alias (returns false).
1574static inline bool shouldUsePublicSymbol(Function *Fn) {
1575 // It isn't legal to make an alias of this function at all
1576 if (Fn->isDeclarationForLinker())
1577 return true;
1578
1579 // Symbols with local linkage can just use the symbol directly without
1580 // introducing relocations
1581 if (Fn->hasLocalLinkage())
1582 return true;
1583
1584 // PGO + ThinLTO + CFI cause duplicate symbols to be introduced due to some
1585 // unfavorable interaction between the new alias and the alias renaming done
1586 // in LowerTypeTests under ThinLTO. For comdat functions that would normally
1587 // be deduplicated, but the renaming scheme ends up preventing renaming, since
1588 // it creates unique names for each alias, resulting in duplicated symbols. In
1589 // the future, we should update the CFI related passes to migrate these
1590 // aliases to the same module as the jump-table they refer to will be defined.
1591 if (Fn->hasMetadata(LLVMContext::MD_type))
1592 return true;
1593
1594 // For comdat functions, an alias would need the same linkage as the original
1595 // function and hidden visibility. There is no point in adding an alias with
1596 // identical linkage and visibility to avoid introducing symbolic relocations.
 // NOTE(review): the listing skips line 1598, the second operand of this
 // condition (presumably a visibility check) -- confirm against upstream.
1597 if (Fn->hasComdat() &&
1599 return true;
1600
1601 // It's OK to use an alias.
1602 return false;
1603}
1604
// NOTE(review): the listing skips line 1605, which holds this function's
// signature; the function name is not visible in this listing. From the body
// it takes a Function pointer `Fn` and returns a constant: a null pointer,
// the function itself, or an alias of it -- confirm against upstream.
1606 auto *Int8PtrTy = PointerType::getUnqual(Fn->getContext());
1607 // Store a nullptr in __llvm_profd, if we shouldn't use a real address
1608 if (!shouldRecordFunctionAddr(Fn))
1609 return ConstantPointerNull::get(Int8PtrTy);
1610
1611 // If we can't use an alias, we must use the public symbol, even though this
1612 // may require a symbolic relocation.
1613 if (shouldUsePublicSymbol(Fn))
1614 return Fn;
1615
1616 // For GPU targets, weak functions cannot use private aliases because
1617 // LTO may pick a different TU's copy, leaving the alias undefined
 // NOTE(review): the listing skips line 1619, the second operand of this
 // condition (presumably a weak-linkage check) -- confirm against upstream.
1618 if (isGPUProfTarget(*Fn->getParent()) &&
1620 return Fn;
1621
1622 // When possible use a private alias to avoid symbolic relocations.
 // NOTE(review): the listing skips line 1623; `GA` is presumably declared
 // there as the result of creating the alias -- confirm against upstream.
1624 Fn->getName() + ".local", Fn);
1625
1626 // When the instrumented function is a COMDAT function, we cannot use a
1627 // private alias. If we did, we would create reference to a local label in
1628 // this function's section. If this version of the function isn't selected by
1629 // the linker, then the metadata would introduce a reference to a discarded
1630 // section. So, for COMDAT functions, we need to adjust the linkage of the
1631 // alias. Using hidden visibility avoids a dynamic relocation and an entry in
1632 // the dynamic symbol table.
1633 //
1634 // Note that this handles COMDAT functions with visibility other than Hidden,
1635 // since that case is covered in shouldUsePublicSymbol()
1636 if (Fn->hasComdat()) {
1637 GA->setLinkage(Fn->getLinkage());
 // NOTE(review): the listing skips line 1638; presumably it sets the
 // alias's visibility, as the comment above describes -- confirm.
1639 }
1640
 // Disabled code kept from the original source.
1641 // appendToCompilerUsed(*Fn->getParent(), {GA});
1642
1643 return GA;
1644}
1645
// NOTE(review): the listing skips line 1646, which holds this function's
// signature; the function name is not visible in this listing. The body is a
// predicate on a triple `TT`: true for NVPTX, false for ELF, COFF, Mach-O,
// XCOFF and Wasm object formats, true for everything else. The comments
// below indicate that "true" means compiler-rt cannot rely on linker support
// to find the section bounds.
1647 // NVPTX is an ELF target but PTX does not expose sections or linker symbols.
1648 if (TT.isNVPTX())
1649 return true;
1650
1651 // compiler-rt uses linker support to get data/counters/name start/end for
1652 // ELF, COFF, Mach-O, XCOFF, and Wasm.
1653 if (TT.isOSBinFormatELF() || TT.isOSBinFormatCOFF() ||
1654 TT.isOSBinFormatMachO() || TT.isOSBinFormatXCOFF() ||
1655 TT.isOSBinFormatWasm())
1656 return false;
1657
1658 return true;
1659}
1660
/// Put the lowered variable \p GV into a comdat group when one is needed.
///
/// A comdat is used when needsComdatForCounter() says so for \p GO, and
/// always on ELF. The group is named after \p GV itself on COFF when
/// DataReferencedByCode is set, otherwise after \p CounterGroupName.
1661void InstrLowerer::maybeSetComdat(GlobalVariable *GV, GlobalObject *GO,
1662 StringRef CounterGroupName) {
1663 // Place lowered global variables in a comdat group if the associated function
1664 // or global variable is a COMDAT. This will make sure that only one copy of
1665 // global variable (e.g. function counters) of the COMDAT function will be
1666 // emitted after linking.
1667 bool NeedComdat = needsComdatForCounter(*GO, M);
1668 bool UseComdat = (NeedComdat || TT.isOSBinFormatELF());
1669
1670 if (!UseComdat)
1671 return;
1672
1673 // Keep in mind that this pass may run before the inliner, so we need to
1674 // create a new comdat group (for counters, profiling data, etc). If we use
1675 // the comdat of the parent function, that will result in relocations against
1676 // discarded sections.
1677 //
1678 // If the data variable is referenced by code, non-counter variables (notably
1679 // profiling data) and counters have to be in different comdats for COFF
1680 // because the Visual C++ linker will report duplicate symbol errors if there
1681 // are multiple external symbols with the same name marked
1682 // IMAGE_COMDAT_SELECT_ASSOCIATIVE.
1683 StringRef GroupName = TT.isOSBinFormatCOFF() && DataReferencedByCode
1684 ? GV->getName()
1685 : CounterGroupName;
1686 Comdat *C = M.getOrInsertComdat(GroupName);
1687
1688 if (!NeedComdat) {
1689 // Object file format must be ELF since `UseComdat && !NeedComdat` is true.
1690 //
1691 // For ELF, when not using COMDAT, put counters, data and values into a
1692 // nodeduplicate COMDAT which is lowered to a zero-flag section group. This
1693 // allows -z start-stop-gc to discard the entire group when the function is
1694 // discarded.
1695 C->setSelectionKind(Comdat::NoDeduplicate);
1696 }
1697 GV->setComdat(C);
1698 // COFF doesn't allow the comdat group leader to have private linkage, so
1699 // upgrade private linkage to internal linkage to produce a symbol table
1700 // entry.
 // NOTE(review): the listing skips line 1702, the statement controlled by
 // this if; per the comment above it presumably switches GV to internal
 // linkage -- confirm against upstream.
1701 if (TT.isOSBinFormatCOFF() && GV->hasPrivateLinkage())
1703}
1704
// NOTE(review): the listing skips line 1705, which holds this function's
// signature. From the call `shouldRecordVTableAddr(GV)` elsewhere in this
// file it presumably takes a GlobalVariable pointer `GV` and returns bool --
// confirm against upstream.
 // No address is recorded when profile data is not referenced by code.
1706 if (!profDataReferencedByCode(*GV->getParent()))
1707 return false;
1708
 // NOTE(review): the listing skips line 1710, the last operand of this
 // condition -- confirm against upstream.
1709 if (!GV->hasLinkOnceLinkage() && !GV->hasLocalLinkage() &&
1711 return true;
1712
1713 // This avoids the profile data from referencing internal symbols in
1714 // COMDAT.
1715 if (GV->hasLocalLinkage() && GV->hasComdat())
1716 return false;
1717
1718 return true;
1719}
1720
1721// FIXME: Introduce an internal alias like what's done for functions to reduce
1722// the number of relocation entries.
// NOTE(review): the listing skips line 1723, which holds this function's
// signature. From the call `getVTableAddrForProfData(GV)` elsewhere in this
// file it presumably takes a GlobalVariable pointer and returns a Constant
// pointer -- confirm against upstream.
1724 // Store a nullptr in __profvt_ if a real address shouldn't be used.
 // NOTE(review): the listing skips line 1726, the statement controlled by
 // this if; per the comment above it presumably returns a null pointer
 // constant -- confirm against upstream.
1725 if (!shouldRecordVTableAddr(GV))
1727
1728 return GV;
1729}
1730
// Creates the profile data global for vtable GV unless VTableDataMap already
// holds a non-null entry for it. Globals whose names start with "llvm.",
// "__llvm" or "__prof" are ignored. The new global is named
// getInstrProfVTableVarPrefix() + the PGO name of GV, placed in the IPSK_vtab
// section with 8-byte alignment, stored in VTableDataMap, and appended to
// UsedVars; GV itself is appended to ReferencedVTables.
// NOTE(review): listing lines 1732, 1734, 1748-1749, 1754, 1761 and 1776 are
// absent from this extract. Linkage and Visibility are used below without a
// visible declaration, and the field lists between the #define/#undef pairs
// are not visible either -- confirm against upstream.
1731void InstrLowerer::getOrCreateVTableProfData(GlobalVariable *GV) {
1733 "Value profiling is not supported with lightweight instrumentation");
1735 return;
1736
1737 // Skip llvm internal global variable or __prof variables.
1738 if (GV->getName().starts_with("llvm.") ||
1739 GV->getName().starts_with("__llvm") ||
1740 GV->getName().starts_with("__prof"))
1741 return;
1742
1743 // VTableProfData already created
1744 auto It = VTableDataMap.find(GV);
1745 if (It != VTableDataMap.end() && It->second)
1746 return;
1747
1750
1751 // This is to keep consistent with per-function profile data
1752 // for correctness.
1753 if (TT.isOSBinFormatXCOFF()) {
1755 Visibility = GlobalValue::DefaultVisibility;
1756 }
1757
1758 LLVMContext &Ctx = M.getContext();
// The macro expands each field entry to its LLVM type, building the struct
// layout.
1759 Type *DataTypes[] = {
1760#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Init) LLVMType,
1762#undef INSTR_PROF_VTABLE_DATA
1763 };
1764
1765 auto *DataTy = StructType::get(Ctx, ArrayRef(DataTypes));
1766
1767 // Used by INSTR_PROF_VTABLE_DATA MACRO
1768 Constant *VTableAddr = getVTableAddrForProfData(GV);
1769 const std::string PGOVTableName = getPGOName(*GV);
1770 // Record the length of the vtable. This is needed since vtable pointers
1771 // loaded from C++ objects might be from the middle of a vtable definition.
1772 uint32_t VTableSizeVal = GV->getGlobalSize(M.getDataLayout());
1773
// The same macro now expands each field entry to its initializer expression.
1774 Constant *DataVals[] = {
1775#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Init) Init,
1777#undef INSTR_PROF_VTABLE_DATA
1778 };
1779
1780 auto *Data =
1781 new GlobalVariable(M, DataTy, /*constant=*/false, Linkage,
1782 ConstantStruct::get(DataTy, DataVals),
1783 getInstrProfVTableVarPrefix() + PGOVTableName);
1784
1785 Data->setVisibility(Visibility);
1786 Data->setSection(getInstrProfSectionName(IPSK_vtab, TT.getObjectFormat()));
1787 Data->setAlignment(Align(8));
1788
1789 maybeSetComdat(Data, GV, Data->getName());
1790
1791 VTableDataMap[GV] = Data;
1792
1793 ReferencedVTables.push_back(GV);
1794
1795 // VTable <Hash, Addr> is used by runtime but not referenced by other
1796 // sections. Conservatively mark it linker retained.
1797 UsedVars.push_back(Data);
1798}
1799
// Creates the per-function profile global for Inc in the section selected by
// IPSK: IPSK_cnts goes through createRegionCounters, IPSK_bitmap through
// createRegionBitmaps, and any other kind hits llvm_unreachable. The new
// global then receives its visibility, section name, linkage and comdat, and
// is returned.
// NOTE(review): listing lines 1806, 1811, 1813, 1821, 1832, 1838 and
// 1849-1850 are absent from this extract; the declaration of Linkage, the
// casts that define CntrIncrement/BitmapUpdate and the statements guarded by
// the Mach-O and XCOFF checks are among what is not visible.
1800GlobalVariable *InstrLowerer::setupProfileSection(InstrProfInstBase *Inc,
1801 InstrProfSectKind IPSK) {
1802 GlobalVariable *NamePtr = Inc->getName();
1803
1804 // Match the linkage and visibility of the name global.
1805 Function *Fn = Inc->getParent()->getParent();
1807 GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility();
1808
1809 // Use internal rather than private linkage so the counter variable shows up
1810 // in the symbol table when using debug info for correlation.
1812 TT.isOSBinFormatMachO() && Linkage == GlobalValue::PrivateLinkage)
1814
1815 // Due to the limitation of binder as of 2021/09/28, the duplicate weak
1816 // symbols in the same csect won't be discarded. When there are duplicate weak
1817 // symbols, we can NOT guarantee that the relocations get resolved to the
1818 // intended weak symbol, so we can not ensure the correctness of the relative
1819 // CounterPtr, so we have to use private linkage for counter and data symbols.
1820 if (TT.isOSBinFormatXCOFF()) {
1822 Visibility = GlobalValue::DefaultVisibility;
1823 }
1824 // Move the name variable to the right section.
1825 bool Renamed;
1826 GlobalVariable *Ptr;
1827 StringRef VarPrefix;
1828 std::string VarName;
1829 if (IPSK == IPSK_cnts) {
1830 VarPrefix = getInstrProfCountersVarPrefix();
1831 VarName = getVarName(Inc, VarPrefix, Renamed);
1833 Ptr = createRegionCounters(CntrIncrement, VarName, Linkage);
1834 } else if (IPSK == IPSK_bitmap) {
1835 VarPrefix = getInstrProfBitmapVarPrefix();
1836 VarName = getVarName(Inc, VarPrefix, Renamed);
1837 InstrProfMCDCBitmapInstBase *BitmapUpdate =
1839 Ptr = createRegionBitmaps(BitmapUpdate, VarName, Linkage);
1840 } else {
1841 llvm_unreachable("Profile Section must be for Counters or Bitmaps");
1842 }
1843
1844 Ptr->setVisibility(Visibility);
1845 Ptr->setSection(getInstrProfSectionName(IPSK, TT.getObjectFormat()));
1846 Ptr->setLinkage(Linkage);
// GPU profile targets get a comdat keyed by the variable's own name; all
// other targets defer to maybeSetComdat.
1847 if (isGPUProfTarget(M) && !Ptr->hasComdat()) {
1848 Ptr->setComdat(M.getOrInsertComdat(VarName));
1851 } else {
1852 maybeSetComdat(Ptr, Fn, VarName);
1853 }
1854 return Ptr;
1855}
1856
// Allocates the MC/DC bitmap global for Inc: a zero-initialized array of
// Inc->getNumBitmapBytes() i8 elements named Name, created with the given
// Linkage and 1-byte alignment. Returns the new global.
// NOTE(review): listing lines 1857 (return type) and 1860 (the parameter that
// declares Linkage) are absent from this extract.
1858InstrLowerer::createRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc,
1859 StringRef Name,
1861 uint64_t NumBytes = Inc->getNumBitmapBytes();
1862 auto *BitmapTy = ArrayType::get(Type::getInt8Ty(M.getContext()), NumBytes);
1863 auto GV = new GlobalVariable(M, BitmapTy, false, Linkage,
1864 Constant::getNullValue(BitmapTy), Name);
1865 GV->setAlignment(Align(1));
1866 return GV;
1867}
1868
// Returns the bitmap global cached in ProfileDataMap for Inc's name variable,
// creating it through setupProfileSection(Inc, IPSK_bitmap) on first use and
// recording its byte count in PD.NumBitmapBytes. Under the (partly missing)
// condition below, the bitmap also gets an annotated debug-info global
// variable expression and is appended to CompilerUsedVars.
// NOTE(review): listing lines 1869 (return type), 1883 (rest of the
// condition), 1889-1890 and 1893-1894 (annotation array contents) are absent
// from this extract.
1870InstrLowerer::getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc) {
1871 GlobalVariable *NamePtr = Inc->getName();
1872 auto &PD = ProfileDataMap[NamePtr];
1873 if (PD.RegionBitmaps)
1874 return PD.RegionBitmaps;
1875
1876 // If RegionBitmaps doesn't already exist, create it by first setting up
1877 // the corresponding profile section.
1878 auto *BitmapPtr = setupProfileSection(Inc, IPSK_bitmap);
1879 PD.RegionBitmaps = BitmapPtr;
1880 PD.NumBitmapBytes = Inc->getNumBitmapBytes();
1881
1882 if (PD.NumBitmapBytes &&
1884 LLVMContext &Ctx = M.getContext();
1885 Function *Fn = Inc->getParent()->getParent();
// Debug info is only attached when the enclosing function has a subprogram.
1886 if (auto *SP = Fn->getSubprogram()) {
1887 DIBuilder DB(M, true, SP->getUnit());
1888 Metadata *FunctionNameAnnotation[] = {
1891 };
1892 Metadata *NumBitmapBitsAnnotation[] = {
1895 };
1896 auto Annotations = DB.getOrCreateArray({
1897 MDNode::get(Ctx, FunctionNameAnnotation),
1898 MDNode::get(Ctx, NumBitmapBitsAnnotation),
1899 });
1900 auto *DICounter = DB.createGlobalVariableExpression(
1901 SP, BitmapPtr->getName(), /*LinkageName=*/StringRef(), SP->getFile(),
1902 /*LineNo=*/0, DB.createUnspecifiedType("Profile Bitmap Type"),
1903 BitmapPtr->hasLocalLinkage(), /*IsDefined=*/true, /*Expr=*/nullptr,
1904 /*Decl=*/nullptr, /*TemplateParams=*/nullptr, /*AlignInBits=*/0,
1905 Annotations);
1906 BitmapPtr->addDebugInfo(DICounter);
1907 DB.finalize();
1908 }
1909
1910 // Mark the bitmap variable as used so that it isn't optimized out.
1911 CompilerUsedVars.push_back(PD.RegionBitmaps);
1912 }
1913
1914 return PD.RegionBitmaps;
1915}
1916
// Allocates the counter array global for Inc, sized by Inc->getNumCounters().
// For InstrProfCoverInst the array holds i8 elements initialized to all-ones
// with 1-byte alignment; otherwise it holds zero-initialized i64 elements with
// 8-byte alignment. Returns the new global.
// NOTE(review): listing lines 1917 (return type) and 1919 (the parameter that
// declares Linkage) are absent from this extract.
1918InstrLowerer::createRegionCounters(InstrProfCntrInstBase *Inc, StringRef Name,
1920 uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
1921 auto &Ctx = M.getContext();
1922 GlobalVariable *GV;
1923 if (isa<InstrProfCoverInst>(Inc)) {
1924 auto *CounterTy = Type::getInt8Ty(Ctx);
1925 auto *CounterArrTy = ArrayType::get(CounterTy, NumCounters);
1926 // TODO: `Constant::getAllOnesValue()` does not yet accept an array type.
1927 std::vector<Constant *> InitialValues(NumCounters,
1928 Constant::getAllOnesValue(CounterTy));
1929 GV = new GlobalVariable(M, CounterArrTy, false, Linkage,
1930 ConstantArray::get(CounterArrTy, InitialValues),
1931 Name);
1932 GV->setAlignment(Align(1));
1933 } else {
1934 auto *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters);
1935 GV = new GlobalVariable(M, CounterTy, false, Linkage,
1936 Constant::getNullValue(CounterTy), Name);
1937 GV->setAlignment(Align(8));
1938 }
1939 return GV;
1940}
1941
// Returns the counter global cached in ProfileDataMap for Inc's name
// variable. On first use it creates the counters through
// setupProfileSection(Inc, IPSK_cnts), then calls getOrCreateUniformCounters
// and createDataVariable, in that order, before returning.
// NOTE(review): listing lines 1942 (return type), 1954 (the condition that
// opens the debug-info block closed at listing line 1988), 1960-1961,
// 1964-1965 and 1968-1969 (annotation array contents) are absent from this
// extract.
1943InstrLowerer::getOrCreateRegionCounters(InstrProfCntrInstBase *Inc) {
1944 GlobalVariable *NamePtr = Inc->getName();
1945 auto &PD = ProfileDataMap[NamePtr];
1946 if (PD.RegionCounters)
1947 return PD.RegionCounters;
1948
1949 // If RegionCounters doesn't already exist, create it by first setting up
1950 // the corresponding profile section.
1951 auto *CounterPtr = setupProfileSection(Inc, IPSK_cnts);
1952 PD.RegionCounters = CounterPtr;
1953
1955 LLVMContext &Ctx = M.getContext();
1956 Function *Fn = Inc->getParent()->getParent();
// Debug info is only attached when the enclosing function has a subprogram.
1957 if (auto *SP = Fn->getSubprogram()) {
1958 DIBuilder DB(M, true, SP->getUnit());
1959 Metadata *FunctionNameAnnotation[] = {
1962 };
1963 Metadata *CFGHashAnnotation[] = {
1966 };
1967 Metadata *NumCountersAnnotation[] = {
1970 };
1971 auto Annotations = DB.getOrCreateArray({
1972 MDNode::get(Ctx, FunctionNameAnnotation),
1973 MDNode::get(Ctx, CFGHashAnnotation),
1974 MDNode::get(Ctx, NumCountersAnnotation),
1975 });
1976 auto *DICounter = DB.createGlobalVariableExpression(
1977 SP, CounterPtr->getName(), /*LinkageName=*/StringRef(), SP->getFile(),
1978 /*LineNo=*/0, DB.createUnspecifiedType("Profile Data Type"),
1979 CounterPtr->hasLocalLinkage(), /*IsDefined=*/true, /*Expr=*/nullptr,
1980 /*Decl=*/nullptr, /*TemplateParams=*/nullptr, /*AlignInBits=*/0,
1981 Annotations);
1982 CounterPtr->addDebugInfo(DICounter);
1983 DB.finalize();
1984 }
1985
1986 // Mark the counter variable as used so that it isn't optimized out.
1987 CompilerUsedVars.push_back(PD.RegionCounters);
1988 }
1989
1990 // Create uniform counters before the data variable so that
1991 // UniformCounterPtr can reference them in createDataVariable().
1992 getOrCreateUniformCounters(Inc);
1993
1994 // Create the data variable (if it doesn't already exist).
1995 createDataVariable(Inc);
1996
1997 return PD.RegionCounters;
1998}
1999
// Returns the uniform-counter global for Inc, or nullptr when the module is
// not a GPU profile target. On first use it creates a zero-initialized i64
// array with one element per counter, named with the "__llvm_prf_unifcnt_"
// prefix, using the name variable's linkage, 8-byte alignment, the IPSK_ucnts
// section and a comdat keyed by the variable name; the result is cached in
// PD.UniformCounters and appended to CompilerUsedVars. The region counters
// must already exist (asserted).
// NOTE(review): listing lines 2000 (return type) and 2028-2029 (two
// statements after the comdat assignment) are absent from this extract.
2001InstrLowerer::getOrCreateUniformCounters(InstrProfCntrInstBase *Inc) {
2002 // Uniform counters are only meaningful for GPU profile targets.
2003 if (!isGPUProfTarget(M))
2004 return nullptr;
2005
2006 GlobalVariable *NamePtr = Inc->getName();
2007 auto &PD = ProfileDataMap[NamePtr];
2008 if (PD.UniformCounters)
2009 return PD.UniformCounters;
2010
2011 assert(PD.RegionCounters && "region counters must be created first");
2012
2013 uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
2014
2015 LLVMContext &Ctx = M.getContext();
2016 ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters);
2017
2018 bool Renamed;
2019 std::string VarName = getVarName(Inc, "__llvm_prf_unifcnt_", Renamed);
2020
2021 auto *GV = new GlobalVariable(M, CounterTy, false, NamePtr->getLinkage(),
2022 Constant::getNullValue(CounterTy), VarName);
2023 GV->setAlignment(Align(8));
2024
2025 GV->setSection(getInstrProfSectionName(IPSK_ucnts, TT.getObjectFormat()));
2026
2027 GV->setComdat(M.getOrInsertComdat(VarName));
2030
2031 PD.UniformCounters = GV;
2032 CompilerUsedVars.push_back(GV);
2033
2034 return PD.UniformCounters;
2035}
2036
// Creates the per-function profile data global for Inc, unless one is already
// cached in PD.DataVar. Visible steps: optionally allocate a static i64 array
// of value-site slots; choose linkage and visibility; build the struct
// initializer, whose counter, bitmap and uniform-counter references are either
// absolute (ptrtoint) or relative depending on the branch taken; set section,
// alignment and comdat; then record the result in PD.DataVar,
// CompilerUsedVars and ReferencedNames.
// NOTE(review): listing lines 2040, 2053, 2062, 2071, 2083, 2094, 2122,
// 2125-2126, 2143, 2150-2151, 2162, 2169, 2181-2182, 2185-2186, 2189-2190,
// 2195, 2205 and 2217 are absent from this extract. That includes the
// early-return condition, the declaration of Linkage, several linkage
// assignments, the relative-pointer subtraction operands and the data field
// list -- confirm every such step against upstream before relying on this
// listing.
2037void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {
2038 // When debug information is correlated to profile data, a data variable
2039 // is not needed.
2041 return;
2042
2043 GlobalVariable *NamePtr = Inc->getName();
2044 auto &PD = ProfileDataMap[NamePtr];
2045
2046 // Return if data variable was already created.
2047 if (PD.DataVar)
2048 return;
2049
2050 LLVMContext &Ctx = M.getContext();
2051
2052 Function *Fn = Inc->getParent()->getParent();
2054 GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility();
2055
2056 // Due to the limitation of binder as of 2021/09/28, the duplicate weak
2057 // symbols in the same csect won't be discarded. When there are duplicate weak
2058 // symbols, we can NOT guarantee that the relocations get resolved to the
2059 // intended weak symbol, so we can not ensure the correctness of the relative
2060 // CounterPtr, so we have to use private linkage for counter and data symbols.
2061 if (TT.isOSBinFormatXCOFF()) {
2063 Visibility = GlobalValue::DefaultVisibility;
2064 }
2065
2066 bool NeedComdat = needsComdatForCounter(*Fn, M);
2067 bool Renamed;
2068
2069 // The Data Variable section is anchored to profile counters.
2070 std::string CntsVarName =
2072 std::string DataVarName =
2073 getVarName(Inc, getInstrProfDataVarPrefix(), Renamed);
2074
2075 auto *Int8PtrTy = PointerType::getUnqual(Ctx);
2076 // Allocate statically the array of pointers to value profile nodes for
2077 // the current function.
2078 Constant *ValuesPtrExpr = ConstantPointerNull::get(Int8PtrTy);
// NS is the total number of value sites across all value kinds.
2079 uint64_t NS = 0;
2080 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
2081 NS += PD.NumValueSites[Kind];
2082 if (NS > 0 && ValueProfileStaticAlloc &&
2084 ArrayType *ValuesTy = ArrayType::get(Type::getInt64Ty(Ctx), NS);
2085 auto *ValuesVar = new GlobalVariable(
2086 M, ValuesTy, false, Linkage, Constant::getNullValue(ValuesTy),
2087 getVarName(Inc, getInstrProfValuesVarPrefix(), Renamed));
2088 ValuesVar->setVisibility(Visibility);
2089 setGlobalVariableLargeSection(TT, *ValuesVar);
2090 ValuesVar->setSection(
2091 getInstrProfSectionName(IPSK_vals, TT.getObjectFormat()));
2092 ValuesVar->setAlignment(Align(8));
2093 maybeSetComdat(ValuesVar, Fn, CntsVarName);
2095 ValuesVar, PointerType::get(Fn->getContext(), 0));
2096 }
2097
2098 uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
2099
2100 Constant *CounterPtr = PD.RegionCounters;
2101 Constant *UniformCounterPtr = PD.UniformCounters;
2102
2103 uint64_t NumBitmapBytes = PD.NumBitmapBytes;
2104
2105 // Create data variable.
2106 auto *IntPtrTy = M.getDataLayout().getIntPtrType(M.getContext());
2107 auto *Int16Ty = Type::getInt16Ty(Ctx);
2108 auto *Int16ArrayTy = ArrayType::get(Int16Ty, IPVK_Last + 1);
2109 auto *DataTy = getProfileDataTy();
2110
2111 Constant *FunctionAddr = getFuncAddrForProfData(Fn);
2112
// One i16 per value kind, holding that kind's number of value sites.
2113 Constant *Int16ArrayVals[IPVK_Last + 1];
2114 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
2115 Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]);
2116
2117 uint16_t OffloadDeviceWaveSizeVal = 0;
2118
2119 if (isGPUProfTarget(M)) {
2120 // For GPU targets, weak functions need weak linkage for their profile data
2121 // aliases to allow linker deduplication across TUs
2123 Linkage = Fn->getLinkage();
2124 else
2127 }
2128 // If the data variable is not referenced by code (if we don't emit
2129 // @llvm.instrprof.value.profile, NS will be 0), and the counter keeps the
2130 // data variable live under linker GC, the data variable can be private. This
2131 // optimization applies to ELF.
2132 //
2133 // On COFF, a comdat leader cannot be local so we require DataReferencedByCode
2134 // to be false.
2135 //
2136 // If profd is in a deduplicate comdat, NS==0 with a hash suffix guarantees
2137 // that other copies must have the same CFG and cannot have value profiling.
2138 // If no hash suffix, other profd copies may be referenced by code.
2139 if (!isGPUProfTarget(M) && NS == 0 &&
2140 !(DataReferencedByCode && NeedComdat && !Renamed) &&
2141 (TT.isOSBinFormatELF() ||
2142 (!DataReferencedByCode && TT.isOSBinFormatCOFF()))) {
2144 Visibility = GlobalValue::DefaultVisibility;
2145 }
2146 // GPU-target ELF objects are always ET_DYN, so non-local symbols with
2147 // default visibility are preemptible. The CounterPtr label difference
2148 // emits a REL32 relocation that lld rejects against preemptible targets.
2149 if (TT.isGPU() && TT.isOSBinFormatELF() &&
// The global is created without an initializer (nullptr); the initializer is
// attached later through setInitializer, once DataVals has been built.
2152 auto *Data =
2153 new GlobalVariable(M, DataTy, false, Linkage, nullptr, DataVarName);
2154
2155 Constant *RelativeCounterPtr;
2156 Constant *RelativeUniformCounterPtr = ConstantInt::get(IntPtrTy, 0);
2157 GlobalVariable *BitmapPtr = PD.RegionBitmaps;
2158 Constant *RelativeBitmapPtr = ConstantInt::get(IntPtrTy, 0);
2159 InstrProfSectKind DataSectionKind;
2160 // With binary profile correlation, profile data is not loaded into memory.
2161 // profile data must reference profile counter with an absolute relocation.
2163 DataSectionKind = IPSK_covdata;
2164 RelativeCounterPtr = ConstantExpr::getPtrToInt(CounterPtr, IntPtrTy);
2165 if (BitmapPtr != nullptr)
2166 RelativeBitmapPtr = ConstantExpr::getPtrToInt(BitmapPtr, IntPtrTy);
2167 if (UniformCounterPtr != nullptr)
2168 RelativeUniformCounterPtr =
2170 } else if (TT.isNVPTX()) {
2171 // The NVPTX target cannot handle self-referencing constant expressions in
2172 // global initializers at all. Use absolute pointers and have the runtime
2173 // registration convert them to relative offsets.
2174 DataSectionKind = IPSK_data;
2175 RelativeCounterPtr = ConstantExpr::getPtrToInt(CounterPtr, IntPtrTy);
2176 } else {
2177 // Reference the counter variable with a label difference (link-time
2178 // constant).
2179 DataSectionKind = IPSK_data;
2180 RelativeCounterPtr =
2183 if (BitmapPtr != nullptr)
2184 RelativeBitmapPtr =
2187 if (UniformCounterPtr != nullptr)
2188 RelativeUniformCounterPtr = ConstantExpr::getSub(
2191 }
2192
// The macro expands each data field entry to its initializer expression, so
// the locals computed above are consumed here by name.
2193 Constant *DataVals[] = {
2194#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
2196 };
2197 Data->setInitializer(ConstantStruct::get(DataTy, DataVals));
2198
2199 Data->setVisibility(Visibility);
2200 Data->setSection(
2201 getInstrProfSectionName(DataSectionKind, TT.getObjectFormat()));
2202 Data->setAlignment(Align(INSTR_PROF_DATA_ALIGNMENT));
2203 if (isGPUProfTarget(M) && !Data->hasComdat()) {
2204 Data->setComdat(M.getOrInsertComdat(CntsVarName));
2206 } else {
2207 maybeSetComdat(Data, Fn, CntsVarName);
2208 }
2209
2210 PD.DataVar = Data;
2211
2212 // Mark the data variable as used so that it isn't stripped out.
2213 CompilerUsedVars.push_back(Data);
2214 // Now that the linkage set by the FE has been passed to the data and counter
2215 // variables, reset Name variable's linkage and visibility to private so that
2216 // it can be removed later by the compiler.
2218 // Collect the referenced names to be used by emitNameData.
2219 ReferencedNames.push_back(NamePtr);
2220}
2221
// Emits the statically allocated pool of value-profile nodes. Does nothing
// when ValueProfileStaticAlloc is off or when no function has any value
// sites. Otherwise it sizes the pool from the total site count times
// NumCountersPerValueSite (raised for very small totals), creates a private
// zero-sized-or-larger array global of the node struct type in the
// IPSK_vnodes section with ABI alignment, and appends it to UsedVars.
// NOTE(review): listing lines 2229 (the condition guarding the second early
// return), 2256 (the node field list) and 2263 (the initializer and name
// arguments of the global) are absent from this extract.
2222void InstrLowerer::emitVNodes() {
2223 if (!ValueProfileStaticAlloc)
2224 return;
2225
2226 // For now only support this on platforms that do
2227 // not require runtime registration to discover
2228 // named section start/end.
2230 return;
2231
2232 size_t TotalNS = 0;
2233 for (auto &PD : ProfileDataMap) {
2234 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
2235 TotalNS += PD.second.NumValueSites[Kind];
2236 }
2237
2238 if (!TotalNS)
2239 return;
2240
2241 uint64_t NumCounters = TotalNS * NumCountersPerValueSite;
2242// Heuristic for small programs with very few total value sites.
2243// The default value of vp-counters-per-site is chosen based on
2244// the observation that large apps usually have a low percentage
2245// of value sites that actually have any profile data, and thus
2246// the average number of counters per site is low. For small
2247// apps with very few sites, this may not be true. Bump up the
2248// number of counters in this case.
2249#define INSTR_PROF_MIN_VAL_COUNTS 10
// Below the minimum, use whichever is larger: the minimum or twice the count.
2250 if (NumCounters < INSTR_PROF_MIN_VAL_COUNTS)
2251 NumCounters = std::max(INSTR_PROF_MIN_VAL_COUNTS, (int)NumCounters * 2);
2252
2253 auto &Ctx = M.getContext();
2254 Type *VNodeTypes[] = {
2255#define INSTR_PROF_VALUE_NODE(Type, LLVMType, Name, Init) LLVMType,
2257 };
2258 auto *VNodeTy = StructType::get(Ctx, ArrayRef(VNodeTypes));
2259
2260 ArrayType *VNodesTy = ArrayType::get(VNodeTy, NumCounters);
2261 auto *VNodesVar = new GlobalVariable(
2262 M, VNodesTy, false, GlobalValue::PrivateLinkage,
2264 setGlobalVariableLargeSection(TT, *VNodesVar);
2265 VNodesVar->setSection(
2266 getInstrProfSectionName(IPSK_vnodes, TT.getObjectFormat()));
2267 VNodesVar->setAlignment(M.getDataLayout().getABITypeAlign(VNodesTy));
2268 // VNodesVar is used by runtime but not referenced via relocation by other
2269 // sections. Conservatively make it linker retained.
2270 UsedVars.push_back(VNodesVar);
2271}
2272
2273// Build the per-TU device-PGO sections struct: section start/stop bounds for
2274// names/counters/data/uniform-counters plus the raw version. Returns null if it
2275// already exists.
// The struct global is named "__llvm_profile_sections" + CUIDPostfix, holds
// nine pointers (eight section bounds followed by the raw-version variable),
// is constant, and has protected visibility. The Extern helper reuses an
// existing global of the given name or declares a new external one.
// NOTE(review): listing lines 2276 (first signature line), 2285 (last lambda
// parameter, which declares Vis), 2290, 2308, 2313 and 2315 are absent from
// this extract; the linkage and address-space arguments are among what is not
// visible. The caller in emitNameData names this function
// emitGPUOffloadSectionsStruct.
2277 StringRef CUIDPostfix) {
2278 std::string Name = ("__llvm_profile_sections" + CUIDPostfix).str();
2279 if (M.getNamedValue(Name))
2280 return nullptr;
2281
2282 LLVMContext &Ctx = M.getContext();
2283 unsigned AS = M.getDataLayout().getDefaultGlobalsAddressSpace();
2284 auto Extern = [&](StringRef Sym, Type *Ty, bool IsConst,
2286 GlobalVariable *GV = M.getNamedGlobal(Sym);
2287 if (!GV) {
2288 GV = new GlobalVariable(M, Ty, IsConst, GlobalValue::ExternalLinkage,
2289 nullptr, Sym, nullptr,
2291 GV->setVisibility(Vis);
2292 }
2293 return GV;
2294 };
2295 // Section bounds are hidden i8 markers; raw_version is an i64 constant.
2296 auto *I8 = Type::getInt8Ty(Ctx);
2297 auto Hidden = GlobalValue::HiddenVisibility;
2298 Constant *Fields[] = {Extern("__start___llvm_prf_names", I8, false, Hidden),
2299 Extern("__stop___llvm_prf_names", I8, false, Hidden),
2300 Extern("__start___llvm_prf_cnts", I8, false, Hidden),
2301 Extern("__stop___llvm_prf_cnts", I8, false, Hidden),
2302 Extern("__start___llvm_prf_data", I8, false, Hidden),
2303 Extern("__stop___llvm_prf_data", I8, false, Hidden),
2304 Extern("__start___llvm_prf_ucnts", I8, false, Hidden),
2305 Extern("__stop___llvm_prf_ucnts", I8, false, Hidden),
2306 Extern("__llvm_profile_raw_version",
2307 Type::getInt64Ty(Ctx), true,
2309 auto *PtrTy = PointerType::get(Ctx, AS);
2310 auto *STy = StructType::get(
2311 Ctx, {PtrTy, PtrTy, PtrTy, PtrTy, PtrTy, PtrTy, PtrTy, PtrTy, PtrTy});
2312 auto *GV = new GlobalVariable(M, STy, /*isConstant=*/true,
2314 ConstantStruct::get(STy, Fields), Name, nullptr,
2316 GV->setVisibility(GlobalValue::ProtectedVisibility);
2317 return GV;
2318}
2319
// Emits the names global that holds the function names collected in
// ReferencedNames. Does nothing when that list is empty; a failure from
// collectPGOFuncNameStrings is reported as a fatal error. The global is
// constant, 1-byte aligned, placed in either the covname or the name section,
// and appended to UsedVars; NamesSize records the string length. Afterwards
// every referenced name variable is erased from the module.
// On GPU profile targets, if the postfix global exists with a C-string
// initializer, the postfix is appended to the variable name, the linkage
// becomes external with protected visibility, and the postfix global is
// erased; when that postfix is non-empty and some function produced a data
// variable, the device sections struct is emitted as well.
// NOTE(review): listing lines 2326, 2334-2335 (declarations of NamesLinkage
// and NamesVisibility, presumably), 2340, 2346 and 2360 (the section-selection
// condition) are absent from this extract.
2320void InstrLowerer::emitNameData() {
2321 if (ReferencedNames.empty())
2322 return;
2323
2324 std::string CompressedNameStr;
2325 if (Error E = collectPGOFuncNameStrings(ReferencedNames, CompressedNameStr,
2327 report_fatal_error(Twine(toString(std::move(E))), false);
2328 }
2329
2330 auto &Ctx = M.getContext();
2331 auto *NamesVal =
2332 ConstantDataArray::getString(Ctx, StringRef(CompressedNameStr), false);
2333 std::string NamesVarName = std::string(getInstrProfNamesVarName());
2336 std::string GPUCUIDPostfix;
2337 if (isGPUProfTarget(M)) {
2338 if (auto *GV = M.getNamedGlobal(getInstrProfNamesVarPostfixVarName())) {
2339 if (auto *Init =
2341 if (Init->isCString()) {
2342 GPUCUIDPostfix = Init->getAsCString().str();
2343 NamesVarName += GPUCUIDPostfix;
2344 NamesLinkage = GlobalValue::ExternalLinkage;
2345 NamesVisibility = GlobalValue::ProtectedVisibility;
2347 M, [GV](Constant *C) { return C->stripPointerCasts() == GV; });
2348 GV->eraseFromParent();
2349 }
2350 }
2351 }
2352 }
2353 NamesVar = new GlobalVariable(M, NamesVal->getType(), true, NamesLinkage,
2354 NamesVal, NamesVarName);
2355 NamesVar->setVisibility(NamesVisibility);
2356
2357 NamesSize = CompressedNameStr.size();
2358 setGlobalVariableLargeSection(TT, *NamesVar);
2359 std::string NamesSectionName =
2361 ? getInstrProfSectionName(IPSK_covname, TT.getObjectFormat())
2362 : getInstrProfSectionName(IPSK_name, TT.getObjectFormat());
2363 NamesVar->setSection(NamesSectionName);
2364 // On COFF, it's important to reduce the alignment down to 1 to prevent the
2365 // linker from inserting padding before the start of the names section or
2366 // between names entries.
2367 NamesVar->setAlignment(Align(1));
2368 // NamesVar is used by runtime but not referenced via relocation by other
2369 // sections. Conservatively make it linker retained.
2370 UsedVars.push_back(NamesVar);
2371
// The per-function name variables are no longer needed once their contents
// have been folded into NamesVar.
2372 for (auto *NamePtr : ReferencedNames)
2373 NamePtr->eraseFromParent();
2374
2375 // Emit the device sections struct only when this TU produced profile data, so
2376 // its section start/stop references are backed by a real section.
2377 bool HasData = llvm::any_of(ProfileDataMap,
2378 [](const auto &KV) { return KV.second.DataVar; });
2379 if (!GPUCUIDPostfix.empty() && HasData)
2380 if (GlobalVariable *GV = emitGPUOffloadSectionsStruct(M, GPUCUIDPostfix))
2381 CompilerUsedVars.push_back(GV);
2382}
2383
// Emits the global holding the names of all vtables in ReferencedVTables.
// Does nothing when vtable value profiling is disabled or no vtable was
// referenced; a failure from collectVTableStrings is reported as a fatal
// error. The global is a constant, private, 1-byte-aligned string without a
// trailing null, placed in the IPSK_vname section and appended to UsedVars.
// NOTE(review): listing lines 2391 (last argument of collectVTableStrings)
// and 2401 (the variable-name argument of the global) are absent from this
// extract.
2384void InstrLowerer::emitVTableNames() {
2385 if (!EnableVTableValueProfiling || ReferencedVTables.empty())
2386 return;
2387
2388 // Collect the PGO names of referenced vtables and compress them.
2389 std::string CompressedVTableNames;
2390 if (Error E = collectVTableStrings(ReferencedVTables, CompressedVTableNames,
2392 report_fatal_error(Twine(toString(std::move(E))), false);
2393 }
2394
2395 auto &Ctx = M.getContext();
2396 auto *VTableNamesVal = ConstantDataArray::getString(
2397 Ctx, StringRef(CompressedVTableNames), false /* AddNull */);
2398 GlobalVariable *VTableNamesVar =
2399 new GlobalVariable(M, VTableNamesVal->getType(), true /* constant */,
2400 GlobalValue::PrivateLinkage, VTableNamesVal,
2402 VTableNamesVar->setSection(
2403 getInstrProfSectionName(IPSK_vname, TT.getObjectFormat()));
2404 VTableNamesVar->setAlignment(Align(1));
2405 // Make VTableNames linker retained.
2406 UsedVars.push_back(VTableNamesVar);
2407}
2408
// Builds an internal void() function that registers profile globals with the
// runtime: one call to the runtime register function per non-function entry
// of CompilerUsedVars and of UsedVars (NamesVar excluded), followed, when
// NamesVar exists, by one call to the names register function with the names
// pointer and NamesSize.
// NOTE(review): listing lines 2410 (the condition guarding the early return),
// 2419 (the register function's name argument), 2426-2427 and 2445-2446 (the
// creation of the two runtime callee declarations) are absent from this
// extract.
2409void InstrLowerer::emitRegistration() {
2411 return;
2412
2413 // Construct the function.
2414 auto *VoidTy = Type::getVoidTy(M.getContext());
2415 auto *VoidPtrTy = PointerType::getUnqual(M.getContext());
2416 auto *Int64Ty = Type::getInt64Ty(M.getContext());
2417 auto *RegisterFTy = FunctionType::get(VoidTy, false);
2418 auto *RegisterF = Function::Create(RegisterFTy, GlobalValue::InternalLinkage,
2420 RegisterF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
2421 if (Options.NoRedZone)
2422 RegisterF->addFnAttr(Attribute::NoRedZone);
2423
2424 auto *RuntimeRegisterTy = FunctionType::get(VoidTy, VoidPtrTy, false);
2425 auto *RuntimeRegisterF =
2428
2429 IRBuilder<> IRB(BasicBlock::Create(M.getContext(), "", RegisterF));
2430 for (Value *Data : CompilerUsedVars)
2431 if (!isa<Function>(Data))
2432 // Check for addrspace cast when profiling GPU
2433 IRB.CreateCall(RuntimeRegisterF,
2434 IRB.CreatePointerBitCastOrAddrSpaceCast(Data, VoidPtrTy));
// NamesVar is skipped here because it is registered separately below,
// together with its size.
2435 for (Value *Data : UsedVars)
2436 if (Data != NamesVar && !isa<Function>(Data))
2437 IRB.CreateCall(RuntimeRegisterF,
2438 IRB.CreatePointerBitCastOrAddrSpaceCast(Data, VoidPtrTy));
2439
2440 if (NamesVar) {
2441 Type *ParamTypes[] = {VoidPtrTy, Int64Ty};
2442 auto *NamesRegisterTy =
2443 FunctionType::get(VoidTy, ArrayRef(ParamTypes), false);
2444 auto *NamesRegisterF =
2447 IRB.CreateCall(NamesRegisterF, {IRB.CreatePointerBitCastOrAddrSpaceCast(
2448 NamesVar, VoidPtrTy),
2449 IRB.getInt64(NamesSize)});
2450 }
2451
2452 IRB.CreateRetVoid();
2453}
2454
// Emits a reference to the profile runtime hook variable so that the runtime
// gets linked in. Returns false without emitting anything for GPU targets,
// for Linux and AIX, and when the module already defines the hook variable;
// returns true otherwise. The reference is either the hook variable itself
// (ELF, except PS targets) or a hidden, noinline, never-called function that
// loads it; whichever is used is appended to CompilerUsedVars.
// NOTE(review): listing lines 2474 (initializer and name arguments of the
// hook variable) and 2483-2484 (linkage and name arguments of the user
// function) are absent from this extract.
2455bool InstrLowerer::emitRuntimeHook() {
2456 // GPU profiling data is read directly by the host offload runtime. We do not
2457 // need the standard runtime hook.
2458 if (TT.isGPU())
2459 return false;
2460
2461 // We expect the linker to be invoked with -u<hook_var> flag for Linux
2462 // in which case there is no need to emit the external variable.
2463 if (TT.isOSLinux() || TT.isOSAIX())
2464 return false;
2465
2466 // If the module's provided its own runtime, we don't need to do anything.
2467 if (M.getGlobalVariable(getInstrProfRuntimeHookVarName()))
2468 return false;
2469
2470 // Declare an external variable that will pull in the runtime initialization.
2471 auto *Int32Ty = Type::getInt32Ty(M.getContext());
2472 auto *Var =
2473 new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage,
2475 Var->setVisibility(GlobalValue::HiddenVisibility);
2476
2477 if (TT.isOSBinFormatELF() && !TT.isPS()) {
2478 // Mark the user variable as used so that it isn't stripped out.
2479 CompilerUsedVars.push_back(Var);
2480 } else {
2481 // Make a function that uses it.
2482 auto *User = Function::Create(FunctionType::get(Int32Ty, false),
2485 User->addFnAttr(Attribute::NoInline);
2486 if (Options.NoRedZone)
2487 User->addFnAttr(Attribute::NoRedZone);
2488 User->setVisibility(GlobalValue::HiddenVisibility);
2489 if (TT.supportsCOMDAT())
2490 User->setComdat(M.getOrInsertComdat(User->getName()));
2491 // Explicitly mark this function as cold since it is never called.
2492 User->setEntryCount(0);
2493
// The body simply loads the hook variable and returns the loaded value.
2494 IRBuilder<> IRB(BasicBlock::Create(M.getContext(), "", User));
2495 auto *Load = IRB.CreateLoad(Int32Ty, Var);
2496 IRB.CreateRet(Load);
2497
2498 // Mark the function as used so that it isn't stripped out.
2499 CompilerUsedVars.push_back(User);
2500 }
2501 return true;
2502}
2503
2504void InstrLowerer::emitUses() {
2505 // The metadata sections are parallel arrays. Optimizers (e.g.
2506 // GlobalOpt/ConstantMerge) may not discard associated sections as a unit, so
2507 // we conservatively retain all unconditionally in the compiler.
2508 //
2509 // On ELF and Mach-O, the linker can guarantee the associated sections will be
2510 // retained or discarded as a unit, so llvm.compiler.used is sufficient.
2511 // Similarly on COFF, if prof data is not referenced by code we use one comdat
2512 // and ensure this GC property as well. Otherwise, we have to conservatively
2513 // make all of the sections retained by the linker.
2514 if (TT.isOSBinFormatELF() || TT.isOSBinFormatMachO() ||
2515 (TT.isOSBinFormatCOFF() && !DataReferencedByCode))
2516 appendToCompilerUsed(M, CompilerUsedVars);
2517 else
2518 appendToUsed(M, CompilerUsedVars);
2519
2520 // We do not add proper references from used metadata sections to NamesVar and
2521 // VNodesVar, so we have to be conservative and place them in llvm.used
2522 // regardless of the target,
2523 appendToUsed(M, UsedVars);
2524}
2525
// Emits module initialization for profiling: creates the profile file name
// variable (skipped for context-sensitive lowering) and, if the registration
// function exists in the module, creates a noinline void() function that
// calls it and appends that function to the global constructors with
// priority 0.
// NOTE(review): listing lines 2540-2541 (linkage and name arguments of the
// initialization function) are absent from this extract.
2526void InstrLowerer::emitInitialization() {
2527 // Create ProfileFileName variable. Don't do this for the
2528 // context-sensitive instrumentation lowering: This lowering is after
2529 // LTO/ThinLTO linking. Pass PGOInstrumentationGenCreateVar should
2530 // have already created the variable before LTO/ThinLTO linking.
2531 if (!IsCS)
2532 createProfileFileNameVar(M, Options.InstrProfileOutput);
// Without a registration function there is nothing to call at startup.
2533 Function *RegisterF = M.getFunction(getInstrProfRegFuncsName());
2534 if (!RegisterF)
2535 return;
2536
2537 // Create the initialization function.
2538 auto *VoidTy = Type::getVoidTy(M.getContext());
2539 auto *F = Function::Create(FunctionType::get(VoidTy, false),
2542 F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
2543 F->addFnAttr(Attribute::NoInline);
2544 if (Options.NoRedZone)
2545 F->addFnAttr(Attribute::NoRedZone);
2546
2547 // Add the basic block and the necessary calls.
2548 IRBuilder<> IRB(BasicBlock::Create(M.getContext(), "", F));
2549 IRB.CreateCall(RegisterF, {});
2550 IRB.CreateRetVoid();
2551
2552 appendToGlobalCtors(M, F, 0);
2553}
2554
2555namespace llvm {
2556// Create the variable for profile sampling.
// The variable is a zero-initialized, thread-local integer with default
// visibility: 16 bits wide when the sampled-instrumentation config requests
// a short counter, 32 bits otherwise. It starts with weak-any linkage; on
// targets that support COMDAT it is switched to external linkage and placed
// in a comdat named after the variable. It is added to llvm.compiler.used.
// NOTE(review): listing lines 2557-2558 (the function signature and the
// declaration of VarName) are absent from this extract; this is presumably
// createProfileSamplingVar(Module &M) -- confirm against upstream.
2559 IntegerType *SamplingVarTy;
2560 Constant *ValueZero;
2561 if (getSampledInstrumentationConfig().UseShort) {
2562 SamplingVarTy = Type::getInt16Ty(M.getContext());
2563 ValueZero = Constant::getIntegerValue(SamplingVarTy, APInt(16, 0));
2564 } else {
2565 SamplingVarTy = Type::getInt32Ty(M.getContext());
2566 ValueZero = Constant::getIntegerValue(SamplingVarTy, APInt(32, 0));
2567 }
2568 auto SamplingVar = new GlobalVariable(
2569 M, SamplingVarTy, false, GlobalValue::WeakAnyLinkage, ValueZero, VarName);
2570 SamplingVar->setVisibility(GlobalValue::DefaultVisibility);
2571 SamplingVar->setThreadLocal(true);
2572 Triple TT(M.getTargetTriple());
2573 if (TT.supportsCOMDAT()) {
2574 SamplingVar->setLinkage(GlobalValue::ExternalLinkage);
2575 SamplingVar->setComdat(M.getOrInsertComdat(VarName));
2576 }
2577 appendToCompilerUsed(M, SamplingVar);
2578}
2579} // namespace llvm
2580
2581// For GPU targets: Allocate contiguous arrays for all profile data.
2582// This solves the linker reordering problem by using ONE symbol per section
2583// type, so there's nothing for the linker to reorder.
// NOTE(review): the comment above does not match the visible body, which
// allocates nothing; it may belong to code that is no longer adjacent.
//
// Returns the struct type of a per-function profile data record, building it
// on the first call and caching it in ProfileDataTy. The field types come
// from expanding the INSTR_PROF_DATA macro to each entry's LLVM type; the
// locals declared below are presumably referenced by those entries.
// NOTE(review): listing line 2594 (the field list consumed by the macro) is
// absent from this extract.
2584StructType *InstrLowerer::getProfileDataTy() {
2585 if (ProfileDataTy)
2586 return ProfileDataTy;
2587
2588 auto &Ctx = M.getContext();
2589 auto *IntPtrTy = M.getDataLayout().getIntPtrType(M.getContext());
2590 auto *Int16Ty = Type::getInt16Ty(Ctx);
2591 auto *Int16ArrayTy = ArrayType::get(Int16Ty, IPVK_Last + 1);
2592 Type *DataTypes[] = {
2593#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) LLVMType,
2595 };
2596 ProfileDataTy = StructType::get(Ctx, ArrayRef(DataTypes));
2597 return ProfileDataTy;
2598}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define LLVM_ABI
Definition Compiler.h:215
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static unsigned InstrCount
DXIL Finalize Linkage
@ Default
Hexagon Hardware Loops
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
#define INSTR_PROF_QUOTE(x)
#define INSTR_PROF_DATA_ALIGNMENT
#define INSTR_PROF_PROFILE_SET_TIMESTAMP
#define INSTR_PROF_PROFILE_SAMPLING_VAR
static bool shouldRecordVTableAddr(GlobalVariable *GV)
static bool shouldRecordFunctionAddr(Function *F)
static bool needsRuntimeHookUnconditionally(const Triple &TT)
static bool containsProfilingIntrinsics(Module &M)
Check if the module contains uses of any profiling intrinsics.
static std::string getVarName(InstrProfInstBase *Inc, StringRef Prefix, bool &Renamed)
Get the name of a profiling variable for a particular function.
#define INSTR_PROF_MIN_VAL_COUNTS
static Constant * getFuncAddrForProfData(Function *Fn)
static bool shouldUsePublicSymbol(Function *Fn)
static FunctionCallee getOrInsertValueProfilingCall(Module &M, const TargetLibraryInfo &TLI, ValueProfilingCallType CallType=ValueProfilingCallType::Default)
static Constant * getVTableAddrForProfData(GlobalVariable *GV)
static void doAtomicCheck(Function *F)
static GlobalVariable * emitGPUOffloadSectionsStruct(Module &M, StringRef CUIDPostfix)
static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT)
This file provides the interface for LLVM's PGO Instrumentation lowering pass.
static LVOptions Options
Definition LVOptions.cpp:25
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
Memory SSA
Definition MemorySSA.cpp:72
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
FunctionAnalysisManager FAM
if(PassOpts->AAPipeline)
SmallPtrSet< BasicBlock *, 0 > BlockSet
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:119
Class for arbitrary precision integers.
Definition APInt.h:78
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Annotations lets you mark points and ranges inside source code, for tests:
Definition Annotations.h:67
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
@ Add
*p = old + v
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:474
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:461
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
const Instruction & front() const
Definition BasicBlock.h:484
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Analysis providing branch probability information.
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
This class represents a function call, abstracting a target machine's calling convention.
@ NoDeduplicate
No deduplication is performed.
Definition Comdat.h:40
ConstantArray - Constant Array Declarations.
Definition Constants.h:590
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static ConstantAsMetadata * get(Constant *C)
Definition Metadata.h:537
static LLVM_ABI Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true, bool ByteString=false)
This method constructs a CDS and initializes it with a text string.
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static LLVM_ABI Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constant.h:64
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:151
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition Function.h:168
const BasicBlock & getEntryBlock() const
Definition Function.h:783
DISubprogram * getSubprogram() const
Get the attached subprogram.
const Function & getFunction() const
Definition Function.h:166
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:353
static LLVM_ABI GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Definition Globals.cpp:630
bool hasMetadata() const
Return true if this GlobalObject has any metadata attached to it.
LLVM_ABI void setComdat(Comdat *C)
Definition Globals.cpp:225
bool hasComdat() const
LLVM_ABI void setSection(StringRef S)
Change the section for this global.
Definition Globals.cpp:286
bool hasLinkOnceLinkage() const
VisibilityTypes getVisibility() const
static bool isLocalLinkage(LinkageTypes Linkage)
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:346
LinkageTypes getLinkage() const
bool hasLocalLinkage() const
bool hasPrivateLinkage() const
void setLinkage(LinkageTypes LT)
bool isDeclarationForLinker() const
Module * getParent()
Get the module that this global value is contained inside of...
VisibilityTypes
An enumeration for the kinds of visibility of global values.
Definition GlobalValue.h:67
@ DefaultVisibility
The GV is visible.
Definition GlobalValue.h:68
@ HiddenVisibility
The GV is hidden.
Definition GlobalValue.h:69
@ ProtectedVisibility
The GV is protected.
Definition GlobalValue.h:70
void setVisibility(VisibilityTypes V)
static bool isWeakForLinker(LinkageTypes Linkage)
Whether the definition of this global may be replaced at link time.
bool hasAvailableExternallyLinkage() const
LinkageTypes
An enumeration for the kinds of linkage for global values.
Definition GlobalValue.h:52
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition GlobalValue.h:61
@ InternalLinkage
Rename collisions when linking (static functions).
Definition GlobalValue.h:60
@ ExternalLinkage
Externally visible function.
Definition GlobalValue.h:53
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
Definition GlobalValue.h:57
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Definition GlobalValue.h:56
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
LLVM_ABI uint64_t getGlobalSize(const DataLayout &DL) const
Get the size of this global variable in bytes.
Definition Globals.cpp:578
LLVM_ABI void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Definition Globals.cpp:547
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalVariable.
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition IRBuilder.h:2128
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2227
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition IRBuilder.h:1532
ConstantInt * getInt8(uint8_t C)
Get a constant 8-bit value.
Definition IRBuilder.h:467
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition IRBuilder.h:2081
BasicBlock * GetInsertBlock() const
Definition IRBuilder.h:175
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition IRBuilder.h:2008
Value * CreatePointerBitCastOrAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2291
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2368
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition IRBuilder.h:477
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition IRBuilder.h:1906
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1511
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:1570
Value * CreateConstInBoundsGEP2_32(Type *Ty, Value *Ptr, unsigned Idx0, unsigned Idx1, const Twine &Name="")
Definition IRBuilder.h:2035
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition IRBuilder.h:1919
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1422
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2222
Value * CreateIsNotNull(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg != 0.
Definition IRBuilder.h:2697
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2543
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition IRBuilder.h:2096
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:181
Value * CreateInBoundsPtrAdd(Value *Ptr, Value *Offset, const Twine &Name="")
Definition IRBuilder.h:2086
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1592
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System, bool Elementwise=false)
Definition IRBuilder.h:1970
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2848
A base class for all instrprof counter intrinsics.
LLVM_ABI ConstantInt * getIndex() const
LLVM_ABI ConstantInt * getNumCounters() const
static LLVM_ABI const char * FunctionNameAttributeName
static LLVM_ABI const char * CFGHashAttributeName
static LLVM_ABI const char * NumCountersAttributeName
static LLVM_ABI const char * NumBitmapBitsAttributeName
This represents the llvm.instrprof.cover intrinsic.
This represents the llvm.instrprof.increment intrinsic.
LLVM_ABI Value * getStep() const
A base class for all instrprof intrinsics.
GlobalVariable * getName() const
ConstantInt * getHash() const
A base class for instrprof mcdc intrinsics that require global bitmap bytes.
ConstantInt * getNumBitmapBits() const
This represents the llvm.instrprof.mcdc.tvbitmap.update intrinsic.
ConstantInt * getBitmapIndex() const
This represents the llvm.instrprof.timestamp intrinsic.
This represents the llvm.instrprof.value.profile intrinsic.
ConstantInt * getIndex() const
ConstantInt * getValueKind() const
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
Helper class for promoting a collection of loads and stores into SSA Form using the SSAUpdater.
Definition SSAUpdater.h:149
An instruction for reading from memory.
void getExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all of the successor blocks of this loop.
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
bool hasDedicatedExits() const
Return true if no exit block for the loop has a predecessor that is outside the loop.
SmallVector< LoopT *, 4 > getLoopsInPreorder() const
Return all of the loops in the function in preorder across the loop nests, with siblings in forward p...
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
LLVM_ABI MDNode * createUnlikelyBranchWeights()
Return metadata containing two branch weights, with significant bias towards false destination.
Definition MDBuilder.cpp:48
Metadata node.
Definition Metadata.h:1069
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1554
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:614
Root of the metadata hierarchy.
Definition Metadata.h:64
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:597
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:477
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:310
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:309
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:282
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:307
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
Definition Type.cpp:308
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:130
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
LLVM_ABI const Value * stripInBoundsOffsets(function_ref< void(const Value *)> Func=[](const Value *) {}) const
Strip off pointer casts and inbounds GEPs.
Definition Value.cpp:828
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:319
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getDeclarationIfExists(const Module *M, ID id)
Look up the Function declaration of the intrinsic id in the Module M and return it if it exists.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI ID lookupIntrinsicID(StringRef Name)
This does the actual lookup of an intrinsic ID which matches the given function name.
constexpr bool isAtomic(const T &...O)
Definition SIDefines.h:383
@ PD
PD - Prefix code for packed double precision vector floating point operations performed in the SSE re...
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
DXILDebugInfoMap run(Module &M)
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
StringRef getInstrProfNameVarPrefix()
Return the name prefix of variables containing instrumented function names.
Definition InstrProf.h:131
RelativeUniformCounterPtr ValuesPtrExpr NumBitmapBytes
Definition InstrProf.h:101
StringRef getInstrProfRuntimeHookVarName()
Return the name of the hook variable defined in profile runtime library.
Definition InstrProf.h:206
UniformCounterPtr
Definition InstrProf.h:82
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI void createProfileSamplingVar(Module &M)
StringRef getInstrProfBitmapVarPrefix()
Return the name prefix of profile bitmap variables.
Definition InstrProf.h:143
LLVM_ABI cl::opt< bool > DoInstrProfNameCompression
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
StringRef getInstrProfVTableNamesVarName()
Definition InstrProf.h:159
StringRef getInstrProfDataVarPrefix()
Return the name prefix of variables containing per-function control data.
Definition InstrProf.h:137
RelativeUniformCounterPtr ValuesPtrExpr Int16ArrayTy
Definition InstrProf.h:95
StringRef getCoverageUnusedNamesVarName()
Return the name of the internal variable recording the array of PGO name vars referenced by the cover...
Definition InstrProf.h:172
LLVM_ABI std::string getInstrProfSectionName(InstrProfSectKind IPSK, Triple::ObjectFormatType OF, bool AddSegmentInfo=true)
Return the name of the profile section corresponding to IPSK.
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Value
Definition InstrProf.h:143
LLVM_ABI bool needsComdatForCounter(const GlobalObject &GV, const Module &M)
Check if we can use Comdat for profile variables.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
FuncHash
Definition InstrProf.h:78
LLVM_ABI std::string getPGOName(const GlobalVariable &V, bool InLTO=false)
StringRef getInstrProfInitFuncName()
Return the name of the runtime initialization method that is generated by the compiler.
Definition InstrProf.h:201
StringRef getInstrProfValuesVarPrefix()
Return the name prefix of value profile variables.
Definition InstrProf.h:146
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
StringRef getInstrProfCounterBiasVarName()
Definition InstrProf.h:216
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
StringRef getInstrProfRuntimeHookVarUseFuncName()
Return the name of the compiler generated function that references the runtime hook variable.
Definition InstrProf.h:212
StringRef getInstrProfRegFuncsName()
Return the name of function that registers all the per-function control data at program startup time ...
Definition InstrProf.h:181
LLVM_ABI Error collectPGOFuncNameStrings(ArrayRef< GlobalVariable * > NameVars, std::string &Result, bool doCompression=true)
Produce Result string with the same format described above.
InstrProfSectKind
Definition InstrProf.h:91
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
StringRef getInstrProfCountersVarPrefix()
Return the name prefix of profile counter variables.
Definition InstrProf.h:140
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1753
inst_range instructions(Function *F)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
LLVM_ABI StringRef getPGOFuncNameVarInitializer(GlobalVariable *NameVar)
Return the initializer in string of the PGO name var NameVar.
StringRef getInstrProfBitmapBiasVarName()
Definition InstrProf.h:220
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
StringRef getInstrProfValueProfMemOpFuncName()
Return the name of the profile runtime entry point to do memop size value profiling.
Definition InstrProf.h:118
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI void removeFromUsedLists(Module &M, function_ref< bool(Constant *)> ShouldRemove)
Removes global values from the llvm.used and llvm.compiler.used arrays.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
StringRef getInstrProfNamesRegFuncName()
Return the name of the runtime interface that registers the PGO name strings.
Definition InstrProf.h:193
LLVM_ABI void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
@ Add
Sum of integers.
LLVM_ABI Error collectVTableStrings(ArrayRef< GlobalVariable * > VTables, std::string &Result, bool doCompression)
LLVM_ABI void setGlobalVariableLargeSection(const Triple &TargetTriple, GlobalVariable &GV)
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Count
Definition InstrProf.h:145
IntPtrTy
Definition InstrProf.h:82
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
LLVM_ABI bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken=false)
Check if we can safely rename this Comdat function.
LLVM_ABI void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput)
StringRef getInstrProfNamesVarPostfixVarName()
Definition InstrProf.h:155
LLVM_ABI void appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data=nullptr)
Append F to the list of global ctors of module M with the given Priority.
LLVM_ABI bool isPresplitCoroSuspendExitEdge(const BasicBlock &Src, const BasicBlock &Dest)
Definition CFG.cpp:424
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto predecessors(const MachineBasicBlock *BB)
StringRef getInstrProfValueProfFuncName()
Return the name of the profile runtime entry point to do value profiling for a given site.
Definition InstrProf.h:112
llvm::cl::opt< llvm::InstrProfCorrelator::ProfCorrelatorKind > ProfileCorrelate
StringRef getInstrProfRegFuncName()
Return the name of the runtime interface that registers per-function control data for one instrumente...
Definition InstrProf.h:187
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI void appendToUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.used list.
StringRef getInstrProfNamesVarName()
Return the name of the variable holding the strings (possibly compressed) of all function's PGO names...
Definition InstrProf.h:153
LLVM_ABI bool isGPUProfTarget(const Module &M)
Determines whether module targets a GPU eligible for PGO instrumentation.
LLVM_ABI bool isIRPGOFlagSet(const Module *M)
Check if INSTR_PROF_RAW_VERSION_VAR is defined.
StringRef getInstrProfVNodesVarName()
Return the name of value profile node array variables:
Definition InstrProf.h:149
StringRef toStringRef(bool B)
Construct a string ref from a boolean.
cl::opt< bool > EnableVTableValueProfiling("enable-vtable-value-profiling", cl::init(false), cl::desc("If true, the virtual table address will be instrumented to know " "the types of a C++ pointer. The information is used in indirect " "call promotion to do selective vtable-based comparison."))
@ Extern
Replace returns with jump to thunk, don't emit thunk.
Definition CodeGen.h:163
StringRef getInstrProfVTableVarPrefix()
Return the name prefix of variables containing virtual table profile data.
Definition InstrProf.h:134
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
#define NC
Definition regutils.h:42
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl)
Get the libcall routine name for the specified libcall implementation.