LLVM 23.0.0git
InlineCost.cpp
Go to the documentation of this file.
1//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements inline cost analysis.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/STLExtras.h"
15#include "llvm/ADT/SetVector.h"
18#include "llvm/ADT/Statistic.h"
33#include "llvm/Config/llvm-config.h"
35#include "llvm/IR/CallingConv.h"
36#include "llvm/IR/DataLayout.h"
37#include "llvm/IR/Dominators.h"
39#include "llvm/IR/GlobalAlias.h"
40#include "llvm/IR/InlineAsm.h"
41#include "llvm/IR/InstVisitor.h"
43#include "llvm/IR/Operator.h"
46#include "llvm/Support/Debug.h"
49#include <climits>
50#include <limits>
51#include <optional>
52
53using namespace llvm;
54
55#define DEBUG_TYPE "inline-cost"
56
57STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
58
// Baseline inlining threshold, tunable via -inlinedefault-threshold.
static cl::opt<int>
    DefaultThreshold("inlinedefault-threshold", cl::Hidden, cl::init(225),
                     cl::desc("Default amount of inlining to perform"));
62
63// We introduce this option since there is a minor compile-time win by avoiding
64// addition of TTI attributes (target-features in particular) to inline
65// candidates when they are guaranteed to be the same as top level methods in
66// some use cases. If we avoid adding the attribute, we need an option to avoid
67// checking these attributes.
69 "ignore-tti-inline-compatible", cl::Hidden, cl::init(false),
70 cl::desc("Ignore TTI attributes compatibility check between callee/caller "
71 "during inline cost calculation"));
72
74 "print-instruction-comments", cl::Hidden, cl::init(false),
75 cl::desc("Prints comments for instruction based on inline cost analysis"));
76
78 "inline-threshold", cl::Hidden, cl::init(225),
79 cl::desc("Control the amount of inlining to perform (default = 225)"));
80
82 "inlinehint-threshold", cl::Hidden, cl::init(325),
83 cl::desc("Threshold for inlining functions with inline hint"));
84
// Tighter threshold (45 vs. the 225 default) applied to call sites
// classified as cold.
static cl::opt<int>
    ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden,
                          cl::init(45),
                          cl::desc("Threshold for inlining cold callsites"));
89
91 "inline-enable-cost-benefit-analysis", cl::Hidden, cl::init(false),
92 cl::desc("Enable the cost-benefit analysis for the inliner"));
93
94// InlineSavingsMultiplier overrides per TTI multipliers iff it is
95// specified explicitly in command line options. This option is exposed
96// for tuning and testing.
98 "inline-savings-multiplier", cl::Hidden, cl::init(8),
99 cl::desc("Multiplier to multiply cycle savings by during inlining"));
100
101// InlineSavingsProfitableMultiplier overrides per TTI multipliers iff it is
102// specified explicitly in command line options. This option is exposed
103// for tuning and testing.
105 "inline-savings-profitable-multiplier", cl::Hidden, cl::init(4),
106 cl::desc("A multiplier on top of cycle savings to decide whether the "
107 "savings won't justify the cost"));
108
// Size cap below which a callee may still be inlined by the cost-benefit
// analysis even without sufficient proven cycle savings.
static cl::opt<int>
    InlineSizeAllowance("inline-size-allowance", cl::Hidden, cl::init(100),
                        cl::desc("The maximum size of a callee that get's "
                                 "inlined without sufficient cycle savings"));
113
114// We introduce this threshold to help performance of instrumentation based
115// PGO before we actually hook up inliner with analysis passes such as BPI and
116// BFI.
118 "inlinecold-threshold", cl::Hidden, cl::init(45),
119 cl::desc("Threshold for inlining functions with cold attribute"));
120
// Much larger threshold (3000 vs. the 225 default) for call sites
// classified as hot.
static cl::opt<int>
    HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000),
                         cl::desc("Threshold for hot callsites "));
124
126 "locally-hot-callsite-threshold", cl::Hidden, cl::init(525),
127 cl::desc("Threshold for locally hot callsites "));
128
130 "cold-callsite-rel-freq", cl::Hidden, cl::init(2),
131 cl::desc("Maximum block frequency, expressed as a percentage of caller's "
132 "entry frequency, for a callsite to be cold in the absence of "
133 "profile information."));
134
136 "hot-callsite-rel-freq", cl::Hidden, cl::init(60),
137 cl::desc("Minimum block frequency, expressed as a multiple of caller's "
138 "entry frequency, for a callsite to be hot in the absence of "
139 "profile information."));
140
// Abstract cost unit charged per analyzed instruction; most of the cost
// accounting below is expressed as multiples of this value.
static cl::opt<int>
    InstrCost("inline-instr-cost", cl::Hidden, cl::init(5),
              cl::desc("Cost of a single instruction when inlining"));
144
146 "inline-asm-instr-cost", cl::Hidden, cl::init(0),
147 cl::desc("Cost of a single inline asm instruction when inlining"));
148
// Cost charged via onMemAccess for each load/store; 0 by default, so memory
// accesses add nothing extra unless overridden on the command line.
static cl::opt<int>
    MemAccessCost("inline-memaccess-cost", cl::Hidden, cl::init(0),
                  cl::desc("Cost of load/store instruction when inlining"));
152
154 "inline-call-penalty", cl::Hidden, cl::init(25),
155 cl::desc("Call penalty that is applied per callsite when inlining"));
156
// Refuse to inline callees whose static stack allocation exceeds this limit.
// Default is size_t max, i.e. effectively unlimited.
static cl::opt<size_t>
    StackSizeThreshold("inline-max-stacksize", cl::Hidden,
                       cl::init(std::numeric_limits<size_t>::max()),
                       cl::desc("Do not inline functions with a stack size "
                                "that exceeds the specified limit"));
162
164 "recursive-inline-max-stacksize", cl::Hidden,
166 cl::desc("Do not inline recursive functions with a stack "
167 "size that exceeds the specified limit"));
168
170 "inline-cost-full", cl::Hidden,
171 cl::desc("Compute the full inline cost of a call site even when the cost "
172 "exceeds the threshold."));
173
175 "inline-caller-superset-nobuiltin", cl::Hidden, cl::init(true),
176 cl::desc("Allow inlining when caller has a superset of callee's nobuiltin "
177 "attributes."));
178
180 "disable-gep-const-evaluation", cl::Hidden, cl::init(false),
181 cl::desc("Disables evaluation of GetElementPtr with constant operands"));
182
184 "inline-all-viable-calls", cl::Hidden, cl::init(false),
185 cl::desc("Inline all viable calls, even if they exceed the inlining "
186 "threshold"));
187namespace llvm {
188std::optional<int> getStringFnAttrAsInt(const Attribute &Attr) {
189 if (Attr.isValid()) {
190 int AttrValue = 0;
191 if (!Attr.getValueAsString().getAsInteger(10, AttrValue))
192 return AttrValue;
193 }
194 return std::nullopt;
195}
196
197std::optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind) {
198 return getStringFnAttrAsInt(CB.getFnAttr(AttrKind));
199}
200
201std::optional<int> getStringFnAttrAsInt(Function *F, StringRef AttrKind) {
202 return getStringFnAttrAsInt(F->getFnAttribute(AttrKind));
203}
204
namespace InlineConstants {
// Expose the tunable per-instruction cost (-inline-instr-cost) to clients of
// InlineConstants without exposing the cl::opt itself.
int getInstrCost() { return InstrCost; }

} // namespace InlineConstants
209
210} // namespace llvm
211
212namespace {
213class InlineCostCallAnalyzer;
214
/// Snapshot of the analyzer's cost and threshold immediately before and after
/// a single instruction was analyzed, so the instruction's individual impact
/// can be reported.
struct InstructionCostDetail {
  int CostBefore = 0;
  int CostAfter = 0;
  int ThresholdBefore = 0;
  int ThresholdAfter = 0;

  /// Threshold change caused by this instruction.
  int getThresholdDelta() const { return ThresholdAfter - ThresholdBefore; }

  /// Cost change caused by this instruction.
  int getCostDelta() const { return CostAfter - CostBefore; }

  /// True iff analyzing this instruction moved the threshold at all.
  bool hasThresholdChanged() const { return getThresholdDelta() != 0; }
};
229
// Assembly writer that annotates each instruction with the cost details the
// analyzer recorded for it.
class InlineCostAnnotationWriter : public AssemblyAnnotationWriter {
private:
  // The analyzer whose per-instruction records are printed; not owned.
  InlineCostCallAnalyzer *const ICCA;

public:
  InlineCostAnnotationWriter(InlineCostCallAnalyzer *ICCA) : ICCA(ICCA) {}
  // Defined out of line; emits the cost comment for \p I to \p OS.
  void emitInstructionAnnot(const Instruction *I,
                            formatted_raw_ostream &OS) override;
};
239
240/// Carry out call site analysis, in order to evaluate inlinability.
241/// NOTE: the type is currently used as implementation detail of functions such
242/// as llvm::getInlineCost. Note the function_ref constructor parameters - the
243/// expectation is that they come from the outer scope, from the wrapper
244/// functions. If we want to support constructing CallAnalyzer objects where
245/// lambdas are provided inline at construction, or where the object needs to
246/// otherwise survive past the scope of the provided functions, we need to
247/// revisit the argument types.
class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
  // Convenience alias for the CRTP visitor base class.
  typedef InstVisitor<CallAnalyzer, bool> Base;
  friend class InstVisitor<CallAnalyzer, bool>;

protected:
  virtual ~CallAnalyzer() = default;
  /// The TargetTransformInfo available for this compilation.
  const TargetTransformInfo &TTI;

  /// Getter for the cache of @llvm.assume intrinsics.
  function_ref<AssumptionCache &(Function &)> GetAssumptionCache;

  /// Getter for BlockFrequencyInfo
  function_ref<BlockFrequencyInfo &(Function &)> GetBFI;

  /// Getter for TargetLibraryInfo
  function_ref<const TargetLibraryInfo &(Function &)> GetTLI;

  /// Profile summary information.
  ProfileSummaryInfo *PSI;

  /// The called function.
  Function &F;

  // Cache the DataLayout since we use it a lot.
  const DataLayout &DL;

  /// The OptimizationRemarkEmitter available for this compilation.
  OptimizationRemarkEmitter *ORE;

  /// The candidate callsite being analyzed. Please do not use this to do
  /// analysis in the caller function; we want the inline cost query to be
  /// easily cacheable. Instead, use the cover function paramHasAttr.
  CallBase &CandidateCall;

  /// Getter for the cache of ephemeral values.
  function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache = nullptr;

  /// Extension points for handling callsite features.
  // Called before a basic block was analyzed.
  virtual void onBlockStart(const BasicBlock *BB) {}

  /// Called after a basic block was analyzed.
  virtual void onBlockAnalyzed(const BasicBlock *BB) {}

  /// Called before an instruction was analyzed
  virtual void onInstructionAnalysisStart(const Instruction *I) {}

  /// Called after an instruction was analyzed
  virtual void onInstructionAnalysisFinish(const Instruction *I) {}

  /// Called at the end of the analysis of the callsite. Return the outcome of
  /// the analysis, i.e. 'InlineResult(true)' if the inlining may happen, or
  /// the reason it can't.
  virtual InlineResult finalizeAnalysis() { return InlineResult::success(); }
  /// Called when we're about to start processing a basic block, and every time
  /// we are done processing an instruction. Return true if there is no point in
  /// continuing the analysis (e.g. we've determined already the call site is
  /// too expensive to inline)
  virtual bool shouldStop() { return false; }

  /// Called before the analysis of the callee body starts (with callsite
  /// contexts propagated). It checks callsite-specific information. Return a
  /// reason analysis can't continue if that's the case, or 'true' if it may
  /// continue.
  virtual InlineResult onAnalysisStart() { return InlineResult::success(); }
  /// Called if the analysis engine decides SROA cannot be done for the given
  /// alloca.
  virtual void onDisableSROA(AllocaInst *Arg) {}

  /// Called the analysis engine determines load elimination won't happen.
  virtual void onDisableLoadElimination() {}

  /// Called when we visit a CallBase, before the analysis starts. Return false
  /// to stop further processing of the instruction.
  virtual bool onCallBaseVisitStart(CallBase &Call) { return true; }

  /// Called to account for a call.
  virtual void onCallPenalty() {}

  /// Called to account for a load or store.
  virtual void onMemAccess(){};

  /// Called to account for the expectation the inlining would result in a load
  /// elimination.
  virtual void onLoadEliminationOpportunity() {}

  /// Called to account for the cost of argument setup for the Call in the
  /// callee's body (not the callsite currently under analysis).
  virtual void onCallArgumentSetup(const CallBase &Call) {}

  /// Called to account for a load relative intrinsic.
  virtual void onLoadRelativeIntrinsic() {}

  /// Called to account for a lowered call.
  virtual void onLoweredCall(Function *F, CallBase &Call, bool IsIndirectCall) {
  }

  /// Account for a jump table of given size. Return false to stop further
  /// processing the switch instruction
  virtual bool onJumpTable(unsigned JumpTableSize) { return true; }

  /// Account for a case cluster of given size. Return false to stop further
  /// processing of the instruction.
  virtual bool onCaseCluster(unsigned NumCaseCluster) { return true; }

  /// Called at the end of processing a switch instruction, with the given
  /// number of case clusters.
  virtual void onFinalizeSwitch(unsigned JumpTableSize, unsigned NumCaseCluster,
                                bool DefaultDestUnreachable) {}

  /// Called to account for any other instruction not specifically accounted
  /// for.
  virtual void onMissedSimplification() {}

  /// Account for inline assembly instructions.
  virtual void onInlineAsm(const InlineAsm &Arg) {}

  /// Start accounting potential benefits due to SROA for the given alloca.
  virtual void onInitializeSROAArg(AllocaInst *Arg) {}

  /// Account SROA savings for the AllocaInst value.
  virtual void onAggregateSROAUse(AllocaInst *V) {}

  /// If \p V maps to an SROA-tracked alloca, either record one more profitable
  /// use (when \p DoNotDisable is set) or give up on SROA for that alloca.
  /// Returns true only when an SROA use was successfully accounted.
  bool handleSROA(Value *V, bool DoNotDisable) {
    // Check for SROA candidates in comparisons.
    if (auto *SROAArg = getSROAArgForValueOrNull(V)) {
      if (DoNotDisable) {
        onAggregateSROAUse(SROAArg);
        return true;
      }
      disableSROAForArg(SROAArg);
    }
    return false;
  }

  // Facts about the callee discovered while walking its body; several of
  // these can make the callee ineligible for inlining.
  bool IsCallerRecursive = false;
  bool IsRecursiveCall = false;
  bool ExposesReturnsTwice = false;
  bool HasDynamicAlloca = false;
  bool ContainsNoDuplicateCall = false;
  bool HasReturn = false;
  bool HasIndirectBr = false;
  bool HasUninlineableIntrinsic = false;
  bool InitsVargArgs = false;

  /// Number of bytes allocated statically by the callee.
  uint64_t AllocatedSize = 0;
  unsigned NumInstructions = 0;
  unsigned NumInlineAsmInstructions = 0;
  unsigned NumVectorInstructions = 0;

  /// While we walk the potentially-inlined instructions, we build up and
  /// maintain a mapping of simplified values specific to this callsite. The
  /// idea is to propagate any special information we have about arguments to
  /// this call through the inlinable section of the function, and account for
  /// likely simplifications post-inlining. The most important aspect we track
  /// is CFG altering simplifications -- when we prove a basic block dead, that
  /// can cause dramatic shifts in the cost of inlining a function.
  /// Note: The simplified Value may be owned by the caller function.
  DenseMap<Value *, Value *> SimplifiedValues;

  /// Keep track of the values which map back (through function arguments) to
  /// allocas on the caller stack which could be simplified through SROA.
  DenseMap<Value *, AllocaInst *> SROAArgValues;

  /// Keep track of Allocas for which we believe we may get SROA optimization.
  DenseSet<AllocaInst *> EnabledSROAAllocas;

  /// Keep track of values which map to a pointer base and constant offset.
  DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;

  /// Keep track of dead blocks due to the constant arguments.
  SmallPtrSet<BasicBlock *, 16> DeadBlocks;

  /// The mapping of the blocks to their known unique successors due to the
  /// constant arguments.
  DenseMap<BasicBlock *, BasicBlock *> KnownSuccessors;

  /// Model the elimination of repeated loads that is expected to happen
  /// whenever we simplify away the stores that would otherwise cause them to be
  /// loads.
  bool EnableLoadElimination = true;

  /// Whether we allow inlining for recursive call.
  bool AllowRecursiveCall = false;

  // NOTE(review): presumably the addresses of loads seen so far, used by the
  // load-elimination modeling above — confirm against visitLoad/visitStore.
  SmallPtrSet<Value *, 16> LoadAddrSet;

  /// Map \p V to its tracked SROA alloca, or null when it has none or when
  /// SROA has since been disabled for that alloca.
  AllocaInst *getSROAArgForValueOrNull(Value *V) const {
    auto It = SROAArgValues.find(V);
    if (It == SROAArgValues.end() || EnabledSROAAllocas.count(It->second) == 0)
      return nullptr;
    return It->second;
  }

  /// Use a value in its given form directly if possible, otherwise try looking
  /// for it in SimplifiedValues.
  template <typename T> T *getDirectOrSimplifiedValue(Value *V) const {
    if (auto *Direct = dyn_cast<T>(V))
      return Direct;
    return getSimplifiedValue<T>(V);
  }

  // Custom simplification helper routines.
  bool isAllocaDerivedArg(Value *V);
  void disableSROAForArg(AllocaInst *SROAArg);
  void disableSROA(Value *V);
  void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB);
  void disableLoadElimination();
  bool isGEPFree(GetElementPtrInst &GEP);
  bool canFoldInboundsGEP(GetElementPtrInst &I);
  bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
  bool simplifyCallSite(Function *F, CallBase &Call);
  bool simplifyCmpInstForRecCall(CmpInst &Cmp);
  bool simplifyInstruction(Instruction &I);
  bool simplifyIntrinsicCallIsConstant(CallBase &CB);
  bool simplifyIntrinsicCallObjectSize(CallBase &CB);
  ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
  bool isLoweredToCall(Function *F, CallBase &Call);

  /// Return true if the given argument to the function being considered for
  /// inlining has the given attribute set either at the call site or the
  /// function declaration. Primarily used to inspect call site specific
  /// attributes since these can be more precise than the ones on the callee
  /// itself.
  bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);

  /// Return true if the given value is known non null within the callee if
  /// inlined through this particular callsite.
  bool isKnownNonNullInCallee(Value *V);

  /// Return true if size growth is allowed when inlining the callee at \p Call.
  bool allowSizeGrowth(CallBase &Call);

  // Custom analysis routines.
  InlineResult analyzeBlock(BasicBlock *BB,
                            const SmallPtrSetImpl<const Value *> &EphValues);

  // Disable several entry points to the visitor so we don't accidentally use
  // them by declaring but not defining them here.
  void visit(Module *);
  void visit(Module &);
  void visit(Function *);
  void visit(Function &);
  void visit(BasicBlock *);
  void visit(BasicBlock &);

  // Provide base case for our instruction visit.
  bool visitInstruction(Instruction &I);

  // Our visit overrides.
  bool visitAlloca(AllocaInst &I);
  bool visitPHI(PHINode &I);
  bool visitGetElementPtr(GetElementPtrInst &I);
  bool visitBitCast(BitCastInst &I);
  bool visitPtrToInt(PtrToIntInst &I);
  bool visitIntToPtr(IntToPtrInst &I);
  bool visitCastInst(CastInst &I);
  bool visitCmpInst(CmpInst &I);
  bool visitSub(BinaryOperator &I);
  bool visitBinaryOperator(BinaryOperator &I);
  bool visitFNeg(UnaryOperator &I);
  bool visitLoad(LoadInst &I);
  bool visitStore(StoreInst &I);
  bool visitExtractValue(ExtractValueInst &I);
  bool visitInsertValue(InsertValueInst &I);
  bool visitCallBase(CallBase &Call);
  bool visitReturnInst(ReturnInst &RI);
  bool visitUncondBrInst(UncondBrInst &BI);
  bool visitCondBrInst(CondBrInst &BI);
  bool visitSelectInst(SelectInst &SI);
  bool visitSwitchInst(SwitchInst &SI);
  bool visitIndirectBrInst(IndirectBrInst &IBI);
  bool visitResumeInst(ResumeInst &RI);
  bool visitCleanupReturnInst(CleanupReturnInst &RI);
  bool visitCatchReturnInst(CatchReturnInst &RI);
  bool visitUnreachableInst(UnreachableInst &I);

public:
  /// Construct an analyzer for inlining \p Callee at \p Call. All
  /// function_ref parameters must outlive this object (see class comment).
  CallAnalyzer(
      Function &Callee, CallBase &Call, const TargetTransformInfo &TTI,
      function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
      function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
      function_ref<const TargetLibraryInfo &(Function &)> GetTLI = nullptr,
      ProfileSummaryInfo *PSI = nullptr,
      OptimizationRemarkEmitter *ORE = nullptr,
      function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache =
          nullptr)
      : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
        GetTLI(GetTLI), PSI(PSI), F(Callee), DL(F.getDataLayout()), ORE(ORE),
        CandidateCall(Call), GetEphValuesCache(GetEphValuesCache) {}

  InlineResult analyze();

  /// Lookup simplified Value. May return a value owned by the caller.
  Value *getSimplifiedValueUnchecked(Value *V) const {
    return SimplifiedValues.lookup(V);
  }

  /// Lookup simplified Value, but return nullptr if the simplified value is
  /// owned by the caller.
  template <typename T> T *getSimplifiedValue(Value *V) const {
    Value *SimpleV = SimplifiedValues.lookup(V);
    if (!SimpleV)
      return nullptr;

    // Skip checks if we know T is a global. This has a small, but measurable
    // impact on compile-time.
    if constexpr (std::is_base_of_v<Constant, T>)
      return dyn_cast<T>(SimpleV);

    // Make sure the simplified Value is owned by this function
    if (auto *I = dyn_cast<Instruction>(SimpleV)) {
      if (I->getFunction() != &F)
        return nullptr;
    } else if (auto *Arg = dyn_cast<Argument>(SimpleV)) {
      if (Arg->getParent() != &F)
        return nullptr;
    } else if (!isa<Constant>(SimpleV))
      return nullptr;
    return dyn_cast<T>(SimpleV);
  }

  // Keep a bunch of stats about the cost savings found so we can print them
  // out when debugging.
  unsigned NumConstantArgs = 0;
  unsigned NumConstantOffsetPtrArgs = 0;
  unsigned NumAllocaArgs = 0;
  unsigned NumConstantPtrCmps = 0;
  unsigned NumConstantPtrDiffs = 0;
  unsigned NumInstructionsSimplified = 0;

  // Defined out of line.
  void dump();
};
583
// Estimate the number of comparisons a switch with \p NumCaseCluster clusters
// needs once it is lowered to a balanced binary search tree. Defining f(n) as
// the number of tree nodes:
//   f(n) = 1 + f(n/2) + f(n - n/2)  for n > 3, and f(n) = n for n <= 3,
// the leaves contribute n comparisons and the interior nodes contribute
// 2^(log2(n) - 1) - 1 = n / 2 - 1, giving the closed form
//   n + n / 2 - 1   (equivalently 3n/2 - 1).
int64_t getExpectedNumberOfCompare(int NumCaseCluster) {
  const int64_t N = NumCaseCluster;
  return N + N / 2 - 1;
}
602
603/// FIXME: if it is necessary to derive from InlineCostCallAnalyzer, note
604/// the FIXME in onLoweredCall, when instantiating an InlineCostCallAnalyzer
605class InlineCostCallAnalyzer final : public CallAnalyzer {
606 const bool ComputeFullInlineCost;
607 int LoadEliminationCost = 0;
608 /// Bonus to be applied when percentage of vector instructions in callee is
609 /// high (see more details in updateThreshold).
610 int VectorBonus = 0;
611 /// Bonus to be applied when the callee has only one reachable basic block.
612 int SingleBBBonus = 0;
613
614 /// Tunable parameters that control the analysis.
615 const InlineParams &Params;
616
617 // This DenseMap stores the delta change in cost and threshold after
618 // accounting for the given instruction. The map is filled only with the
619 // flag PrintInstructionComments on.
620 DenseMap<const Instruction *, InstructionCostDetail> InstructionCostDetailMap;
621
622 /// Upper bound for the inlining cost. Bonuses are being applied to account
623 /// for speculative "expected profit" of the inlining decision.
624 int Threshold = 0;
625
626 /// The amount of StaticBonus applied.
627 int StaticBonusApplied = 0;
628
629 /// Attempt to evaluate indirect calls to boost its inline cost.
630 const bool BoostIndirectCalls;
631
632 /// Ignore the threshold when finalizing analysis.
633 const bool IgnoreThreshold;
634
635 // True if the cost-benefit-analysis-based inliner is enabled.
636 const bool CostBenefitAnalysisEnabled;
637
638 /// Inlining cost measured in abstract units, accounts for all the
639 /// instructions expected to be executed for a given function invocation.
640 /// Instructions that are statically proven to be dead based on call-site
641 /// arguments are not counted here.
642 int Cost = 0;
643
644 // The cumulative cost at the beginning of the basic block being analyzed. At
645 // the end of analyzing each basic block, "Cost - CostAtBBStart" represents
646 // the size of that basic block.
647 int CostAtBBStart = 0;
648
649 // The static size of live but cold basic blocks. This is "static" in the
650 // sense that it's not weighted by profile counts at all.
651 int ColdSize = 0;
652
653 // Whether inlining is decided by cost-threshold analysis.
654 bool DecidedByCostThreshold = false;
655
656 // Whether inlining is decided by cost-benefit analysis.
657 bool DecidedByCostBenefit = false;
658
659 // The cost-benefit pair computed by cost-benefit analysis.
660 std::optional<CostBenefitPair> CostBenefit;
661
662 bool SingleBB = true;
663
664 unsigned SROACostSavings = 0;
665 unsigned SROACostSavingsLost = 0;
666
667 /// The mapping of caller Alloca values to their accumulated cost savings. If
668 /// we have to disable SROA for one of the allocas, this tells us how much
669 /// cost must be added.
670 DenseMap<AllocaInst *, int> SROAArgCosts;
671
672 /// Return true if \p Call is a cold callsite.
673 bool isColdCallSite(CallBase &Call, BlockFrequencyInfo *CallerBFI);
674
675 /// Update Threshold based on callsite properties such as callee
676 /// attributes and callee hotness for PGO builds. The Callee is explicitly
677 /// passed to support analyzing indirect calls whose target is inferred by
678 /// analysis.
679 void updateThreshold(CallBase &Call, Function &Callee);
680 /// Return a higher threshold if \p Call is a hot callsite.
681 std::optional<int> getHotCallSiteThreshold(CallBase &Call,
682 BlockFrequencyInfo *CallerBFI);
683
684 /// Handle a capped 'int' increment for Cost.
685 void addCost(int64_t Inc) {
686 Inc = std::clamp<int64_t>(Inc, INT_MIN, INT_MAX);
687 Cost = std::clamp<int64_t>(Inc + Cost, INT_MIN, INT_MAX);
688 }
689
690 void onDisableSROA(AllocaInst *Arg) override {
691 auto CostIt = SROAArgCosts.find(Arg);
692 if (CostIt == SROAArgCosts.end())
693 return;
694 addCost(CostIt->second);
695 SROACostSavings -= CostIt->second;
696 SROACostSavingsLost += CostIt->second;
697 SROAArgCosts.erase(CostIt);
698 }
699
700 void onDisableLoadElimination() override {
701 addCost(LoadEliminationCost);
702 LoadEliminationCost = 0;
703 }
704
705 bool onCallBaseVisitStart(CallBase &Call) override {
706 if (std::optional<int> AttrCallThresholdBonus =
707 getStringFnAttrAsInt(Call, "call-threshold-bonus"))
708 Threshold += *AttrCallThresholdBonus;
709
710 if (std::optional<int> AttrCallCost =
711 getStringFnAttrAsInt(Call, "call-inline-cost")) {
712 addCost(*AttrCallCost);
713 // Prevent further processing of the call since we want to override its
714 // inline cost, not just add to it.
715 return false;
716 }
717 return true;
718 }
719
  // Charge the flat per-call penalty (-inline-call-penalty, default 25).
  void onCallPenalty() override { addCost(CallPenalty); }
721
  // Charge the configured load/store cost (-inline-memaccess-cost, default 0).
  void onMemAccess() override { addCost(MemAccessCost); }
723
724 void onCallArgumentSetup(const CallBase &Call) override {
725 // Pay the price of the argument setup. We account for the average 1
726 // instruction per call argument setup here.
727 addCost(Call.arg_size() * InstrCost);
728 }
  void onLoadRelativeIntrinsic() override {
    // This is normally lowered to 4 LLVM instructions.
    // NOTE(review): only 3 are charged here — presumably the call instruction
    // itself is accounted elsewhere; confirm against the intrinsic handling.
    addCost(3 * InstrCost);
  }
733 void onLoweredCall(Function *F, CallBase &Call,
734 bool IsIndirectCall) override {
735 // We account for the average 1 instruction per call argument setup here.
736 addCost(Call.arg_size() * InstrCost);
737
738 // If we have a constant that we are calling as a function, we can peer
739 // through it and see the function target. This happens not infrequently
740 // during devirtualization and so we want to give it a hefty bonus for
741 // inlining, but cap that bonus in the event that inlining wouldn't pan out.
742 // Pretend to inline the function, with a custom threshold.
743 if (IsIndirectCall && BoostIndirectCalls) {
744 auto IndirectCallParams = Params;
745 IndirectCallParams.DefaultThreshold =
747 /// FIXME: if InlineCostCallAnalyzer is derived from, this may need
748 /// to instantiate the derived class.
749 InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI,
750 GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,
751 false);
752 if (CA.analyze().isSuccess()) {
753 // We were able to inline the indirect call! Subtract the cost from the
754 // threshold to get the bonus we want to apply, but don't go below zero.
755 addCost(-std::max(0, CA.getThreshold() - CA.getCost()));
756 }
757 } else
758 // Otherwise simply add the cost for merely making the call.
759 addCost(TTI.getInlineCallPenalty(CandidateCall.getCaller(), Call,
760 CallPenalty));
761 }
762
763 void onFinalizeSwitch(unsigned JumpTableSize, unsigned NumCaseCluster,
764 bool DefaultDestUnreachable) override {
765 // If suitable for a jump table, consider the cost for the table size and
766 // branch to destination.
767 // Maximum valid cost increased in this function.
768 if (JumpTableSize) {
769 // Suppose a default branch includes one compare and one conditional
770 // branch if it's reachable.
771 if (!DefaultDestUnreachable)
772 addCost(2 * InstrCost);
773 // Suppose a jump table requires one load and one jump instruction.
774 int64_t JTCost =
775 static_cast<int64_t>(JumpTableSize) * InstrCost + 2 * InstrCost;
776 addCost(JTCost);
777 return;
778 }
779
780 if (NumCaseCluster <= 3) {
781 // Suppose a comparison includes one compare and one conditional branch.
782 // We can reduce a set of instructions if the default branch is
783 // undefined.
784 addCost((NumCaseCluster - DefaultDestUnreachable) * 2 * InstrCost);
785 return;
786 }
787
788 int64_t ExpectedNumberOfCompare =
789 getExpectedNumberOfCompare(NumCaseCluster);
790 int64_t SwitchCost = ExpectedNumberOfCompare * 2 * InstrCost;
791
792 addCost(SwitchCost);
793 }
794
795 // Parses the inline assembly argument to account for its cost. Inline
796 // assembly instructions incur higher costs for inlining since they cannot be
797 // analyzed and optimized.
798 void onInlineAsm(const InlineAsm &Arg) override {
800 return;
802 Arg.collectAsmStrs(AsmStrs);
803 int SectionLevel = 0;
804 int InlineAsmInstrCount = 0;
805 for (StringRef AsmStr : AsmStrs) {
806 // Trim whitespaces and comments.
807 StringRef Trimmed = AsmStr.trim();
808 size_t hashPos = Trimmed.find('#');
809 if (hashPos != StringRef::npos)
810 Trimmed = Trimmed.substr(0, hashPos);
811 // Ignore comments.
812 if (Trimmed.empty())
813 continue;
814 // Filter out the outlined assembly instructions from the cost by keeping
815 // track of the section level and only accounting for instrutions at
816 // section level of zero. Note there will be duplication in outlined
817 // sections too, but is not accounted in the inlining cost model.
818 if (Trimmed.starts_with(".pushsection")) {
819 ++SectionLevel;
820 continue;
821 }
822 if (Trimmed.starts_with(".popsection")) {
823 --SectionLevel;
824 continue;
825 }
826 // Ignore directives and labels.
827 if (Trimmed.starts_with(".") || Trimmed.contains(":"))
828 continue;
829 if (SectionLevel == 0)
830 ++InlineAsmInstrCount;
831 }
832 NumInlineAsmInstructions += InlineAsmInstrCount;
833 addCost(InlineAsmInstrCount * InlineAsmInstrCost);
834 }
835
  // An instruction we could not simplify survives inlining: charge one
  // instruction's worth of cost.
  void onMissedSimplification() override { addCost(InstrCost); }
837
838 void onInitializeSROAArg(AllocaInst *Arg) override {
839 assert(Arg != nullptr &&
840 "Should not initialize SROA costs for null value.");
841 auto SROAArgCost = TTI.getCallerAllocaCost(&CandidateCall, Arg);
842 SROACostSavings += SROAArgCost;
843 SROAArgCosts[Arg] = SROAArgCost;
844 }
845
846 void onAggregateSROAUse(AllocaInst *SROAArg) override {
847 auto CostIt = SROAArgCosts.find(SROAArg);
848 assert(CostIt != SROAArgCosts.end() &&
849 "expected this argument to have a cost");
850 CostIt->second += InstrCost;
851 SROACostSavings += InstrCost;
852 }
853
// Snapshot the running Cost when entering a block so onBlockAnalyzed can
// compute the per-block cost delta (Cost - CostAtBBStart).
854 void onBlockStart(const BasicBlock *BB) override { CostAtBBStart = Cost; }
855
856 void onBlockAnalyzed(const BasicBlock *BB) override {
857 if (CostBenefitAnalysisEnabled) {
858 // Keep track of the static size of live but cold basic blocks. For now,
859 // we define a cold basic block to be one that's never executed.
860 assert(GetBFI && "GetBFI must be available");
861 BlockFrequencyInfo *BFI = &(GetBFI(F));
862 assert(BFI && "BFI must be available");
863 auto ProfileCount = BFI->getBlockProfileCount(BB);
864 if (*ProfileCount == 0)
865 ColdSize += Cost - CostAtBBStart;
866 }
867
868 auto *TI = BB->getTerminator();
869 // If we had any successors at this point, than post-inlining is likely to
870 // have them as well. Note that we assume any basic blocks which existed
871 // due to branches or switches which folded above will also fold after
872 // inlining.
873 if (SingleBB && TI->getNumSuccessors() > 1) {
874 // Take off the bonus we applied to the threshold.
875 Threshold -= SingleBBBonus;
876 SingleBB = false;
877 }
878 }
879
880 void onInstructionAnalysisStart(const Instruction *I) override {
881 // This function is called to store the initial cost of inlining before
882 // the given instruction was assessed.
// NOTE(review): the guard condition for this early return (original line 883)
// is missing from this copy — presumably a check that instruction-comment
// printing is enabled; verify against upstream before editing.
884 return;
885 auto &CostDetail = InstructionCostDetailMap[I];
886 CostDetail.CostBefore = Cost;
887 CostDetail.ThresholdBefore = Threshold;
888 }
889
890 void onInstructionAnalysisFinish(const Instruction *I) override {
891 // This function is called to find new values of cost and threshold after
892 // the instruction has been assessed.
// NOTE(review): the guard condition for this early return (original line 893)
// is missing from this copy — it should mirror the guard in
// onInstructionAnalysisStart; verify against upstream before editing.
894 return;
895 auto &CostDetail = InstructionCostDetailMap[I];
896 CostDetail.CostAfter = Cost;
897 CostDetail.ThresholdAfter = Threshold;
898 }
899
// Decide whether the profile-driven cost-benefit analysis can be used for
// this call site: requires a profile summary, BFI accessors, entry counts on
// both caller and callee, and a hot call site.
900 bool isCostBenefitAnalysisEnabled() {
901 if (!PSI || !PSI->hasProfileSummary())
902 return false;
903
904 if (!GetBFI)
905 return false;
906
// NOTE(review): original lines 907 and 909 are missing from this copy — the
// opening condition of this if/else (presumably an explicit user option
// check) is not visible; verify against upstream before editing.
908 // Honor the explicit request from the user.
910 return false;
911 } else {
912 // Otherwise, require instrumentation profile.
913 if (!PSI->hasInstrumentationProfile())
914 return false;
915 }
916
917 auto *Caller = CandidateCall.getParent()->getParent();
918 if (!Caller->getEntryCount())
919 return false;
920
921 BlockFrequencyInfo *CallerBFI = &(GetBFI(*Caller));
922 if (!CallerBFI)
923 return false;
924
925 // For now, limit to hot call site.
926 if (!PSI->isHotCallSite(CandidateCall, CallerBFI))
927 return false;
928
929 // Make sure we have a nonzero entry count.
930 auto EntryCount = F.getEntryCount();
931 if (!EntryCount || !EntryCount->getCount())
932 return false;
933
934 BlockFrequencyInfo *CalleeBFI = &(GetBFI(F));
935 if (!CalleeBFI)
936 return false;
937
938 return true;
939 }
940
941 // A helper function to choose between command line override and default.
942 unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const {
943 if (InlineSavingsMultiplier.getNumOccurrences())
// NOTE(review): the two return statements of this helper (original lines
// 944-945: the command-line value and the default) are missing from this
// copy; verify against upstream before editing.
946 }
947
948 // A helper function to choose between command line override and default.
949 unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const {
950 if (InlineSavingsProfitableMultiplier.getNumOccurrences())
// NOTE(review): the two return statements of this helper (original lines
// 951-952: the command-line value and the default) are missing from this
// copy; verify against upstream before editing.
953 }
954
955 void OverrideCycleSavingsAndSizeForTesting(APInt &CycleSavings, int &Size) {
956 if (std::optional<int> AttrCycleSavings = getStringFnAttrAsInt(
957 CandidateCall, "inline-cycle-savings-for-test")) {
958 CycleSavings = *AttrCycleSavings;
959 }
960
961 if (std::optional<int> AttrRuntimeCost = getStringFnAttrAsInt(
962 CandidateCall, "inline-runtime-cost-for-test")) {
963 Size = *AttrRuntimeCost;
964 }
965 }
966
967 // Determine whether we should inline the given call site, taking into account
968 // both the size cost and the cycle savings. Return std::nullopt if we don't
969 // have sufficient profiling information to determine.
970 std::optional<bool> costBenefitAnalysis() {
971 if (!CostBenefitAnalysisEnabled)
972 return std::nullopt;
973
974 // buildInlinerPipeline in the pass builder sets HotCallSiteThreshold to 0
975 // for the prelink phase of the AutoFDO + ThinLTO build. Honor the logic by
976 // falling back to the cost-based metric.
977 // TODO: Improve this hacky condition.
978 if (Threshold == 0)
979 return std::nullopt;
980
981 assert(GetBFI);
982 BlockFrequencyInfo *CalleeBFI = &(GetBFI(F));
983 assert(CalleeBFI);
984
985 // The cycle savings expressed as the sum of InstrCost
986 // multiplied by the estimated dynamic count of each instruction we can
987 // avoid. Savings come from the call site cost, such as argument setup and
988 // the call instruction, as well as the instructions that are folded.
989 //
990 // We use 128-bit APInt here to avoid potential overflow. This variable
991 // should stay well below 10^^24 (or 2^^80) in practice. This "worst" case
992 // assumes that we can avoid or fold a billion instructions, each with a
993 // profile count of 10^^15 -- roughly the number of cycles for a 24-hour
994 // period on a 4GHz machine.
995 APInt CycleSavings(128, 0);
996
997 for (auto &BB : F) {
998 APInt CurrentSavings(128, 0);
999 for (auto &I : BB) {
1000 if (CondBrInst *BI = dyn_cast<CondBrInst>(&I)) {
1001 // Count a conditional branch as savings if it becomes unconditional.
1002 if (getSimplifiedValue<ConstantInt>(BI->getCondition()))
1003 CurrentSavings += InstrCost;
1004 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(&I)) {
1005 if (getSimplifiedValue<ConstantInt>(SI->getCondition()))
1006 CurrentSavings += InstrCost;
1007 } else if (SimplifiedValues.count(&I)) {
1008 // Count an instruction as savings if we can fold it.
1009 CurrentSavings += InstrCost;
1010 }
1011 }
1012
1013 auto ProfileCount = CalleeBFI->getBlockProfileCount(&BB);
// NOTE(review): ProfileCount is a std::optional dereferenced without a
// check here; presumably guaranteed non-empty because the callee has an
// entry count when CostBenefitAnalysisEnabled — confirm before changing.
1014 CurrentSavings *= *ProfileCount;
1015 CycleSavings += CurrentSavings;
1016 }
1017
1018 // Compute the cycle savings per call.
1019 auto EntryProfileCount = F.getEntryCount();
1020 assert(EntryProfileCount && EntryProfileCount->getCount());
1021 auto EntryCount = EntryProfileCount->getCount();
1022 CycleSavings += EntryCount / 2;
1023 CycleSavings = CycleSavings.udiv(EntryCount);
1024
1025 // Compute the total savings for the call site.
1026 auto *CallerBB = CandidateCall.getParent();
1027 BlockFrequencyInfo *CallerBFI = &(GetBFI(*(CallerBB->getParent())));
1028 CycleSavings += getCallsiteCost(TTI, this->CandidateCall, DL);
1029 CycleSavings *= *CallerBFI->getBlockProfileCount(CallerBB);
1030
1031 // Remove the cost of the cold basic blocks to model the runtime cost more
1032 // accurately. Both machine block placement and function splitting could
1033 // place cold blocks further from hot blocks.
1034 int Size = Cost - ColdSize;
1035
1036 // Allow tiny callees to be inlined regardless of whether they meet the
1037 // savings threshold.
// NOTE(review): original line 1038 is missing from this copy — presumably
// the statement implementing the tiny-callee allowance described above;
// verify against upstream before editing.
1039
1040 OverrideCycleSavingsAndSizeForTesting(CycleSavings, Size);
1041 CostBenefit.emplace(APInt(128, Size), CycleSavings);
1042
1043 // Let R be the ratio of CycleSavings to Size. We accept the inlining
1044 // opportunity if R is really high and reject if R is really low. If R is
1045 // somewhere in the middle, we fall back to the cost-based analysis.
1046 //
1047 // Specifically, let R = CycleSavings / Size, we accept the inlining
1048 // opportunity if:
1049 //
1050 // PSI->getOrCompHotCountThreshold()
1051 // R > -------------------------------------------------
1052 // getInliningCostBenefitAnalysisSavingsMultiplier()
1053 //
1054 // and reject the inlining opportunity if:
1055 //
1056 // PSI->getOrCompHotCountThreshold()
1057 // R <= ----------------------------------------------------
1058 // getInliningCostBenefitAnalysisProfitableMultiplier()
1059 //
1060 // Otherwise, we fall back to the cost-based analysis.
1061 //
1062 // Implementation-wise, use multiplication (CycleSavings * Multiplier,
1063 // HotCountThreshold * Size) rather than division to avoid precision loss.
1064 APInt Threshold(128, PSI->getOrCompHotCountThreshold());
1065 Threshold *= Size;
1066
1067 APInt UpperBoundCycleSavings = CycleSavings;
1068 UpperBoundCycleSavings *= getInliningCostBenefitAnalysisSavingsMultiplier();
1069 if (UpperBoundCycleSavings.uge(Threshold))
1070 return true;
1071
1072 APInt LowerBoundCycleSavings = CycleSavings;
1073 LowerBoundCycleSavings *=
1074 getInliningCostBenefitAnalysisProfitableMultiplier();
1075 if (LowerBoundCycleSavings.ult(Threshold))
1076 return false;
1077
1078 // Otherwise, fall back to the cost-based analysis.
1079 return std::nullopt;
1080 }
1081
// Final verdict for the heuristic analyzer: applies the min-size loop
// penalty, retires unearned vector bonuses, honors test-only attribute
// overrides, and then decides via cost-benefit analysis or the
// cost-vs-threshold comparison.
1082 InlineResult finalizeAnalysis() override {
1083 // Loops generally act a lot like calls in that they act like barriers to
1084 // movement, require a certain amount of setup, etc. So when optimising for
1085 // size, we penalise any call sites that perform loops. We do this after all
1086 // other costs here, so will likely only be dealing with relatively small
1087 // functions (and hence DT and LI will hopefully be cheap).
1088 auto *Caller = CandidateCall.getFunction();
1089 if (Caller->hasMinSize()) {
1090 DominatorTree DT(F);
1091 LoopInfo LI(DT);
1092 int NumLoops = 0;
1093 for (Loop *L : LI) {
1094 // Ignore loops that will not be executed
1095 if (DeadBlocks.count(L->getHeader()))
1096 continue;
1097 NumLoops++;
1098 }
1099 addCost(NumLoops * InlineConstants::LoopPenalty);
1100 }
1101
1102 // We applied the maximum possible vector bonus at the beginning. Now,
1103 // subtract the excess bonus, if any, from the Threshold before
1104 // comparing against Cost.
1105 if (NumVectorInstructions <= NumInstructions / 10)
1106 Threshold -= VectorBonus;
1107 else if (NumVectorInstructions <= NumInstructions / 2)
1108 Threshold -= VectorBonus / 2;
1109
1110 if (std::optional<int> AttrCost =
1111 getStringFnAttrAsInt(CandidateCall, "function-inline-cost"))
1112 Cost = *AttrCost;
1113
1114 if (std::optional<int> AttrCostMult = getStringFnAttrAsInt(
1115 CandidateCall,
// NOTE(review): original line 1116 (the attribute-name string argument of
// this lookup) is missing from this copy; verify against upstream.
1117 Cost *= *AttrCostMult;
1118
1119 if (std::optional<int> AttrThreshold =
1120 getStringFnAttrAsInt(CandidateCall, "function-inline-threshold"))
1121 Threshold = *AttrThreshold;
1122
1123 if (auto Result = costBenefitAnalysis()) {
1124 DecidedByCostBenefit = true;
1125 if (*Result)
1126 return InlineResult::success();
1127 else
1128 return InlineResult::failure("Cost over threshold.");
1129 }
1130
1131 if (IgnoreThreshold)
1132 return InlineResult::success();
1133
1134 DecidedByCostThreshold = true;
1135 return Cost < std::max(1, Threshold)
// NOTE(review): original line 1136 (the success branch of this conditional
// expression) is missing from this copy; verify against upstream.
1137 : InlineResult::failure("Cost over threshold.");
1138 }
1139
1140 bool shouldStop() override {
1141 if (IgnoreThreshold || ComputeFullInlineCost)
1142 return false;
1143 // Bail out the moment we cross the threshold. This means we'll under-count
1144 // the cost, but only when undercounting doesn't matter.
1145 if (Cost < Threshold)
1146 return false;
1147 DecidedByCostThreshold = true;
1148 return true;
1149 }
1150
// Accumulate the savings expected from eliminating a redundant load.
1151 void onLoadEliminationOpportunity() override {
1152 LoadEliminationCost += InstrCost;
1153 }
1154
1155 InlineResult onAnalysisStart() override {
1156 // Perform some tweaks to the cost and threshold based on the direct
1157 // callsite information.
1158
1159 // We want to more aggressively inline vector-dense kernels, so up the
1160 // threshold, and we'll lower it if the % of vector instructions gets too
1161 // low. Note that these bonuses are some what arbitrary and evolved over
1162 // time by accident as much as because they are principled bonuses.
1163 //
1164 // FIXME: It would be nice to remove all such bonuses. At least it would be
1165 // nice to base the bonus values on something more scientific.
1166 assert(NumInstructions == 0);
1167 assert(NumVectorInstructions == 0);
1168
1169 // Update the threshold based on callsite properties
1170 updateThreshold(CandidateCall, F);
1171
1172 // While Threshold depends on commandline options that can take negative
1173 // values, we want to enforce the invariant that the computed threshold and
1174 // bonuses are non-negative.
1175 assert(Threshold >= 0);
1176 assert(SingleBBBonus >= 0);
1177 assert(VectorBonus >= 0);
1178
1179 // Speculatively apply all possible bonuses to Threshold. If cost exceeds
1180 // this Threshold any time, and cost cannot decrease, we can stop processing
1181 // the rest of the function body.
1182 Threshold += (SingleBBBonus + VectorBonus);
1183
1184 // Give out bonuses for the callsite, as the instructions setting them up
1185 // will be gone after inlining.
1186 addCost(-getCallsiteCost(TTI, this->CandidateCall, DL));
1187
1188 // If this function uses the coldcc calling convention, prefer not to inline
1189 // it.
1190 if (F.getCallingConv() == CallingConv::Cold)
// NOTE(review): original line 1191 (the body of this if — presumably the
// coldcc cost penalty) is missing from this copy; verify against upstream.
1192
1193 LLVM_DEBUG(dbgs() << " Initial cost: " << Cost << "\n");
1194
1195 // Check if we're done. This can happen due to bonuses and penalties.
1196 if (Cost >= Threshold && !ComputeFullInlineCost)
1197 return InlineResult::failure("high cost");
1198
1199 return InlineResult::success();
1200 }
1201
1202 public:
// Construct the heuristic cost analyzer for inlining \p Callee into \p Call.
// Note: ComputeFullInlineCost is forced on when an ORE is supplied or when
// the cost-benefit analysis is enabled, since both need the complete cost.
1203 InlineCostCallAnalyzer(
1204 Function &Callee, CallBase &Call, const InlineParams &Params,
1205 const TargetTransformInfo &TTI,
1206 function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
1207 function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
1208 function_ref<const TargetLibraryInfo &(Function &)> GetTLI = nullptr,
1209 ProfileSummaryInfo *PSI = nullptr,
1210 OptimizationRemarkEmitter *ORE = nullptr, bool BoostIndirect = true,
1211 bool IgnoreThreshold = false,
1212 function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache =
1213 nullptr)
1214 : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, GetTLI, PSI,
1215 ORE, GetEphValuesCache),
1216 ComputeFullInlineCost(OptComputeFullInlineCost ||
1217 Params.ComputeFullInlineCost || ORE ||
1218 isCostBenefitAnalysisEnabled()),
1219 Params(Params), Threshold(Params.DefaultThreshold),
1220 BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold),
1221 CostBenefitAnalysisEnabled(isCostBenefitAnalysisEnabled()),
1222 Writer(this) {
1223 AllowRecursiveCall = *Params.AllowRecursiveCall;
1224 }
1225
1226 /// Annotation writer used to print per-instruction cost details into IR dumps.
1227 InlineCostAnnotationWriter Writer;
1228
// Debug-build dump of the analysis; see print() for the build-independent form.
1229 void dump();
1230
1231 // Prints the same analysis as dump(), but its definition is not dependent
1232 // on the build.
1233 void print(raw_ostream &OS);
1234
1235 std::optional<InstructionCostDetail> getCostDetails(const Instruction *I) {
1236 auto It = InstructionCostDetailMap.find(I);
1237 if (It != InstructionCostDetailMap.end())
1238 return It->second;
1239 return std::nullopt;
1240 }
1241
1242 ~InlineCostCallAnalyzer() override = default;
// Accessors for the final analysis results.
1243 int getThreshold() const { return Threshold; }
1244 int getCost() const { return Cost; }
1245 int getStaticBonusApplied() const { return StaticBonusApplied; }
1246 std::optional<CostBenefitPair> getCostBenefitPair() { return CostBenefit; }
1247 bool wasDecidedByCostBenefit() const { return DecidedByCostBenefit; }
1248 bool wasDecidedByCostThreshold() const { return DecidedByCostThreshold; }
1249 };
1250
1251// Return true if CB is the sole call to local function Callee.
1252static bool isSoleCallToLocalFunction(const CallBase &CB,
1253 const Function &Callee) {
1254 return Callee.hasLocalLinkage() && Callee.hasOneLiveUse() &&
1255 &Callee == CB.getCalledFunction();
1256}
1257
// Feature-extraction analyzer for the ML inliner: instead of a single scalar
// cost it records one counter per InlineCostFeatureIndex, mirroring the
// events the heuristic analyzer charges for.
1258 class InlineCostFeaturesAnalyzer final : public CallAnalyzer {
1259 private:
1260 InlineCostFeatures Cost = {};
1261
1262 // FIXME: These constants are taken from the heuristic-based cost visitor.
1263 // These should be removed entirely in a later revision to avoid reliance on
1264 // heuristics in the ML inliner.
1265 static constexpr int JTCostMultiplier = 2;
1266 static constexpr int CaseClusterCostMultiplier = 2;
1267 static constexpr int SwitchDefaultDestCostMultiplier = 2;
1268 static constexpr int SwitchCostMultiplier = 2;
1269
1270 // FIXME: These are taken from the heuristic-based cost visitor: we should
1271 // eventually abstract these to the CallAnalyzer to avoid duplication.
1272 unsigned SROACostSavingOpportunities = 0;
1273 int VectorBonus = 0;
1274 int SingleBBBonus = 0;
1275 int Threshold = 5;
1276
1277 DenseMap<AllocaInst *, unsigned> SROACosts;
1278
// Add Delta to the given feature counter.
1279 void increment(InlineCostFeatureIndex Feature, int64_t Delta = 1) {
1280 Cost[static_cast<size_t>(Feature)] += Delta;
1281 }
1282
// Overwrite the given feature counter with Value.
1283 void set(InlineCostFeatureIndex Feature, int64_t Value) {
1284 Cost[static_cast<size_t>(Feature)] = Value;
1285 }
1286
1287 void onDisableSROA(AllocaInst *Arg) override {
1288 auto CostIt = SROACosts.find(Arg);
1289 if (CostIt == SROACosts.end())
1290 return;
1291
1292 increment(InlineCostFeatureIndex::sroa_losses, CostIt->second);
1293 SROACostSavingOpportunities -= CostIt->second;
1294 SROACosts.erase(CostIt);
1295 }
1296
1297 void onDisableLoadElimination() override {
1298 set(InlineCostFeatureIndex::load_elimination, 1);
1299 }
1300
1301 void onCallPenalty() override {
1302 increment(InlineCostFeatureIndex::call_penalty, CallPenalty);
1303 }
1304
1305 void onCallArgumentSetup(const CallBase &Call) override {
1306 increment(InlineCostFeatureIndex::call_argument_setup,
1307 Call.arg_size() * InstrCost);
1308 }
1309
1310 void onLoadRelativeIntrinsic() override {
1311 increment(InlineCostFeatureIndex::load_relative_intrinsic, 3 * InstrCost);
1312 }
1313
// For indirect calls with a known target, run a nested cost analysis on the
// target and record the estimated cost as a feature.
1314 void onLoweredCall(Function *F, CallBase &Call,
1315 bool IsIndirectCall) override {
1316 increment(InlineCostFeatureIndex::lowered_call_arg_setup,
1317 Call.arg_size() * InstrCost);
1318
1319 if (IsIndirectCall) {
1320 InlineParams IndirectCallParams = {/* DefaultThreshold*/ 0,
1321 /*HintThreshold*/ {},
1322 /*ColdThreshold*/ {},
1323 /*OptSizeThreshold*/ {},
1324 /*OptMinSizeThreshold*/ {},
1325 /*HotCallSiteThreshold*/ {},
1326 /*LocallyHotCallSiteThreshold*/ {},
1327 /*ColdCallSiteThreshold*/ {},
1328 /*ComputeFullInlineCost*/ true,
1329 /*EnableDeferral*/ true};
1330 IndirectCallParams.DefaultThreshold =
// NOTE(review): original line 1331 (the right-hand side of this assignment,
// presumably the indirect-call threshold constant) is missing from this
// copy; verify against upstream before editing.
1332
1333 InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI,
1334 GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,
1335 false, true);
1336 if (CA.analyze().isSuccess()) {
1337 increment(InlineCostFeatureIndex::nested_inline_cost_estimate,
1338 CA.getCost());
1339 increment(InlineCostFeatureIndex::nested_inlines, 1);
1340 }
1341 } else {
1342 onCallPenalty();
1343 }
1344 }
1345
1346 void onFinalizeSwitch(unsigned JumpTableSize, unsigned NumCaseCluster,
1347 bool DefaultDestUnreachable) override {
1348 if (JumpTableSize) {
1349 if (!DefaultDestUnreachable)
1350 increment(InlineCostFeatureIndex::switch_default_dest_penalty,
1351 SwitchDefaultDestCostMultiplier * InstrCost);
1352 int64_t JTCost = static_cast<int64_t>(JumpTableSize) * InstrCost +
1353 JTCostMultiplier * InstrCost;
1354 increment(InlineCostFeatureIndex::jump_table_penalty, JTCost);
1355 return;
1356 }
1357
1358 if (NumCaseCluster <= 3) {
1359 increment(InlineCostFeatureIndex::case_cluster_penalty,
1360 (NumCaseCluster - DefaultDestUnreachable) *
1361 CaseClusterCostMultiplier * InstrCost);
1362 return;
1363 }
1364
1365 int64_t ExpectedNumberOfCompare =
1366 getExpectedNumberOfCompare(NumCaseCluster);
1367
1368 int64_t SwitchCost =
1369 ExpectedNumberOfCompare * SwitchCostMultiplier * InstrCost;
1370 increment(InlineCostFeatureIndex::switch_penalty, SwitchCost);
1371 }
1372
1373 void onMissedSimplification() override {
1374 increment(InlineCostFeatureIndex::unsimplified_common_instructions,
1375 InstrCost);
1376 }
1377
1378 void onInitializeSROAArg(AllocaInst *Arg) override {
1379 auto SROAArgCost = TTI.getCallerAllocaCost(&CandidateCall, Arg);
1380 SROACosts[Arg] = SROAArgCost;
1381 SROACostSavingOpportunities += SROAArgCost;
1382 }
1383
1384 void onAggregateSROAUse(AllocaInst *Arg) override {
1385 SROACosts.find(Arg)->second += InstrCost;
1386 SROACostSavingOpportunities += InstrCost;
1387 }
1388
1389 void onBlockAnalyzed(const BasicBlock *BB) override {
1390 if (BB->getTerminator()->getNumSuccessors() > 1)
1391 set(InlineCostFeatureIndex::is_multiple_blocks, 1);
1392 Threshold -= SingleBBBonus;
1393 }
1394
1395 InlineResult finalizeAnalysis() override {
1396 auto *Caller = CandidateCall.getFunction();
1397 if (Caller->hasMinSize()) {
1398 DominatorTree DT(F);
1399 LoopInfo LI(DT);
1400 for (Loop *L : LI) {
1401 // Ignore loops that will not be executed
1402 if (DeadBlocks.count(L->getHeader()))
1403 continue;
1404 increment(InlineCostFeatureIndex::num_loops,
// NOTE(review): original line 1405 (the second argument of this increment,
// presumably the loop penalty constant) is missing from this copy; verify
// against upstream before editing.
1406 }
1407 }
1408 set(InlineCostFeatureIndex::dead_blocks, DeadBlocks.size());
1409 set(InlineCostFeatureIndex::simplified_instructions,
1410 NumInstructionsSimplified);
1411 set(InlineCostFeatureIndex::constant_args, NumConstantArgs);
1412 set(InlineCostFeatureIndex::constant_offset_ptr_args,
1413 NumConstantOffsetPtrArgs);
1414 set(InlineCostFeatureIndex::sroa_savings, SROACostSavingOpportunities);
1415
1416 if (NumVectorInstructions <= NumInstructions / 10)
1417 Threshold -= VectorBonus;
1418 else if (NumVectorInstructions <= NumInstructions / 2)
1419 Threshold -= VectorBonus / 2;
1420
1421 set(InlineCostFeatureIndex::threshold, Threshold);
1422
1423 return InlineResult::success();
1424 }
1425
// Feature extraction always visits the whole function; never stop early.
1426 bool shouldStop() override { return false; }
1427
1428 void onLoadEliminationOpportunity() override {
1429 increment(InlineCostFeatureIndex::load_elimination, 1);
1430 }
1431
1432 InlineResult onAnalysisStart() override {
1433 increment(InlineCostFeatureIndex::callsite_cost,
1434 -1 * getCallsiteCost(TTI, this->CandidateCall, DL));
1435
1436 set(InlineCostFeatureIndex::cold_cc_penalty,
1437 (F.getCallingConv() == CallingConv::Cold));
1438
1439 set(InlineCostFeatureIndex::last_call_to_static_bonus,
1440 isSoleCallToLocalFunction(CandidateCall, F));
1441
1442 // FIXME: we shouldn't repeat this logic in both the Features and Cost
1443 // analyzer - instead, we should abstract it to a common method in the
1444 // CallAnalyzer
1445 int SingleBBBonusPercent = 50;
1446 int VectorBonusPercent = TTI.getInlinerVectorBonusPercent();
1447 Threshold += TTI.adjustInliningThreshold(&CandidateCall);
1448 Threshold *= TTI.getInliningThresholdMultiplier();
1449 SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
1450 VectorBonus = Threshold * VectorBonusPercent / 100;
1451 Threshold += (SingleBBBonus + VectorBonus);
1452
1453 return InlineResult::success();
1454 }
1455
1456 public:
1457 InlineCostFeaturesAnalyzer(
1458 const TargetTransformInfo &TTI,
1459 function_ref<AssumptionCache &(Function &)> &GetAssumptionCache,
1460 function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
1461 function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
1462 ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, Function &Callee,
1463 CallBase &Call)
1464 : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, GetTLI,
1465 PSI) {}
1466
// Read-only access to the collected feature vector.
1467 const InlineCostFeatures &features() const { return Cost; }
1468 };
1469
1470} // namespace
1471
1472/// Test whether the given value is an Alloca-derived function argument.
1473bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
1474 return SROAArgValues.count(V);
1475}
1476
// Stop tracking SROA savings for \p SROAArg: notify the cost visitor (so it
// can roll back the accumulated savings), drop the alloca from the enabled
// set, and disable further load elimination.
1477 void CallAnalyzer::disableSROAForArg(AllocaInst *SROAArg) {
1478 onDisableSROA(SROAArg);
1479 EnabledSROAAllocas.erase(SROAArg);
1480 disableLoadElimination();
1481 }
1482
1483void InlineCostAnnotationWriter::emitInstructionAnnot(
1484 const Instruction *I, formatted_raw_ostream &OS) {
1485 // The cost of inlining of the given instruction is printed always.
1486 // The threshold delta is printed only when it is non-zero. It happens
1487 // when we decided to give a bonus at a particular instruction.
1488 std::optional<InstructionCostDetail> Record = ICCA->getCostDetails(I);
1489 if (!Record)
1490 OS << "; No analysis for the instruction";
1491 else {
1492 OS << "; cost before = " << Record->CostBefore
1493 << ", cost after = " << Record->CostAfter
1494 << ", threshold before = " << Record->ThresholdBefore
1495 << ", threshold after = " << Record->ThresholdAfter << ", ";
1496 OS << "cost delta = " << Record->getCostDelta();
1497 if (Record->hasThresholdChanged())
1498 OS << ", threshold delta = " << Record->getThresholdDelta();
1499 }
1500 auto *V = ICCA->getSimplifiedValueUnchecked(const_cast<Instruction *>(I));
1501 if (V) {
1502 OS << ", simplified to ";
1503 V->print(OS, true);
1504 if (auto *VI = dyn_cast<Instruction>(V)) {
1505 if (VI->getFunction() != I->getFunction())
1506 OS << " (caller instruction)";
1507 } else if (auto *VArg = dyn_cast<Argument>(V)) {
1508 if (VArg->getParent() != I->getFunction())
1509 OS << " (caller argument)";
1510 }
1511 }
1512 OS << "\n";
1513}
1514
1515/// If 'V' maps to a SROA candidate, disable SROA for it.
1516void CallAnalyzer::disableSROA(Value *V) {
1517 if (auto *SROAArg = getSROAArgForValueOrNull(V)) {
1518 disableSROAForArg(SROAArg);
1519 }
1520}
1521
1522void CallAnalyzer::disableLoadElimination() {
1523 if (EnableLoadElimination) {
1524 onDisableLoadElimination();
1525 EnableLoadElimination = false;
1526 }
1527}
1528
1529 /// Accumulate a constant GEP offset into an APInt if possible.
1530 ///
1531 /// Returns false if unable to compute the offset for any reason. Respects any
1532 /// simplified values known during the analysis of this callsite.
1533 bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
1534 unsigned IntPtrWidth = DL.getIndexTypeSizeInBits(GEP.getType());
1535 assert(IntPtrWidth == Offset.getBitWidth());
1536
// NOTE(review): original line 1537 (the for-loop header — presumably the
// gep_type_begin/gep_type_end iteration that defines GTI and GTE) is missing
// from this copy; verify against upstream before editing.
1538 GTI != GTE; ++GTI) {
1539 ConstantInt *OpC =
1540 getDirectOrSimplifiedValue<ConstantInt>(GTI.getOperand());
1541 if (!OpC)
1542 return false;
1543 if (OpC->isZero())
1544 continue;
1545
1546 // Handle a struct index, which adds its field offset to the pointer.
1547 if (StructType *STy = GTI.getStructTypeOrNull()) {
1548 unsigned ElementIdx = OpC->getZExtValue();
1549 const StructLayout *SL = DL.getStructLayout(STy);
1550 Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
1551 continue;
1552 }
1553
// Sequential (array/vector) index: scale by the element stride.
1554 APInt TypeSize(IntPtrWidth, GTI.getSequentialElementStride(DL));
1555 Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
1556 }
1557 return true;
1558 }
1559
1560 /// Use TTI to check whether a GEP is free.
1561 ///
1562 /// Respects any simplified values known during the analysis of this callsite.
1563 bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
1564 SmallVector<Value *, 4> Operands;
1565 Operands.push_back(GEP.getOperand(0));
// Substitute simplified constants for indices where known, so TTI sees the
// operands the GEP would have after inlining.
1566 for (const Use &Op : GEP.indices())
1567 if (Constant *SimpleOp = getSimplifiedValue<Constant>(Op))
1568 Operands.push_back(SimpleOp);
1569 else
1570 Operands.push_back(Op);
1571 return TTI.getInstructionCost(&GEP, Operands,
// NOTE(review): original lines 1572-1573 (the cost-kind argument and the
// comparison against TTI's "free" cost) are missing from this copy; verify
// against upstream before editing.
1574 }
1575
// Model an alloca in the callee: accumulates static allocation sizes and
// records whether inlining would introduce a dynamic (or oversized) alloca.
// Returns false — allocas are never free.
1576 bool CallAnalyzer::visitAlloca(AllocaInst &I) {
1577 disableSROA(I.getOperand(0));
1578
1579 // Check whether inlining will turn a dynamic alloca into a static
1580 // alloca and handle that case.
1581 if (I.isArrayAllocation()) {
1582 Constant *Size = getSimplifiedValue<Constant>(I.getArraySize());
1583 if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
1584 // Sometimes a dynamic alloca could be converted into a static alloca
1585 // after this constant prop, and become a huge static alloca on an
1586 // unconditional CFG path. Avoid inlining if this is going to happen above
1587 // a threshold.
1588 // FIXME: If the threshold is removed or lowered too much, we could end up
1589 // being too pessimistic and prevent inlining non-problematic code. This
1590 // could result in unintended perf regressions. A better overall strategy
1591 // is needed to track stack usage during inlining.
1592 Type *Ty = I.getAllocatedType();
1593 AllocatedSize = SaturatingMultiplyAdd(
1594 AllocSize->getLimitedValue(),
1595 DL.getTypeAllocSize(Ty).getKnownMinValue(), AllocatedSize);
// NOTE(review): original line 1596 (the threshold comparison guarding this
// HasDynamicAlloca assignment) is missing from this copy; verify against
// upstream before editing.
1597 HasDynamicAlloca = true;
1598 return false;
1599 }
1600 }
1601
1602 if (I.isStaticAlloca()) {
1603 // Accumulate the allocated size if constant and executed once.
1604 // Note: if AllocSize is a vscale value, this is an underestimate of the
1605 // allocated size, and it also requires some of the cost of a dynamic
1606 // alloca, but is recorded here as a constant size alloca.
1607 TypeSize AllocSize = I.getAllocationSize(DL).value_or(TypeSize::getZero());
1608 AllocatedSize = SaturatingAdd(AllocSize.getKnownMinValue(), AllocatedSize);
1609 } else {
1610 // FIXME: This is overly conservative. Dynamic allocas are inefficient for
1611 // a variety of reasons, and so we would like to not inline them into
1612 // functions which don't currently have a dynamic alloca. This simply
1613 // disables inlining altogether in the presence of a dynamic alloca.
1614 HasDynamicAlloca = true;
1615 }
1616
1617 return false;
1618 }
1619
// Model a PHI node: if every live incoming value agrees on a single constant
// (recorded in SimplifiedValues) or a single base-pointer + constant offset
// (recorded in ConstantOffsetPtrs), propagate that fact through the PHI.
// Returns true — PHI nodes are always modeled as free.
1620 bool CallAnalyzer::visitPHI(PHINode &I) {
1621 // FIXME: We need to propagate SROA *disabling* through phi nodes, even
1622 // though we don't want to propagate its bonuses. The idea is to disable
1623 // SROA if it *might* be used in an inappropriate manner.
1624
1625 // Phi nodes are always zero-cost.
1626 // FIXME: Pointer sizes may differ between different address spaces, so do we
1627 // need to use correct address space in the call to getPointerSizeInBits here?
1628 // Or could we skip the getPointerSizeInBits call completely? As far as I can
1629 // see the ZeroOffset is used as a dummy value, so we can probably use any
1630 // bit width for the ZeroOffset?
1631 APInt ZeroOffset = APInt::getZero(DL.getPointerSizeInBits(0));
1632 bool CheckSROA = I.getType()->isPointerTy();
1633
1634 // Track the constant or pointer with constant offset we've seen so far.
1635 Constant *FirstC = nullptr;
1636 std::pair<Value *, APInt> FirstBaseAndOffset = {nullptr, ZeroOffset};
1637 Value *FirstV = nullptr;
1638
1639 for (unsigned i = 0, e = I.getNumIncomingValues(); i != e; ++i) {
1640 BasicBlock *Pred = I.getIncomingBlock(i);
1641 // If the incoming block is dead, skip the incoming block.
1642 if (DeadBlocks.count(Pred))
1643 continue;
1644 // If the parent block of phi is not the known successor of the incoming
1645 // block, skip the incoming block.
1646 BasicBlock *KnownSuccessor = KnownSuccessors[Pred];
1647 if (KnownSuccessor && KnownSuccessor != I.getParent())
1648 continue;
1649
1650 Value *V = I.getIncomingValue(i);
1651 // If the incoming value is this phi itself, skip the incoming value.
1652 if (&I == V)
1653 continue;
1654
1655 Constant *C = getDirectOrSimplifiedValue<Constant>(V);
1656
1657 std::pair<Value *, APInt> BaseAndOffset = {nullptr, ZeroOffset};
1658 if (!C && CheckSROA)
1659 BaseAndOffset = ConstantOffsetPtrs.lookup(V);
1660
1661 if (!C && !BaseAndOffset.first)
1662 // The incoming value is neither a constant nor a pointer with constant
1663 // offset, exit early.
1664 return true;
1665
1666 if (FirstC) {
1667 if (FirstC == C)
1668 // If we've seen a constant incoming value before and it is the same
1669 // constant we see this time, continue checking the next incoming value.
1670 continue;
1671 // Otherwise early exit because we either see a different constant or saw
1672 // a constant before but we have a pointer with constant offset this time.
1673 return true;
1674 }
1675
1676 if (FirstV) {
1677 // The same logic as above, but check pointer with constant offset here.
1678 if (FirstBaseAndOffset == BaseAndOffset)
1679 continue;
1680 return true;
1681 }
1682
1683 if (C) {
1684 // This is the 1st time we've seen a constant, record it.
1685 FirstC = C;
1686 continue;
1687 }
1688
1689 // The remaining case is that this is the 1st time we've seen a pointer with
1690 // constant offset, record it.
1691 FirstV = V;
1692 FirstBaseAndOffset = BaseAndOffset;
1693 }
1694
1695 // Check if we can map phi to a constant.
1696 if (FirstC) {
1697 SimplifiedValues[&I] = FirstC;
1698 return true;
1699 }
1700
1701 // Check if we can map phi to a pointer with constant offset.
1702 if (FirstBaseAndOffset.first) {
1703 ConstantOffsetPtrs[&I] = std::move(FirstBaseAndOffset);
1704
1705 if (auto *SROAArg = getSROAArgForValueOrNull(FirstV))
1706 SROAArgValues[&I] = SROAArg;
1707 }
1708
1709 return true;
1710 }
1711
1712/// Check we can fold GEPs of constant-offset call site argument pointers.
1713/// This requires target data and inbounds GEPs.
1714///
1715/// \return true if the specified GEP can be folded.
1716bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) {
1717 // Check if we have a base + offset for the pointer.
1718 std::pair<Value *, APInt> BaseAndOffset =
1719 ConstantOffsetPtrs.lookup(I.getPointerOperand());
1720 if (!BaseAndOffset.first)
1721 return false;
1722
1723 // Check if the offset of this GEP is constant, and if so accumulate it
1724 // into Offset.
1725 if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second))
1726 return false;
1727
1728 // Add the result as a new mapping to Base + Offset.
1729 ConstantOffsetPtrs[&I] = std::move(BaseAndOffset);
1730
1731 return true;
1732}
1733
1734bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
1735 auto *SROAArg = getSROAArgForValueOrNull(I.getPointerOperand());
1736
1737 // Lambda to check whether a GEP's indices are all constant.
1738 auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) {
1739 for (const Use &Op : GEP.indices())
1740 if (!getDirectOrSimplifiedValue<Constant>(Op))
1741 return false;
1742 return true;
1743 };
1744
1747 return true;
1748
1749 if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) {
1750 if (SROAArg)
1751 SROAArgValues[&I] = SROAArg;
1752
1753 // Constant GEPs are modeled as free.
1754 return true;
1755 }
1756
1757 // Variable GEPs will require math and will disable SROA.
1758 if (SROAArg)
1759 disableSROAForArg(SROAArg);
1760 return isGEPFree(I);
1761}
1762
1763// Simplify \p Cmp if RHS is const and we can ValueTrack LHS.
1764// This handles the case only when the Cmp instruction is guarding a recursive
1765// call that will cause the Cmp to fail/succeed for the recursive call.
bool CallAnalyzer::simplifyCmpInstForRecCall(CmpInst &Cmp) {
  // Bail out if LHS is not a function argument or RHS is NOT const:
  if (!isa<Argument>(Cmp.getOperand(0)) || !isa<Constant>(Cmp.getOperand(1)))
    return false;
  auto *CmpOp = Cmp.getOperand(0);
  // Make sure that the callsite is recursive:
  if (CandidateCall.getCaller() != &F)
    return false;
  // Only handle the case when the callsite has a single predecessor:
  auto *CallBB = CandidateCall.getParent();
  auto *Predecessor = CallBB->getSinglePredecessor();
  if (!Predecessor)
    return false;
  // Check if the callsite is guarded by the same Cmp instruction:
  auto *Br = dyn_cast<CondBrInst>(Predecessor->getTerminator());
  if (!Br || Br->getCondition() != &Cmp)
    return false;

  // Check if there is any arg of the recursive callsite is affecting the cmp
  // instr: positionally match the compared function argument against the
  // value the recursive call passes in that slot. Only interesting when they
  // differ, i.e. the argument actually changes across the recursion.
  bool ArgFound = false;
  Value *FuncArg = nullptr, *CallArg = nullptr;
  for (unsigned ArgNum = 0;
       ArgNum < F.arg_size() && ArgNum < CandidateCall.arg_size(); ArgNum++) {
    FuncArg = F.getArg(ArgNum);
    CallArg = CandidateCall.getArgOperand(ArgNum);
    if (FuncArg == CmpOp && CallArg != CmpOp) {
      ArgFound = true;
      break;
    }
  }
  if (!ArgFound)
    return false;

  // Now we have a recursive call that is guarded by a cmp instruction.
  // Check if this cmp can be simplified: re-evaluate the compare with the
  // call's argument substituted for the function argument, under the guarding
  // branch's condition context. Invert records whether the callsite sits on
  // the false edge of the guard.
  SimplifyQuery SQ(DL, dyn_cast<Instruction>(CallArg));
  CondContext CC(&Cmp);
  CC.Invert = (CallBB != Br->getSuccessor(0));
  SQ.CC = &CC;
  CC.AffectedValues.insert(FuncArg);
  Value *SimplifiedInstruction = llvm::simplifyInstructionWithOperands(
      cast<CmpInst>(&Cmp), {CallArg, Cmp.getOperand(1)}, SQ);
  if (auto *ConstVal = dyn_cast_or_null<ConstantInt>(SimplifiedInstruction)) {
    // Make sure that the BB of the recursive call is NOT the true successor
    // of the icmp. In other words, make sure that the recursion depth is 1.
    if ((ConstVal->isOne() && CC.Invert) ||
        (ConstVal->isZero() && !CC.Invert)) {
      SimplifiedValues[&Cmp] = ConstVal;
      return true;
    }
  }
  return false;
}
1820
1821/// Simplify \p I if its operands are constants and update SimplifiedValues.
1822bool CallAnalyzer::simplifyInstruction(Instruction &I) {
1824 for (Value *Op : I.operands()) {
1825 Constant *COp = getDirectOrSimplifiedValue<Constant>(Op);
1826 if (!COp)
1827 return false;
1828 COps.push_back(COp);
1829 }
1830 auto *C = ConstantFoldInstOperands(&I, COps, DL);
1831 if (!C)
1832 return false;
1833 SimplifiedValues[&I] = C;
1834 return true;
1835}
1836
1837/// Try to simplify a call to llvm.is.constant.
1838///
1839/// Duplicate the argument checking from CallAnalyzer::simplifyCallSite since
1840/// we expect calls of this specific intrinsic to be infrequent.
1841///
1842/// FIXME: Given that we know CB's parent (F) caller
1843/// (CandidateCall->getParent()->getParent()), we might be able to determine
1844/// whether inlining F into F's caller would change how the call to
1845/// llvm.is.constant would evaluate.
1846bool CallAnalyzer::simplifyIntrinsicCallIsConstant(CallBase &CB) {
1847 Value *Arg = CB.getArgOperand(0);
1848 auto *C = getDirectOrSimplifiedValue<Constant>(Arg);
1849
1850 Type *RT = CB.getFunctionType()->getReturnType();
1851 SimplifiedValues[&CB] = ConstantInt::get(RT, C ? 1 : 0);
1852 return true;
1853}
1854
1855bool CallAnalyzer::simplifyIntrinsicCallObjectSize(CallBase &CB) {
1856 // As per the langref, "The fourth argument to llvm.objectsize determines if
1857 // the value should be evaluated at runtime."
1858 if (cast<ConstantInt>(CB.getArgOperand(3))->isOne())
1859 return false;
1860
1862 /*MustSucceed=*/true);
1864 if (C)
1865 SimplifiedValues[&CB] = C;
1866 return C;
1867}
1868
1869bool CallAnalyzer::visitBitCast(BitCastInst &I) {
1870 // Propagate constants through bitcasts.
1872 return true;
1873
1874 // Track base/offsets through casts
1875 std::pair<Value *, APInt> BaseAndOffset =
1876 ConstantOffsetPtrs.lookup(I.getOperand(0));
1877 // Casts don't change the offset, just wrap it up.
1878 if (BaseAndOffset.first)
1879 ConstantOffsetPtrs[&I] = std::move(BaseAndOffset);
1880
1881 // Also look for SROA candidates here.
1882 if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0)))
1883 SROAArgValues[&I] = SROAArg;
1884
1885 // Bitcasts are always zero cost.
1886 return true;
1887}
1888
1889bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
1890 // Propagate constants through ptrtoint.
1892 return true;
1893
1894 // Track base/offset pairs when converted to a plain integer provided the
1895 // integer is large enough to represent the pointer.
1896 unsigned IntegerSize = I.getType()->getScalarSizeInBits();
1897 unsigned AS = I.getOperand(0)->getType()->getPointerAddressSpace();
1898 if (IntegerSize == DL.getPointerSizeInBits(AS)) {
1899 std::pair<Value *, APInt> BaseAndOffset =
1900 ConstantOffsetPtrs.lookup(I.getOperand(0));
1901 if (BaseAndOffset.first)
1902 ConstantOffsetPtrs[&I] = std::move(BaseAndOffset);
1903 }
1904
1905 // This is really weird. Technically, ptrtoint will disable SROA. However,
1906 // unless that ptrtoint is *used* somewhere in the live basic blocks after
1907 // inlining, it will be nuked, and SROA should proceed. All of the uses which
1908 // would block SROA would also block SROA if applied directly to a pointer,
1909 // and so we can just add the integer in here. The only places where SROA is
1910 // preserved either cannot fire on an integer, or won't in-and-of themselves
1911 // disable SROA (ext) w/o some later use that we would see and disable.
1912 if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0)))
1913 SROAArgValues[&I] = SROAArg;
1914
1917}
1918
1919bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
1920 // Propagate constants through ptrtoint.
1922 return true;
1923
1924 // Track base/offset pairs when round-tripped through a pointer without
1925 // modifications provided the integer is not too large.
1926 Value *Op = I.getOperand(0);
1927 unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
1928 if (IntegerSize <= DL.getPointerTypeSizeInBits(I.getType())) {
1929 std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
1930 if (BaseAndOffset.first)
1931 ConstantOffsetPtrs[&I] = std::move(BaseAndOffset);
1932 }
1933
1934 // "Propagate" SROA here in the same manner as we do for ptrtoint above.
1935 if (auto *SROAArg = getSROAArgForValueOrNull(Op))
1936 SROAArgValues[&I] = SROAArg;
1937
1940}
1941
1942bool CallAnalyzer::visitCastInst(CastInst &I) {
1943 // Propagate constants through casts.
1945 return true;
1946
1947 // Disable SROA in the face of arbitrary casts we don't explicitly list
1948 // elsewhere.
1949 disableSROA(I.getOperand(0));
1950
1951 // If this is a floating-point cast, and the target says this operation
1952 // is expensive, this may eventually become a library call. Treat the cost
1953 // as such.
1954 switch (I.getOpcode()) {
1955 case Instruction::FPTrunc:
1956 case Instruction::FPExt:
1957 case Instruction::UIToFP:
1958 case Instruction::SIToFP:
1959 case Instruction::FPToUI:
1960 case Instruction::FPToSI:
1962 onCallPenalty();
1963 break;
1964 default:
1965 break;
1966 }
1967
1970}
1971
1972bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
1973 return CandidateCall.paramHasAttr(A->getArgNo(), Attr);
1974}
1975
1976bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
1977 // Does the *call site* have the NonNull attribute set on an argument? We
1978 // use the attribute on the call site to memoize any analysis done in the
1979 // caller. This will also trip if the callee function has a non-null
1980 // parameter attribute, but that's a less interesting case because hopefully
1981 // the callee would already have been simplified based on that.
1982 if (Argument *A = dyn_cast<Argument>(V))
1983 if (paramHasAttr(A, Attribute::NonNull))
1984 return true;
1985
1986 // Is this an alloca in the caller? This is distinct from the attribute case
1987 // above because attributes aren't updated within the inliner itself and we
1988 // always want to catch the alloca derived case.
1989 if (isAllocaDerivedArg(V))
1990 // We can actually predict the result of comparisons between an
1991 // alloca-derived value and null. Note that this fires regardless of
1992 // SROA firing.
1993 return true;
1994
1995 return false;
1996}
1997
1998bool CallAnalyzer::allowSizeGrowth(CallBase &Call) {
1999 // If the normal destination of the invoke or the parent block of the call
2000 // site is unreachable-terminated, there is little point in inlining this
2001 // unless there is literally zero cost.
2002 // FIXME: Note that it is possible that an unreachable-terminated block has a
2003 // hot entry. For example, in below scenario inlining hot_call_X() may be
2004 // beneficial :
2005 // main() {
2006 // hot_call_1();
2007 // ...
2008 // hot_call_N()
2009 // exit(0);
2010 // }
2011 // For now, we are not handling this corner case here as it is rare in real
2012 // code. In future, we should elaborate this based on BPI and BFI in more
2013 // general threshold adjusting heuristics in updateThreshold().
2014 if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
2015 if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
2016 return false;
2017 } else if (isa<UnreachableInst>(Call.getParent()->getTerminator()))
2018 return false;
2019
2020 return true;
2021}
2022
2023bool InlineCostCallAnalyzer::isColdCallSite(CallBase &Call,
2024 BlockFrequencyInfo *CallerBFI) {
2025 // If global profile summary is available, then callsite's coldness is
2026 // determined based on that.
2027 if (PSI && PSI->hasProfileSummary())
2028 return PSI->isColdCallSite(Call, CallerBFI);
2029
2030 // Otherwise we need BFI to be available.
2031 if (!CallerBFI)
2032 return false;
2033
2034 // Determine if the callsite is cold relative to caller's entry. We could
2035 // potentially cache the computation of scaled entry frequency, but the added
2036 // complexity is not worth it unless this scaling shows up high in the
2037 // profiles.
2038 const BranchProbability ColdProb(ColdCallSiteRelFreq, 100);
2039 auto CallSiteBB = Call.getParent();
2040 auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
2041 auto CallerEntryFreq =
2042 CallerBFI->getBlockFreq(&(Call.getCaller()->getEntryBlock()));
2043 return CallSiteFreq < CallerEntryFreq * ColdProb;
2044}
2045
2046std::optional<int>
2047InlineCostCallAnalyzer::getHotCallSiteThreshold(CallBase &Call,
2048 BlockFrequencyInfo *CallerBFI) {
2049
2050 // If global profile summary is available, then callsite's hotness is
2051 // determined based on that.
2052 if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(Call, CallerBFI))
2053 return Params.HotCallSiteThreshold;
2054
2055 // Otherwise we need BFI to be available and to have a locally hot callsite
2056 // threshold.
2057 if (!CallerBFI || !Params.LocallyHotCallSiteThreshold)
2058 return std::nullopt;
2059
2060 // Determine if the callsite is hot relative to caller's entry. We could
2061 // potentially cache the computation of scaled entry frequency, but the added
2062 // complexity is not worth it unless this scaling shows up high in the
2063 // profiles.
2064 const BasicBlock *CallSiteBB = Call.getParent();
2065 BlockFrequency CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
2066 BlockFrequency CallerEntryFreq = CallerBFI->getEntryFreq();
2067 std::optional<BlockFrequency> Limit = CallerEntryFreq.mul(HotCallSiteRelFreq);
2068 if (Limit && CallSiteFreq >= *Limit)
2069 return Params.LocallyHotCallSiteThreshold;
2070
2071 // Otherwise treat it normally.
2072 return std::nullopt;
2073}
2074
2075void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {
2076 // If no size growth is allowed for this inlining, set Threshold to 0.
2077 if (!allowSizeGrowth(Call)) {
2078 Threshold = 0;
2079 return;
2080 }
2081
2083
2084 // return min(A, B) if B is valid.
2085 auto MinIfValid = [](int A, std::optional<int> B) {
2086 return B ? std::min(A, *B) : A;
2087 };
2088
2089 // return max(A, B) if B is valid.
2090 auto MaxIfValid = [](int A, std::optional<int> B) {
2091 return B ? std::max(A, *B) : A;
2092 };
2093
2094 // Various bonus percentages. These are multiplied by Threshold to get the
2095 // bonus values.
2096 // SingleBBBonus: This bonus is applied if the callee has a single reachable
2097 // basic block at the given callsite context. This is speculatively applied
2098 // and withdrawn if more than one basic block is seen.
2099 //
2100 // LstCallToStaticBonus: This large bonus is applied to ensure the inlining
2101 // of the last call to a static function as inlining such functions is
2102 // guaranteed to reduce code size.
2103 //
2104 // These bonus percentages may be set to 0 based on properties of the caller
2105 // and the callsite.
2106 int SingleBBBonusPercent = 50;
2107 int VectorBonusPercent = TTI.getInlinerVectorBonusPercent();
2108 int LastCallToStaticBonus = TTI.getInliningLastCallToStaticBonus();
2109
2110 // Lambda to set all the above bonus and bonus percentages to 0.
2111 auto DisallowAllBonuses = [&]() {
2112 SingleBBBonusPercent = 0;
2113 VectorBonusPercent = 0;
2114 LastCallToStaticBonus = 0;
2115 };
2116
2117 // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available
2118 // and reduce the threshold if the caller has the necessary attribute.
2119 if (Caller->hasMinSize()) {
2120 Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
2121 // For minsize, we want to disable the single BB bonus and the vector
2122 // bonuses, but not the last-call-to-static bonus. Inlining the last call to
2123 // a static function will, at the minimum, eliminate the parameter setup and
2124 // call/return instructions.
2125 SingleBBBonusPercent = 0;
2126 VectorBonusPercent = 0;
2127 } else if (Caller->hasOptSize())
2128 Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);
2129
2130 // Adjust the threshold based on inlinehint attribute and profile based
2131 // hotness information if the caller does not have MinSize attribute.
2132 if (!Caller->hasMinSize()) {
2133 if (Callee.hasFnAttribute(Attribute::InlineHint))
2134 Threshold = MaxIfValid(Threshold, Params.HintThreshold);
2135
2136 // FIXME: After switching to the new passmanager, simplify the logic below
2137 // by checking only the callsite hotness/coldness as we will reliably
2138 // have local profile information.
2139 //
2140 // Callsite hotness and coldness can be determined if sample profile is
2141 // used (which adds hotness metadata to calls) or if caller's
2142 // BlockFrequencyInfo is available.
2143 BlockFrequencyInfo *CallerBFI = GetBFI ? &(GetBFI(*Caller)) : nullptr;
2144 auto HotCallSiteThreshold = getHotCallSiteThreshold(Call, CallerBFI);
2145 if (!Caller->hasOptSize() && HotCallSiteThreshold) {
2146 LLVM_DEBUG(dbgs() << "Hot callsite.\n");
2147 // FIXME: This should update the threshold only if it exceeds the
2148 // current threshold, but AutoFDO + ThinLTO currently relies on this
2149 // behavior to prevent inlining of hot callsites during ThinLTO
2150 // compile phase.
2151 Threshold = *HotCallSiteThreshold;
2152 } else if (isColdCallSite(Call, CallerBFI)) {
2153 LLVM_DEBUG(dbgs() << "Cold callsite.\n");
2154 // Do not apply bonuses for a cold callsite including the
2155 // LastCallToStatic bonus. While this bonus might result in code size
2156 // reduction, it can cause the size of a non-cold caller to increase
2157 // preventing it from being inlined.
2158 DisallowAllBonuses();
2159 Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
2160 } else if (PSI) {
2161 // Use callee's global profile information only if we have no way of
2162 // determining this via callsite information.
2163 if (PSI->isFunctionEntryHot(&Callee)) {
2164 LLVM_DEBUG(dbgs() << "Hot callee.\n");
2165 // If callsite hotness can not be determined, we may still know
2166 // that the callee is hot and treat it as a weaker hint for threshold
2167 // increase.
2168 Threshold = MaxIfValid(Threshold, Params.HintThreshold);
2169 } else if (PSI->isFunctionEntryCold(&Callee)) {
2170 LLVM_DEBUG(dbgs() << "Cold callee.\n");
2171 // Do not apply bonuses for a cold callee including the
2172 // LastCallToStatic bonus. While this bonus might result in code size
2173 // reduction, it can cause the size of a non-cold caller to increase
2174 // preventing it from being inlined.
2175 DisallowAllBonuses();
2176 Threshold = MinIfValid(Threshold, Params.ColdThreshold);
2177 }
2178 }
2179 }
2180
2181 Threshold += TTI.adjustInliningThreshold(&Call);
2182
2183 // Finally, take the target-specific inlining threshold multiplier into
2184 // account.
2185 Threshold *= TTI.getInliningThresholdMultiplier();
2186
2187 SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
2188 VectorBonus = Threshold * VectorBonusPercent / 100;
2189
2190 // If there is only one call of the function, and it has internal linkage,
2191 // the cost of inlining it drops dramatically. It may seem odd to update
2192 // Cost in updateThreshold, but the bonus depends on the logic in this method.
2193 if (isSoleCallToLocalFunction(Call, F)) {
2194 addCost(-LastCallToStaticBonus);
2195 StaticBonusApplied = LastCallToStaticBonus;
2196 }
2197}
2198
2199bool CallAnalyzer::visitCmpInst(CmpInst &I) {
2200 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
2201 // First try to handle simplified comparisons.
2203 return true;
2204
2205 // Try to handle comparison that can be simplified using ValueTracking.
2206 if (simplifyCmpInstForRecCall(I))
2207 return true;
2208
2209 if (I.getOpcode() == Instruction::FCmp)
2210 return false;
2211
2212 // Otherwise look for a comparison between constant offset pointers with
2213 // a common base.
2214 Value *LHSBase, *RHSBase;
2215 APInt LHSOffset, RHSOffset;
2216 std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
2217 if (LHSBase) {
2218 std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
2219 if (RHSBase && LHSBase == RHSBase) {
2220 // We have common bases, fold the icmp to a constant based on the
2221 // offsets.
2222 SimplifiedValues[&I] = ConstantInt::getBool(
2223 I.getType(),
2224 ICmpInst::compare(LHSOffset, RHSOffset, I.getPredicate()));
2225 ++NumConstantPtrCmps;
2226 return true;
2227 }
2228 }
2229
2230 auto isImplicitNullCheckCmp = [](const CmpInst &I) {
2231 for (auto *User : I.users())
2232 if (auto *Instr = dyn_cast<Instruction>(User))
2233 if (!Instr->getMetadata(LLVMContext::MD_make_implicit))
2234 return false;
2235 return true;
2236 };
2237
2238 // If the comparison is an equality comparison with null, we can simplify it
2239 // if we know the value (argument) can't be null
2240 if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1))) {
2241 if (isKnownNonNullInCallee(I.getOperand(0))) {
2242 bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
2243 SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
2244 : ConstantInt::getFalse(I.getType());
2245 return true;
2246 }
2247 // Implicit null checks act as unconditional branches and their comparisons
2248 // should be treated as simplified and free of cost.
2249 if (isImplicitNullCheckCmp(I))
2250 return true;
2251 }
2252 return handleSROA(I.getOperand(0), isa<ConstantPointerNull>(I.getOperand(1)));
2253}
2254
2255bool CallAnalyzer::visitSub(BinaryOperator &I) {
2256 // Try to handle a special case: we can fold computing the difference of two
2257 // constant-related pointers.
2258 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
2259 Value *LHSBase, *RHSBase;
2260 APInt LHSOffset, RHSOffset;
2261 std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
2262 if (LHSBase) {
2263 std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
2264 if (RHSBase && LHSBase == RHSBase) {
2265 // We have common bases, fold the subtract to a constant based on the
2266 // offsets.
2267 Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
2268 Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
2269 if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
2270 SimplifiedValues[&I] = C;
2271 ++NumConstantPtrDiffs;
2272 return true;
2273 }
2274 }
2275 }
2276
2277 // Otherwise, fall back to the generic logic for simplifying and handling
2278 // instructions.
2279 return Base::visitSub(I);
2280}
2281
2282bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
2283 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
2284 Constant *CLHS = getDirectOrSimplifiedValue<Constant>(LHS);
2285 Constant *CRHS = getDirectOrSimplifiedValue<Constant>(RHS);
2286
2287 Value *SimpleV = nullptr;
2288 if (auto FI = dyn_cast<FPMathOperator>(&I))
2289 SimpleV = simplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS,
2290 FI->getFastMathFlags(), DL);
2291 else
2292 SimpleV =
2293 simplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, DL);
2294
2295 if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
2296 SimplifiedValues[&I] = C;
2297
2298 if (SimpleV)
2299 return true;
2300
2301 // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
2302 disableSROA(LHS);
2303 disableSROA(RHS);
2304
2305 // If the instruction is floating point, and the target says this operation
2306 // is expensive, this may eventually become a library call. Treat the cost
2307 // as such. Unless it's fneg which can be implemented with an xor.
2308 using namespace llvm::PatternMatch;
2309 if (I.getType()->isFloatingPointTy() &&
2311 !match(&I, m_FNeg(m_Value())))
2312 onCallPenalty();
2313
2314 return false;
2315}
2316
2317bool CallAnalyzer::visitFNeg(UnaryOperator &I) {
2318 Value *Op = I.getOperand(0);
2319 Constant *COp = getDirectOrSimplifiedValue<Constant>(Op);
2320
2321 Value *SimpleV = simplifyFNegInst(
2322 COp ? COp : Op, cast<FPMathOperator>(I).getFastMathFlags(), DL);
2323
2324 if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
2325 SimplifiedValues[&I] = C;
2326
2327 if (SimpleV)
2328 return true;
2329
2330 // Disable any SROA on arguments to arbitrary, unsimplified fneg.
2331 disableSROA(Op);
2332
2333 return false;
2334}
2335
2336bool CallAnalyzer::visitLoad(LoadInst &I) {
2337 if (handleSROA(I.getPointerOperand(), I.isSimple()))
2338 return true;
2339
2340 // If the data is already loaded from this address and hasn't been clobbered
2341 // by any stores or calls, this load is likely to be redundant and can be
2342 // eliminated.
2343 if (EnableLoadElimination &&
2344 !LoadAddrSet.insert(I.getPointerOperand()).second && I.isUnordered()) {
2345 onLoadEliminationOpportunity();
2346 return true;
2347 }
2348
2349 onMemAccess();
2350 return false;
2351}
2352
2353bool CallAnalyzer::visitStore(StoreInst &I) {
2354 if (handleSROA(I.getPointerOperand(), I.isSimple()))
2355 return true;
2356
2357 // The store can potentially clobber loads and prevent repeated loads from
2358 // being eliminated.
2359 // FIXME:
2360 // 1. We can probably keep an initial set of eliminatable loads substracted
2361 // from the cost even when we finally see a store. We just need to disable
2362 // *further* accumulation of elimination savings.
2363 // 2. We should probably at some point thread MemorySSA for the callee into
2364 // this and then use that to actually compute *really* precise savings.
2365 disableLoadElimination();
2366
2367 onMemAccess();
2368 return false;
2369}
2370
2371bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
2372 Value *Op = I.getAggregateOperand();
2373
2374 // Special handling, because we want to simplify extractvalue with a
2375 // potential insertvalue from the caller.
2376 if (Value *SimpleOp = getSimplifiedValueUnchecked(Op)) {
2377 SimplifyQuery SQ(DL);
2378 Value *SimpleV = simplifyExtractValueInst(SimpleOp, I.getIndices(), SQ);
2379 if (SimpleV) {
2380 SimplifiedValues[&I] = SimpleV;
2381 return true;
2382 }
2383 }
2384
2385 // SROA can't look through these, but they may be free.
2386 return Base::visitExtractValue(I);
2387}
2388
2389bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
2390 // Constant folding for insert value is trivial.
2392 return true;
2393
2394 // SROA can't look through these, but they may be free.
2395 return Base::visitInsertValue(I);
2396}
2397
2398/// Try to simplify a call site.
2399///
2400/// Takes a concrete function and callsite and tries to actually simplify it by
2401/// analyzing the arguments and call itself with instsimplify. Returns true if
2402/// it has simplified the callsite to some other entity (a constant), making it
2403/// free.
2404bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) {
2405 // FIXME: Using the instsimplify logic directly for this is inefficient
2406 // because we have to continually rebuild the argument list even when no
2407 // simplifications can be performed. Until that is fixed with remapping
2408 // inside of instsimplify, directly constant fold calls here.
2410 return false;
2411
2412 // Try to re-map the arguments to constants.
2413 SmallVector<Constant *, 4> ConstantArgs;
2414 ConstantArgs.reserve(Call.arg_size());
2415 for (Value *I : Call.args()) {
2416 Constant *C = getDirectOrSimplifiedValue<Constant>(I);
2417 if (!C)
2418 return false; // This argument doesn't map to a constant.
2419
2420 ConstantArgs.push_back(C);
2421 }
2422 if (Constant *C = ConstantFoldCall(&Call, F, ConstantArgs)) {
2423 SimplifiedValues[&Call] = C;
2424 return true;
2425 }
2426
2427 return false;
2428}
2429
2430bool CallAnalyzer::isLoweredToCall(Function *F, CallBase &Call) {
2431 const TargetLibraryInfo *TLI = GetTLI ? &GetTLI(*F) : nullptr;
2432 LibFunc LF;
2433 if (!TLI || !TLI->getLibFunc(*F, LF) || !TLI->has(LF))
2434 return TTI.isLoweredToCall(F);
2435
2436 switch (LF) {
2437 case LibFunc_memcpy_chk:
2438 case LibFunc_memmove_chk:
2439 case LibFunc_mempcpy_chk:
2440 case LibFunc_memset_chk: {
2441 // Calls to __memcpy_chk whose length is known to fit within the object
2442 // size will eventually be replaced by inline stores. Therefore, these
2443 // should not incur a call penalty. This is only really relevant on
2444 // platforms whose headers redirect memcpy to __memcpy_chk (e.g. Darwin), as
2445 // other platforms use memcpy intrinsics, which are already exempt from the
2446 // call penalty.
2447 auto *LenOp = getDirectOrSimplifiedValue<ConstantInt>(Call.getOperand(2));
2448 auto *ObjSizeOp =
2449 getDirectOrSimplifiedValue<ConstantInt>(Call.getOperand(3));
2450 if (LenOp && ObjSizeOp &&
2451 LenOp->getLimitedValue() <= ObjSizeOp->getLimitedValue()) {
2452 return false;
2453 }
2454 break;
2455 }
2456 default:
2457 break;
2458 }
2459
2460 return TTI.isLoweredToCall(F);
2461}
2462
2463bool CallAnalyzer::visitCallBase(CallBase &Call) {
2464 if (!onCallBaseVisitStart(Call))
2465 return true;
2466
2467 if (Call.hasFnAttr(Attribute::ReturnsTwice) &&
2468 !F.hasFnAttribute(Attribute::ReturnsTwice)) {
2469 // This aborts the entire analysis.
2470 ExposesReturnsTwice = true;
2471 return false;
2472 }
2473 if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
2474 ContainsNoDuplicateCall = true;
2475
2476 if (InlineAsm *InlineAsmOp = dyn_cast<InlineAsm>(Call.getCalledOperand()))
2477 onInlineAsm(*InlineAsmOp);
2478
2480 bool IsIndirectCall = !F;
2481 if (IsIndirectCall) {
2482 // Check if this happens to be an indirect function call to a known function
2483 // in this inline context. If not, we've done all we can.
2485 F = getSimplifiedValue<Function>(Callee);
2486 if (!F || F->getFunctionType() != Call.getFunctionType()) {
2487 onCallArgumentSetup(Call);
2488
2489 if (!Call.onlyReadsMemory())
2490 disableLoadElimination();
2491 return Base::visitCallBase(Call);
2492 }
2493 }
2494
2495 assert(F && "Expected a call to a known function");
2496
2497 // When we have a concrete function, first try to simplify it directly.
2498 if (simplifyCallSite(F, Call))
2499 return true;
2500
2501 // Next check if it is an intrinsic we know about.
2502 // FIXME: Lift this into part of the InstVisitor.
2503 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Call)) {
2504 switch (II->getIntrinsicID()) {
2505 default:
2507 disableLoadElimination();
2508 return Base::visitCallBase(Call);
2509
2510 case Intrinsic::load_relative:
2511 onLoadRelativeIntrinsic();
2512 return false;
2513
2514 case Intrinsic::memset:
2515 case Intrinsic::memcpy:
2516 case Intrinsic::memmove:
2517 disableLoadElimination();
2518 // SROA can usually chew through these intrinsics, but they aren't free.
2519 return false;
2520 case Intrinsic::icall_branch_funnel:
2521 case Intrinsic::localescape:
2522 HasUninlineableIntrinsic = true;
2523 return false;
2524 case Intrinsic::vastart:
2525 InitsVargArgs = true;
2526 return false;
2527 case Intrinsic::launder_invariant_group:
2528 case Intrinsic::strip_invariant_group:
2529 if (auto *SROAArg = getSROAArgForValueOrNull(II->getOperand(0)))
2530 SROAArgValues[II] = SROAArg;
2531 return true;
2532 case Intrinsic::is_constant:
2533 return simplifyIntrinsicCallIsConstant(Call);
2534 case Intrinsic::objectsize:
2535 return simplifyIntrinsicCallObjectSize(Call);
2536 }
2537 }
2538
2539 if (F == Call.getFunction()) {
2540 // This flag will fully abort the analysis, so don't bother with anything
2541 // else.
2542 IsRecursiveCall = true;
2543 if (!AllowRecursiveCall)
2544 return false;
2545 }
2546
2547 if (isLoweredToCall(F, Call)) {
2548 onLoweredCall(F, Call, IsIndirectCall);
2549 }
2550
2551 if (!(Call.onlyReadsMemory() || (IsIndirectCall && F->onlyReadsMemory())))
2552 disableLoadElimination();
2553 return Base::visitCallBase(Call);
2554}
2555
2556bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
2557 // At least one return instruction will be free after inlining.
2558 bool Free = !HasReturn;
2559 HasReturn = true;
2560 return Free;
2561}
2562
2563bool CallAnalyzer::visitUncondBrInst(UncondBrInst &BI) {
2564 // We model unconditional branches as essentially free -- they really
2565 // shouldn't exist at all, but handling them makes the behavior of the
2566 // inliner more regular and predictable.
2567 return true;
2568}
2569
2570bool CallAnalyzer::visitCondBrInst(CondBrInst &BI) {
2571 // Conditional branches which will fold away are free.
2572 return getDirectOrSimplifiedValue<ConstantInt>(BI.getCondition()) ||
2573 BI.getMetadata(LLVMContext::MD_make_implicit);
2574}
2575
// Try to fold a select away entirely when its condition or both arms are
// known, and propagate SROA / constant-offset pointer bookkeeping through
// selects that cannot be folded.
bool CallAnalyzer::visitSelectInst(SelectInst &SI) {
  // Only pointer-typed selects can participate in SROA and constant-offset
  // pointer tracking.
  bool CheckSROA = SI.getType()->isPointerTy();
  Value *TrueVal = SI.getTrueValue();
  Value *FalseVal = SI.getFalseValue();

  Constant *TrueC = getDirectOrSimplifiedValue<Constant>(TrueVal);
  Constant *FalseC = getDirectOrSimplifiedValue<Constant>(FalseVal);
  Constant *CondC = getSimplifiedValue<Constant>(SI.getCondition());

  if (!CondC) {
    // Select C, X, X => X
    if (TrueC == FalseC && TrueC) {
      SimplifiedValues[&SI] = TrueC;
      return true;
    }

    if (!CheckSROA)
      return Base::visitSelectInst(SI);

    // Condition is unknown, but if both arms resolve to the same base
    // pointer and offset, that common value is the select's pointer info.
    std::pair<Value *, APInt> TrueBaseAndOffset =
        ConstantOffsetPtrs.lookup(TrueVal);
    std::pair<Value *, APInt> FalseBaseAndOffset =
        ConstantOffsetPtrs.lookup(FalseVal);
    if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {
      ConstantOffsetPtrs[&SI] = std::move(TrueBaseAndOffset);

      if (auto *SROAArg = getSROAArgForValueOrNull(TrueVal))
        SROAArgValues[&SI] = SROAArg;
      return true;
    }

    return Base::visitSelectInst(SI);
  }

  // Select condition is a constant.
  Value *SelectedV = CondC->isAllOnesValue() ? TrueVal
                     : (CondC->isNullValue()) ? FalseVal
                                              : nullptr;
  if (!SelectedV) {
    // Condition is a vector constant that is not all 1s or all 0s. If all
    // operands are constants, ConstantFoldSelectInstruction() can handle the
    // cases such as select vectors.
    if (TrueC && FalseC) {
      if (auto *C = ConstantFoldSelectInstruction(CondC, TrueC, FalseC)) {
        SimplifiedValues[&SI] = C;
        return true;
      }
    }
    return Base::visitSelectInst(SI);
  }

  // Condition is either all 1s or all 0s. SI can be simplified.
  if (Constant *SelectedC = dyn_cast<Constant>(SelectedV)) {
    SimplifiedValues[&SI] = SelectedC;
    return true;
  }

  if (!CheckSROA)
    return true;

  // The surviving arm is not a constant; carry its base/offset and SROA
  // bookkeeping over to the select so downstream users can keep folding.
  std::pair<Value *, APInt> BaseAndOffset =
      ConstantOffsetPtrs.lookup(SelectedV);
  if (BaseAndOffset.first) {
    ConstantOffsetPtrs[&SI] = std::move(BaseAndOffset);

    if (auto *SROAArg = getSROAArgForValueOrNull(SelectedV))
      SROAArgValues[&SI] = SROAArg;
  }

  return true;
}
2647
2648bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
2649 // We model unconditional switches as free, see the comments on handling
2650 // branches.
2651 if (getDirectOrSimplifiedValue<ConstantInt>(SI.getCondition()))
2652 return true;
2653
2654 // Assume the most general case where the switch is lowered into
2655 // either a jump table, bit test, or a balanced binary tree consisting of
2656 // case clusters without merging adjacent clusters with the same
2657 // destination. We do not consider the switches that are lowered with a mix
2658 // of jump table/bit test/binary search tree. The cost of the switch is
2659 // proportional to the size of the tree or the size of jump table range.
2660 //
2661 // NB: We convert large switches which are just used to initialize large phi
2662 // nodes to lookup tables instead in simplifycfg, so this shouldn't prevent
2663 // inlining those. It will prevent inlining in cases where the optimization
2664 // does not (yet) fire.
2665
2666 unsigned JumpTableSize = 0;
2667 BlockFrequencyInfo *BFI = GetBFI ? &(GetBFI(F)) : nullptr;
2668 unsigned NumCaseCluster =
2669 TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize, PSI, BFI);
2670
2671 onFinalizeSwitch(JumpTableSize, NumCaseCluster, SI.defaultDestUnreachable());
2672 return false;
2673}
2674
2675bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
2676 // We never want to inline functions that contain an indirectbr. This is
2677 // incorrect because all the blockaddress's (in static global initializers
2678 // for example) would be referring to the original function, and this
2679 // indirect jump would jump from the inlined copy of the function into the
2680 // original function which is extremely undefined behavior.
2681 // FIXME: This logic isn't really right; we can safely inline functions with
2682 // indirectbr's as long as no other function or global references the
2683 // blockaddress of a block within the current function.
2684 HasIndirectBr = true;
2685 return false;
2686}
2687
2688bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
2689 // FIXME: It's not clear that a single instruction is an accurate model for
2690 // the inline cost of a resume instruction.
2691 return false;
2692}
2693
2694bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {
2695 // FIXME: It's not clear that a single instruction is an accurate model for
2696 // the inline cost of a cleanupret instruction.
2697 return false;
2698}
2699
2700bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {
2701 // FIXME: It's not clear that a single instruction is an accurate model for
2702 // the inline cost of a catchret instruction.
2703 return false;
2704}
2705
2706bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
2707 // FIXME: It might be reasonably to discount the cost of instructions leading
2708 // to unreachable as they have the lowest possible impact on both runtime and
2709 // code size.
2710 return true; // No actual code is needed for unreachable.
2711}
2712
2713bool CallAnalyzer::visitInstruction(Instruction &I) {
2714 // Some instructions are free. All of the free intrinsics can also be
2715 // handled by SROA, etc.
2718 return true;
2719
2720 // We found something we don't understand or can't handle. Mark any SROA-able
2721 // values in the operand list as no longer viable.
2722 for (const Use &Op : I.operands())
2723 disableSROA(Op);
2724
2725 return false;
2726}
2727
/// Analyze a basic block for its contribution to the inline cost.
///
/// This method walks the analyzer over every instruction in the given basic
/// block and accounts for their cost during inlining at this callsite. It
/// aborts early if the threshold has been exceeded or an impossible to inline
/// construct has been detected. It returns false if inlining is no longer
/// viable, and true if inlining remains viable.
InlineResult
CallAnalyzer::analyzeBlock(BasicBlock *BB,
                           const SmallPtrSetImpl<const Value *> &EphValues) {
  for (Instruction &I : *BB) {
    // FIXME: Currently, the number of instructions in a function regardless of
    // our ability to simplify them during inline to constants or dead code,
    // are actually used by the vector bonus heuristic. As long as that's true,
    // we have to special case debug intrinsics here to prevent differences in
    // inlining due to debug symbols. Eventually, the number of unsimplified
    // instructions shouldn't factor into the cost computation, but until then,
    // hack around it here.
    // Similarly, skip pseudo-probes.
    if (I.isDebugOrPseudoInst())
      continue;

    // Skip ephemeral values.
    if (EphValues.count(&I))
      continue;

    ++NumInstructions;
    if (isa<ExtractElementInst>(I) || I.getType()->isVectorTy())
      ++NumVectorInstructions;

    // If the instruction simplified to a constant, there is no cost to this
    // instruction. Visit the instructions using our InstVisitor to account for
    // all of the per-instruction logic. The visit tree returns true if we
    // consumed the instruction in any way, and false if the instruction's base
    // cost should count against inlining.
    onInstructionAnalysisStart(&I);

    if (Base::visit(&I))
      ++NumInstructionsSimplified;
    else
      onMissedSimplification();

    onInstructionAnalysisFinish(&I);
    // Pull in ore::NV for the remark streams below.
    using namespace ore;
    // If the visit this instruction detected an uninlinable pattern, abort.
    // Note: the flags below are set as side effects of Base::visit() above,
    // so they must be re-checked after every instruction.
    InlineResult IR = InlineResult::success();
    if (IsRecursiveCall && !AllowRecursiveCall)
      IR = InlineResult::failure("recursive");
    else if (ExposesReturnsTwice)
      IR = InlineResult::failure("exposes returns twice");
    else if (HasDynamicAlloca)
      IR = InlineResult::failure("dynamic alloca");
    else if (HasIndirectBr)
      IR = InlineResult::failure("indirect branch");
    else if (HasUninlineableIntrinsic)
      IR = InlineResult::failure("uninlinable intrinsic");
    else if (InitsVargArgs)
      IR = InlineResult::failure("varargs");
    if (!IR.isSuccess()) {
      if (ORE)
        ORE->emit([&]() {
          return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
                                          &CandidateCall)
                 << NV("Callee", &F) << " has uninlinable pattern ("
                 << NV("InlineResult", IR.getFailureReason())
                 << ") and cost is not fully computed";
        });
      return IR;
    }

    // If the caller is a recursive function then we don't want to inline
    // functions which allocate a lot of stack space because it would increase
    // the caller stack usage dramatically.
    if (IsCallerRecursive && AllocatedSize > RecurStackSizeThreshold) {
      auto IR =
          InlineResult::failure("recursive and allocates too much stack space");
      if (ORE)
        ORE->emit([&]() {
          return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
                                          &CandidateCall)
                 << NV("Callee", &F) << " is "
                 << NV("InlineResult", IR.getFailureReason())
                 << ". Cost is not fully computed";
        });
      return IR;
    }

    if (shouldStop())
      return InlineResult::failure(
          "Call site analysis is not favorable to inlining.");
  }

  return InlineResult::success();
}
2822
2823/// Compute the base pointer and cumulative constant offsets for V.
2824///
2825/// This strips all constant offsets off of V, leaving it the base pointer, and
2826/// accumulates the total constant offset applied in the returned constant. It
2827/// returns 0 if V is not a pointer, and returns the constant '0' if there are
2828/// no constant offsets applied.
2829ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
2830 if (!V->getType()->isPointerTy())
2831 return nullptr;
2832
2833 unsigned AS = V->getType()->getPointerAddressSpace();
2834 unsigned IntPtrWidth = DL.getIndexSizeInBits(AS);
2835 APInt Offset = APInt::getZero(IntPtrWidth);
2836
2837 // Even though we don't look through PHI nodes, we could be called on an
2838 // instruction in an unreachable block, which may be on a cycle.
2839 SmallPtrSet<Value *, 4> Visited;
2840 Visited.insert(V);
2841 do {
2842 if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
2843 if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
2844 return nullptr;
2845 V = GEP->getPointerOperand();
2846 } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
2847 if (GA->isInterposable())
2848 break;
2849 V = GA->getAliasee();
2850 } else {
2851 break;
2852 }
2853 assert(V->getType()->isPointerTy() && "Unexpected operand type!");
2854 } while (Visited.insert(V).second);
2855
2856 Type *IdxPtrTy = DL.getIndexType(V->getType());
2857 return cast<ConstantInt>(ConstantInt::get(IdxPtrTy, Offset));
2858}
2859
2860/// Find dead blocks due to deleted CFG edges during inlining.
2861///
2862/// If we know the successor of the current block, \p CurrBB, has to be \p
2863/// NextBB, the other successors of \p CurrBB are dead if these successors have
2864/// no live incoming CFG edges. If one block is found to be dead, we can
2865/// continue growing the dead block list by checking the successors of the dead
2866/// blocks to see if all their incoming edges are dead or not.
2867void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
2868 auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) {
2869 // A CFG edge is dead if the predecessor is dead or the predecessor has a
2870 // known successor which is not the one under exam.
2871 if (DeadBlocks.count(Pred))
2872 return true;
2873 BasicBlock *KnownSucc = KnownSuccessors[Pred];
2874 return KnownSucc && KnownSucc != Succ;
2875 };
2876
2877 auto IsNewlyDead = [&](BasicBlock *BB) {
2878 // If all the edges to a block are dead, the block is also dead.
2879 return (!DeadBlocks.count(BB) &&
2881 [&](BasicBlock *P) { return IsEdgeDead(P, BB); }));
2882 };
2883
2884 for (BasicBlock *Succ : successors(CurrBB)) {
2885 if (Succ == NextBB || !IsNewlyDead(Succ))
2886 continue;
2888 NewDead.push_back(Succ);
2889 while (!NewDead.empty()) {
2890 BasicBlock *Dead = NewDead.pop_back_val();
2891 if (DeadBlocks.insert(Dead).second)
2892 // Continue growing the dead block lists.
2893 for (BasicBlock *S : successors(Dead))
2894 if (IsNewlyDead(S))
2895 NewDead.push_back(S);
2896 }
2897 }
2898}
2899
2900/// Analyze a call site for potential inlining.
2901///
2902/// Returns true if inlining this call is viable, and false if it is not
2903/// viable. It computes the cost and adjusts the threshold based on numerous
2904/// factors and heuristics. If this method returns false but the computed cost
2905/// is below the computed threshold, then inlining was forcibly disabled by
2906/// some artifact of the routine.
2907InlineResult CallAnalyzer::analyze() {
2908 ++NumCallsAnalyzed;
2909
2910 auto Result = onAnalysisStart();
2911 if (!Result.isSuccess())
2912 return Result;
2913
2914 if (F.empty())
2915 return InlineResult::success();
2916
2917 Function *Caller = CandidateCall.getFunction();
2918 // Check if the caller function is recursive itself.
2919 for (User *U : Caller->users()) {
2920 CallBase *Call = dyn_cast<CallBase>(U);
2921 if (Call && Call->getFunction() == Caller) {
2922 IsCallerRecursive = true;
2923 break;
2924 }
2925 }
2926
2927 // Populate our simplified values by mapping from function arguments to call
2928 // arguments with known important simplifications.
2929 auto CAI = CandidateCall.arg_begin();
2930 for (Argument &FAI : F.args()) {
2931 assert(CAI != CandidateCall.arg_end());
2932 SimplifiedValues[&FAI] = *CAI;
2933 if (isa<Constant>(*CAI))
2934 ++NumConstantArgs;
2935
2936 Value *PtrArg = *CAI;
2937 if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
2938 ConstantOffsetPtrs[&FAI] = std::make_pair(PtrArg, C->getValue());
2939
2940 // We can SROA any pointer arguments derived from alloca instructions.
2941 if (auto *SROAArg = dyn_cast<AllocaInst>(PtrArg)) {
2942 SROAArgValues[&FAI] = SROAArg;
2943 onInitializeSROAArg(SROAArg);
2944 EnabledSROAAllocas.insert(SROAArg);
2945 }
2946 }
2947 ++CAI;
2948 }
2949 NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
2950 NumAllocaArgs = SROAArgValues.size();
2951
2952 // Collecting the ephemeral values of `F` can be expensive, so use the
2953 // ephemeral values cache if available.
2954 SmallPtrSet<const Value *, 32> EphValuesStorage;
2955 const SmallPtrSetImpl<const Value *> *EphValues = &EphValuesStorage;
2956 if (GetEphValuesCache)
2957 EphValues = &GetEphValuesCache(F).ephValues();
2958 else
2959 CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F),
2960 EphValuesStorage);
2961
2962 // The worklist of live basic blocks in the callee *after* inlining. We avoid
2963 // adding basic blocks of the callee which can be proven to be dead for this
2964 // particular call site in order to get more accurate cost estimates. This
2965 // requires a somewhat heavyweight iteration pattern: we need to walk the
2966 // basic blocks in a breadth-first order as we insert live successors. To
2967 // accomplish this, prioritizing for small iterations because we exit after
2968 // crossing our threshold, we use a small-size optimized SetVector.
2969 typedef SmallSetVector<BasicBlock *, 16> BBSetVector;
2970 BBSetVector BBWorklist;
2971 BBWorklist.insert(&F.getEntryBlock());
2972
2973 // Note that we *must not* cache the size, this loop grows the worklist.
2974 for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
2975 if (shouldStop())
2976 break;
2977
2978 BasicBlock *BB = BBWorklist[Idx];
2979 if (BB->empty())
2980 continue;
2981
2982 onBlockStart(BB);
2983
2984 // Disallow inlining a blockaddress.
2985 // A blockaddress only has defined behavior for an indirect branch in the
2986 // same function, and we do not currently support inlining indirect
2987 // branches. But, the inliner may not see an indirect branch that ends up
2988 // being dead code at a particular call site. If the blockaddress escapes
2989 // the function, e.g., via a global variable, inlining may lead to an
2990 // invalid cross-function reference.
2991 // FIXME: pr/39560: continue relaxing this overt restriction.
2992 if (BB->hasAddressTaken())
2993 return InlineResult::failure("blockaddress used");
2994
2995 // Analyze the cost of this block. If we blow through the threshold, this
2996 // returns false, and we can bail on out.
2997 InlineResult IR = analyzeBlock(BB, *EphValues);
2998 if (!IR.isSuccess())
2999 return IR;
3000
3001 Instruction *TI = BB->getTerminator();
3002
3003 // Add in the live successors by first checking whether we have terminator
3004 // that may be simplified based on the values simplified by this call.
3005 if (CondBrInst *BI = dyn_cast<CondBrInst>(TI)) {
3006 Value *Cond = BI->getCondition();
3007 if (ConstantInt *SimpleCond = getSimplifiedValue<ConstantInt>(Cond)) {
3008 BasicBlock *NextBB = BI->getSuccessor(SimpleCond->isZero() ? 1 : 0);
3009 BBWorklist.insert(NextBB);
3010 KnownSuccessors[BB] = NextBB;
3011 findDeadBlocks(BB, NextBB);
3012 continue;
3013 }
3014 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
3015 Value *Cond = SI->getCondition();
3016 if (ConstantInt *SimpleCond = getSimplifiedValue<ConstantInt>(Cond)) {
3017 BasicBlock *NextBB = SI->findCaseValue(SimpleCond)->getCaseSuccessor();
3018 BBWorklist.insert(NextBB);
3019 KnownSuccessors[BB] = NextBB;
3020 findDeadBlocks(BB, NextBB);
3021 continue;
3022 }
3023 }
3024
3025 // If we're unable to select a particular successor, just count all of
3026 // them.
3027 BBWorklist.insert_range(successors(BB));
3028
3029 onBlockAnalyzed(BB);
3030 }
3031
3032 // If this is a noduplicate call, we can still inline as long as
3033 // inlining this would cause the removal of the caller (so the instruction
3034 // is not actually duplicated, just moved).
3035 if (!isSoleCallToLocalFunction(CandidateCall, F) && ContainsNoDuplicateCall)
3036 return InlineResult::failure("noduplicate");
3037
3038 // If the callee's stack size exceeds the user-specified threshold,
3039 // do not let it be inlined.
3040 // The command line option overrides a limit set in the function attributes.
3041 size_t FinalStackSizeThreshold = StackSizeThreshold;
3042 if (!StackSizeThreshold.getNumOccurrences())
3043 if (std::optional<int> AttrMaxStackSize = getStringFnAttrAsInt(
3045 FinalStackSizeThreshold = *AttrMaxStackSize;
3046 if (AllocatedSize > FinalStackSizeThreshold)
3047 return InlineResult::failure("stacksize");
3048
3049 return finalizeAnalysis();
3050}
3051
3052void InlineCostCallAnalyzer::print(raw_ostream &OS) {
3053#define DEBUG_PRINT_STAT(x) OS << " " #x ": " << x << "\n"
3055 F.print(OS, &Writer);
3056 DEBUG_PRINT_STAT(NumConstantArgs);
3057 DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
3058 DEBUG_PRINT_STAT(NumAllocaArgs);
3059 DEBUG_PRINT_STAT(NumConstantPtrCmps);
3060 DEBUG_PRINT_STAT(NumConstantPtrDiffs);
3061 DEBUG_PRINT_STAT(NumInstructionsSimplified);
3062 DEBUG_PRINT_STAT(NumInstructions);
3063 DEBUG_PRINT_STAT(NumInlineAsmInstructions);
3064 DEBUG_PRINT_STAT(SROACostSavings);
3065 DEBUG_PRINT_STAT(SROACostSavingsLost);
3066 DEBUG_PRINT_STAT(LoadEliminationCost);
3067 DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
3069 DEBUG_PRINT_STAT(Threshold);
3070#undef DEBUG_PRINT_STAT
3071}
3072
3073#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3074/// Dump stats about this call's analysis.
3075LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() { print(dbgs()); }
3076#endif
3077
3078/// Test that there are no attribute conflicts between Caller and Callee
3079/// that prevent inlining.
3081 Function *Caller, Function *Callee, TargetTransformInfo &TTI,
3082 function_ref<const TargetLibraryInfo &(Function &)> &GetTLI) {
3083 // Note that CalleeTLI must be a copy not a reference. The legacy pass manager
3084 // caches the most recently created TLI in the TargetLibraryInfoWrapperPass
3085 // object, and always returns the same object (which is overwritten on each
3086 // GetTLI call). Therefore we copy the first result.
3087 auto CalleeTLI = GetTLI(*Callee);
3088 return (IgnoreTTIInlineCompatible ||
3089 TTI.areInlineCompatible(Caller, Callee)) &&
3090 GetTLI(*Caller).areInlineCompatible(CalleeTLI,
3092 AttributeFuncs::areInlineCompatible(*Caller, *Callee);
3093}
3094
3096 const DataLayout &DL) {
3097 int64_t Cost = 0;
3098 for (unsigned I = 0, E = Call.arg_size(); I != E; ++I) {
3099 if (Call.isByValArgument(I)) {
3100 // We approximate the number of loads and stores needed by dividing the
3101 // size of the byval type by the target's pointer size.
3102 PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
3103 unsigned TypeSize = DL.getTypeSizeInBits(Call.getParamByValType(I));
3104 unsigned AS = PTy->getAddressSpace();
3105 unsigned PointerSize = DL.getPointerSizeInBits(AS);
3106 // Ceiling division.
3107 unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
3108
3109 // If it generates more than 8 stores it is likely to be expanded as an
3110 // inline memcpy so we take that as an upper bound. Otherwise we assume
3111 // one load and one store per word copied.
3112 // FIXME: The maxStoresPerMemcpy setting from the target should be used
3113 // here instead of a magic number of 8, but it's not available via
3114 // DataLayout.
3115 NumStores = std::min(NumStores, 8U);
3116
3117 Cost += 2 * NumStores * InstrCost;
3118 } else {
3119 // For non-byval arguments subtract off one instruction per call
3120 // argument.
3121 Cost += InstrCost;
3122 }
3123 }
3124 // The call instruction also disappears after inlining.
3125 Cost += InstrCost;
3126 Cost += TTI.getInlineCallPenalty(Call.getCaller(), Call, CallPenalty);
3127
3128 return std::min<int64_t>(Cost, INT_MAX);
3129}
3130
3132 CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
3133 function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
3134 function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
3137 function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache) {
3138 return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI,
3139 GetAssumptionCache, GetTLI, GetBFI, PSI, ORE,
3140 GetEphValuesCache);
3141}
3142
3144 CallBase &Call, TargetTransformInfo &CalleeTTI,
3145 function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
3147 function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
3149 const InlineParams Params = {/* DefaultThreshold*/ 0,
3150 /*HintThreshold*/ {},
3151 /*ColdThreshold*/ {},
3152 /*OptSizeThreshold*/ {},
3153 /*OptMinSizeThreshold*/ {},
3154 /*HotCallSiteThreshold*/ {},
3155 /*LocallyHotCallSiteThreshold*/ {},
3156 /*ColdCallSiteThreshold*/ {},
3157 /*ComputeFullInlineCost*/ true,
3158 /*EnableDeferral*/ true};
3159
3160 InlineCostCallAnalyzer CA(*Call.getCalledFunction(), Call, Params, CalleeTTI,
3161 GetAssumptionCache, GetBFI, GetTLI, PSI, ORE, true,
3162 /*IgnoreThreshold*/ true);
3163 auto R = CA.analyze();
3164 if (!R.isSuccess())
3165 return std::nullopt;
3166 return CA.getCost();
3167}
3168
3169std::optional<InlineCostFeatures> llvm::getInliningCostFeatures(
3170 CallBase &Call, TargetTransformInfo &CalleeTTI,
3171 function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
3173 function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
3175 InlineCostFeaturesAnalyzer CFA(CalleeTTI, GetAssumptionCache, GetBFI, GetTLI,
3176 PSI, ORE, *Call.getCalledFunction(), Call);
3177 auto R = CFA.analyze();
3178 if (!R.isSuccess())
3179 return std::nullopt;
3180 return CFA.features();
3181}
3182
3184 CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI,
3185 function_ref<const TargetLibraryInfo &(Function &)> GetTLI) {
3186
3187 // Cannot inline indirect calls.
3188 if (!Callee)
3189 return InlineResult::failure("indirect call");
3190
3191 // When callee coroutine function is inlined into caller coroutine function
3192 // before coro-split pass,
3193 // coro-early pass can not handle this quiet well.
3194 // So we won't inline the coroutine function if it have not been unsplited
3195 if (Callee->isPresplitCoroutine())
3196 return InlineResult::failure("unsplited coroutine call");
3197
3198 // Never inline calls with byval arguments that does not have the alloca
3199 // address space. Since byval arguments can be replaced with a copy to an
3200 // alloca, the inlined code would need to be adjusted to handle that the
3201 // argument is in the alloca address space (so it is a little bit complicated
3202 // to solve).
3203 unsigned AllocaAS = Callee->getDataLayout().getAllocaAddrSpace();
3204 for (unsigned I = 0, E = Call.arg_size(); I != E; ++I)
3205 if (Call.isByValArgument(I)) {
3206 PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
3207 if (PTy->getAddressSpace() != AllocaAS)
3208 return InlineResult::failure("byval arguments without alloca"
3209 " address space");
3210 }
3211
3212 // Calls to functions with always-inline attributes should be inlined
3213 // whenever possible.
3214 if (Call.hasFnAttr(Attribute::AlwaysInline)) {
3215 if (Call.getAttributes().hasFnAttr(Attribute::NoInline))
3216 return InlineResult::failure("noinline call site attribute");
3217
3218 auto IsViable = isInlineViable(*Callee);
3219 if (IsViable.isSuccess())
3220 return InlineResult::success();
3221 return InlineResult::failure(IsViable.getFailureReason());
3222 }
3223
3224 // Never inline functions with conflicting attributes (unless callee has
3225 // always-inline attribute).
3226 Function *Caller = Call.getCaller();
3227 if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI, GetTLI))
3228 return InlineResult::failure("conflicting attributes");
3229
3230 // Flatten: inline all viable calls from flatten functions regardless of cost.
3231 // Checked before optnone so that flatten takes priority.
3232 if (Caller->hasFnAttribute(Attribute::Flatten)) {
3233 auto IsViable = isInlineViable(*Callee);
3234 if (IsViable.isSuccess())
3235 return InlineResult::success();
3236 return InlineResult::failure(IsViable.getFailureReason());
3237 }
3238
3239 // Don't inline this call if the caller has the optnone attribute.
3240 if (Caller->hasOptNone())
3241 return InlineResult::failure("optnone attribute");
3242
3243 // Don't inline a function that treats null pointer as valid into a caller
3244 // that does not have this attribute.
3245 if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined())
3246 return InlineResult::failure("nullptr definitions incompatible");
3247
3248 // Don't inline functions which can be interposed at link-time.
3249 if (Callee->isInterposable())
3250 return InlineResult::failure("interposable");
3251
3252 // Don't inline functions marked noinline.
3253 if (Callee->hasFnAttribute(Attribute::NoInline))
3254 return InlineResult::failure("noinline function attribute");
3255
3256 // Don't inline call sites marked noinline.
3257 if (Call.isNoInline())
3258 return InlineResult::failure("noinline call site attribute");
3259
3260 // Don't inline functions that are loader replaceable.
3261 if (Callee->hasFnAttribute("loader-replaceable"))
3262 return InlineResult::failure("loader replaceable function attribute");
3263
3264 return std::nullopt;
3265}
3266
3268 CallBase &Call, Function *Callee, const InlineParams &Params,
3269 TargetTransformInfo &CalleeTTI,
3270 function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
3271 function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
3274 function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache) {
3275
3276 auto UserDecision =
3277 llvm::getAttributeBasedInliningDecision(Call, Callee, CalleeTTI, GetTLI);
3278
3279 if (UserDecision) {
3280 if (UserDecision->isSuccess())
3281 return llvm::InlineCost::getAlways("always inline attribute");
3282 return llvm::InlineCost::getNever(UserDecision->getFailureReason());
3283 }
3284
3287 "Inlining forced by -inline-all-viable-calls");
3288
3289 LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
3290 << "... (caller:" << Call.getCaller()->getName()
3291 << ")\n");
3292
3293 InlineCostCallAnalyzer CA(*Callee, Call, Params, CalleeTTI,
3294 GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,
3295 /*BoostIndirect=*/true, /*IgnoreThreshold=*/false,
3296 GetEphValuesCache);
3297 InlineResult ShouldInline = CA.analyze();
3298
3299 LLVM_DEBUG(CA.dump());
3300
3301 // Always make cost benefit based decision explicit.
3302 // We use always/never here since threshold is not meaningful,
3303 // as it's not what drives cost-benefit analysis.
3304 if (CA.wasDecidedByCostBenefit()) {
3305 if (ShouldInline.isSuccess())
3306 return InlineCost::getAlways("benefit over cost",
3307 CA.getCostBenefitPair());
3308 else
3309 return InlineCost::getNever("cost over benefit", CA.getCostBenefitPair());
3310 }
3311
3312 if (CA.wasDecidedByCostThreshold())
3313 return InlineCost::get(CA.getCost(), CA.getThreshold(),
3314 CA.getStaticBonusApplied());
3315
3316 // No details on how the decision was made, simply return always or never.
3317 return ShouldInline.isSuccess()
3318 ? InlineCost::getAlways("empty function")
3319 : InlineCost::getNever(ShouldInline.getFailureReason());
3320}
3321
3323 bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
3324 for (BasicBlock &BB : F) {
3325 // Disallow inlining of functions which contain indirect branches.
3327 return InlineResult::failure("contains indirect branches");
3328
3329 // Disallow inlining of blockaddresses.
3330 if (BB.hasAddressTaken())
3331 return InlineResult::failure("blockaddress used");
3332
3333 for (auto &II : BB) {
3335 if (!Call)
3336 continue;
3337
3338 // Disallow recursive calls.
3339 Function *Callee = Call->getCalledFunction();
3340 if (&F == Callee)
3341 return InlineResult::failure("recursive call");
3342
3343 // Disallow calls which expose returns-twice to a function not previously
3344 // attributed as such.
3345 if (!ReturnsTwice && isa<CallInst>(Call) &&
3346 cast<CallInst>(Call)->canReturnTwice())
3347 return InlineResult::failure("exposes returns-twice attribute");
3348
3349 if (Callee)
3350 switch (Callee->getIntrinsicID()) {
3351 default:
3352 break;
3353 case llvm::Intrinsic::icall_branch_funnel:
3354 // Disallow inlining of @llvm.icall.branch.funnel because current
3355 // backend can't separate call targets from call arguments.
3356 return InlineResult::failure(
3357 "disallowed inlining of @llvm.icall.branch.funnel");
3358 case llvm::Intrinsic::localescape:
3359 // Disallow inlining functions that call @llvm.localescape. Doing this
3360 // correctly would require major changes to the inliner.
3361 return InlineResult::failure(
3362 "disallowed inlining of @llvm.localescape");
3363 case llvm::Intrinsic::vastart:
3364 // Disallow inlining of functions that initialize VarArgs with
3365 // va_start.
3366 return InlineResult::failure(
3367 "contains VarArgs initialized with va_start");
3368 }
3369 }
3370 }
3371
3372 return InlineResult::success();
3373}
3374
3375// APIs to create InlineParams based on command line flags and/or other
3376// parameters.
3377
3379 InlineParams Params;
3380
3381 // This field is the threshold to use for a callee by default. This is
3382 // derived from one or more of:
3383 // * optimization or size-optimization levels,
3384 // * a value passed to createFunctionInliningPass function, or
3385 // * the -inline-threshold flag.
3386 // If the -inline-threshold flag is explicitly specified, that is used
3387 // irrespective of anything else.
3388 if (InlineThreshold.getNumOccurrences() > 0)
3390 else
3391 Params.DefaultThreshold = Threshold;
3392
3393 // Set the HintThreshold knob from the -inlinehint-threshold.
3395
3396 // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold.
3398
3399 // If the -locally-hot-callsite-threshold is explicitly specified, use it to
3400 // populate LocallyHotCallSiteThreshold. Later, we populate
3401 // Params.LocallyHotCallSiteThreshold from -locally-hot-callsite-threshold if
3402 // we know that optimization level is O3 (in the getInlineParams variant that
3403 // takes the opt and size levels).
3404 // FIXME: Remove this check (and make the assignment unconditional) after
3405 // addressing size regression issues at O2.
3406 if (LocallyHotCallSiteThreshold.getNumOccurrences() > 0)
3408
3409 // Set the ColdCallSiteThreshold knob from the
3410 // -inline-cold-callsite-threshold.
3412
3413 // Set the OptMinSizeThreshold and OptSizeThreshold params only if the
3414 // -inlinehint-threshold commandline option is not explicitly given. If that
3415 // option is present, then its value applies even for callees with size and
3416 // minsize attributes.
3417 // If the -inline-threshold is not specified, set the ColdThreshold from the
3418 // -inlinecold-threshold even if it is not explicitly passed. If
3419 // -inline-threshold is specified, then -inlinecold-threshold needs to be
3420 // explicitly specified to set the ColdThreshold knob
3421 if (InlineThreshold.getNumOccurrences() == 0) {
3425 } else if (ColdThreshold.getNumOccurrences() > 0) {
3427 }
3428 return Params;
3429}
3430
3434
3435// Compute the default threshold for inlining based on the opt level and the
3436// size opt level.
3437static int computeThresholdFromOptLevels(unsigned OptLevel,
3438 unsigned SizeOptLevel) {
3439 if (OptLevel > 2)
3441 if (SizeOptLevel == 1) // -Os
3443 if (SizeOptLevel == 2) // -Oz
3445 return DefaultThreshold;
3446}
3447
3448InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) {
3449 auto Params =
3450 getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
3451 // At O3, use the value of -locally-hot-callsite-threshold option to populate
3452 // Params.LocallyHotCallSiteThreshold. Below O3, this flag has effect only
3453 // when it is specified explicitly.
3454 if (OptLevel > 2)
3456 return Params;
3457}
3458
3463 std::function<AssumptionCache &(Function &)> GetAssumptionCache =
3464 [&](Function &F) -> AssumptionCache & {
3465 return FAM.getResult<AssumptionAnalysis>(F);
3466 };
3467
3468 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
3469 ProfileSummaryInfo *PSI =
3470 MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
3471 const TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
3472
3473 // FIXME: Redesign the usage of InlineParams to expand the scope of this pass.
3474 // In the current implementation, the type of InlineParams doesn't matter as
3475 // the pass serves only for verification of inliner's decisions.
3476 // We can add a flag which determines InlineParams for this run. Right now,
3477 // the default InlineParams are used.
3478 const InlineParams Params = llvm::getInlineParams();
3479 for (BasicBlock &BB : F) {
3480 for (Instruction &I : BB) {
3481 if (auto *CB = dyn_cast<CallBase>(&I)) {
3482 Function *CalledFunction = CB->getCalledFunction();
3483 if (!CalledFunction || CalledFunction->isDeclaration())
3484 continue;
3485 OptimizationRemarkEmitter ORE(CalledFunction);
3486 InlineCostCallAnalyzer ICCA(*CalledFunction, *CB, Params, TTI,
3487 GetAssumptionCache, nullptr, nullptr, PSI,
3488 &ORE);
3489 ICCA.analyze();
3490 OS << " Analyzing call of " << CalledFunction->getName()
3491 << "... (caller:" << CB->getCaller()->getName() << ")\n";
3492 ICCA.print(OS);
3493 OS << "\n";
3494 }
3495 }
3496 }
3497 return PreservedAnalyses::all();
3498}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
static InstructionCost getCost(Instruction &Inst, TTI::TargetCostKind CostKind, TargetTransformInfo &TTI)
Definition CostModel.cpp:73
#define DEBUG_TYPE
static bool isColdCallSite(CallBase &CB, BlockFrequencyInfo &CallerBFI)
Return true if the block containing the call site has a BlockFrequency of less than ColdCCRelFreq% of...
Hexagon Common GEP
static bool IsIndirectCall(const MachineInstr *MI)
static cl::opt< int > InlineAsmInstrCost("inline-asm-instr-cost", cl::Hidden, cl::init(0), cl::desc("Cost of a single inline asm instruction when inlining"))
static cl::opt< int > InlineSavingsMultiplier("inline-savings-multiplier", cl::Hidden, cl::init(8), cl::desc("Multiplier to multiply cycle savings by during inlining"))
static cl::opt< int > InlineThreshold("inline-threshold", cl::Hidden, cl::init(225), cl::desc("Control the amount of inlining to perform (default = 225)"))
static cl::opt< int > CallPenalty("inline-call-penalty", cl::Hidden, cl::init(25), cl::desc("Call penalty that is applied per callsite when inlining"))
static cl::opt< int > HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000), cl::desc("Threshold for hot callsites "))
static cl::opt< int > ColdThreshold("inlinecold-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining functions with cold attribute"))
static cl::opt< size_t > RecurStackSizeThreshold("recursive-inline-max-stacksize", cl::Hidden, cl::init(InlineConstants::TotalAllocaSizeRecursiveCaller), cl::desc("Do not inline recursive functions with a stack " "size that exceeds the specified limit"))
static cl::opt< bool > PrintInstructionComments("print-instruction-comments", cl::Hidden, cl::init(false), cl::desc("Prints comments for instruction based on inline cost analysis"))
static cl::opt< int > LocallyHotCallSiteThreshold("locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::desc("Threshold for locally hot callsites "))
static cl::opt< bool > InlineCallerSupersetNoBuiltin("inline-caller-superset-nobuiltin", cl::Hidden, cl::init(true), cl::desc("Allow inlining when caller has a superset of callee's nobuiltin " "attributes."))
static cl::opt< int > HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325), cl::desc("Threshold for inlining functions with inline hint"))
static cl::opt< size_t > StackSizeThreshold("inline-max-stacksize", cl::Hidden, cl::init(std::numeric_limits< size_t >::max()), cl::desc("Do not inline functions with a stack size " "that exceeds the specified limit"))
static int computeThresholdFromOptLevels(unsigned OptLevel, unsigned SizeOptLevel)
static cl::opt< uint64_t > HotCallSiteRelFreq("hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::desc("Minimum block frequency, expressed as a multiple of caller's " "entry frequency, for a callsite to be hot in the absence of " "profile information."))
static cl::opt< int > InlineSavingsProfitableMultiplier("inline-savings-profitable-multiplier", cl::Hidden, cl::init(4), cl::desc("A multiplier on top of cycle savings to decide whether the " "savings won't justify the cost"))
static cl::opt< int > MemAccessCost("inline-memaccess-cost", cl::Hidden, cl::init(0), cl::desc("Cost of load/store instruction when inlining"))
static cl::opt< int > ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining cold callsites"))
static cl::opt< bool > IgnoreTTIInlineCompatible("ignore-tti-inline-compatible", cl::Hidden, cl::init(false), cl::desc("Ignore TTI attributes compatibility check between callee/caller " "during inline cost calculation"))
static cl::opt< bool > OptComputeFullInlineCost("inline-cost-full", cl::Hidden, cl::desc("Compute the full inline cost of a call site even when the cost " "exceeds the threshold."))
#define DEBUG_PRINT_STAT(x)
static cl::opt< bool > InlineEnableCostBenefitAnalysis("inline-enable-cost-benefit-analysis", cl::Hidden, cl::init(false), cl::desc("Enable the cost-benefit analysis for the inliner"))
static cl::opt< int > InstrCost("inline-instr-cost", cl::Hidden, cl::init(5), cl::desc("Cost of a single instruction when inlining"))
static cl::opt< bool > InlineAllViableCalls("inline-all-viable-calls", cl::Hidden, cl::init(false), cl::desc("Inline all viable calls, even if they exceed the inlining " "threshold"))
static cl::opt< int > InlineSizeAllowance("inline-size-allowance", cl::Hidden, cl::init(100), cl::desc("The maximum size of a callee that get's " "inlined without sufficient cycle savings"))
static bool functionsHaveCompatibleAttributes(Function *Caller, Function *Callee, TargetTransformInfo &TTI, function_ref< const TargetLibraryInfo &(Function &)> &GetTLI)
Test that there are no attribute conflicts between Caller and Callee that prevent inlining.
static cl::opt< int > ColdCallSiteRelFreq("cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::desc("Maximum block frequency, expressed as a percentage of caller's " "entry frequency, for a callsite to be cold in the absence of " "profile information."))
static cl::opt< bool > DisableGEPConstOperand("disable-gep-const-evaluation", cl::Hidden, cl::init(false), cl::desc("Disables evaluation of GetElementPtr with constant operands"))
static cl::opt< int > DefaultThreshold("inlinedefault-threshold", cl::Hidden, cl::init(225), cl::desc("Default amount of inlining to perform"))
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
Legalize the Machine IR a function s Machine IR
Definition Legalizer.cpp:81
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
#define T
uint64_t IntrinsicInst * II
#define P(N)
FunctionAnalysisManager FAM
const SmallVectorImpl< MachineOperand > & Cond
static void visit(BasicBlock &Start, std::function< bool(BasicBlock *)> op)
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition APInt.cpp:1604
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1118
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition APInt.cpp:1072
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228
PointerType * getType() const
Overload to return most specific pointer type.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
Definition Attributes.h:105
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition Attributes.h:261
LLVM Basic Block Representation.
Definition BasicBlock.h:62
bool empty() const
Definition BasicBlock.h:483
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition BasicBlock.h:687
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
LLVM_ABI std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
LLVM_ABI BlockFrequency getEntryFreq() const
LLVM_ABI BlockFrequency getBlockFreq(const BasicBlock *BB) const
getblockFreq - Return block frequency.
LLVM_ABI std::optional< BlockFrequency > mul(uint64_t Factor) const
Multiplies frequency with Factor. Returns nullopt in case of overflow.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
bool onlyReadsMemory(unsigned OpNo) const
Value * getCalledOperand() const
Attribute getFnAttr(StringRef Kind) const
Get the attribute of a given kind for the function.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
FunctionType * getFunctionType() const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
unsigned arg_size() const
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
@ ICMP_NE
not equal
Definition InstrTypes.h:698
Value * getCondition() const
BasicBlock * getSuccessor(unsigned i) const
static LLVM_ABI Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
LLVM_ABI bool isAllOnesValue() const
Return true if this is the value that would be returned by getAllOnesValue.
Definition Constants.cpp:95
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:74
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:205
unsigned size() const
Definition DenseMap.h:110
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:174
A cache of ephemeral values within a function.
Type * getReturnType() const
const BasicBlock & getEntryBlock() const
Definition Function.h:809
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:331
static LLVM_ABI bool compare(const APInt &LHS, const APInt &RHS, ICmpInst::Predicate Pred)
Return result of LHS Pred RHS comparison.
LLVM_ABI void collectAsmStrs(SmallVectorImpl< StringRef > &AsmStrs) const
Definition InlineAsm.cpp:63
Represents the cost of inlining a function.
Definition InlineCost.h:91
static InlineCost getNever(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
Definition InlineCost.h:132
static InlineCost getAlways(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
Definition InlineCost.h:127
static InlineCost get(int Cost, int Threshold, int StaticBonus=0)
Definition InlineCost.h:121
InlineResult is basically true or false.
Definition InlineCost.h:181
static InlineResult success()
Definition InlineCost.h:186
static InlineResult failure(const char *Reason)
Definition InlineCost.h:187
bool isSuccess() const
Definition InlineCost.h:190
const char * getFailureReason() const
Definition InlineCost.h:191
Base class for instruction visitors.
Definition InstVisitor.h:78
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
void reserve(size_type N)
void push_back(const T &Elt)
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
static constexpr size_t npos
Definition StringRef.h:57
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition StringRef.h:490
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:591
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:140
bool contains(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
Definition StringRef.h:446
size_t find(char C, size_t From=0) const
Search for the first character C in the string.
Definition StringRef.h:290
TypeSize getElementOffset(unsigned Idx) const
Definition DataLayout.h:767
Analysis pass providing the TargetTransformInfo.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM_ABI unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const
Returns a penalty for invoking call Call in F.
LLVM_ABI unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const
LLVM_ABI unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const
@ TCK_SizeAndLatency
The weighted sum of size and latency.
LLVM_ABI int getInliningLastCallToStaticBonus() const
LLVM_ABI unsigned adjustInliningThreshold(const CallBase *CB) const
LLVM_ABI unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const
LLVM_ABI int getInlinerVectorBonusPercent() const
LLVM_ABI bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
LLVM_ABI unsigned getInliningThresholdMultiplier() const
@ TCC_Expensive
The cost of a 'div' instruction on x86.
@ TCC_Free
Expected to fold away in lowering.
LLVM_ABI InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
LLVM_ABI unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const
LLVM_ABI InstructionCost getFPOpCost(Type *Ty) const
Return the expected cost of supporting the floating point operation of the specified type.
static constexpr TypeSize getZero()
Definition TypeSize.h:349
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:236
Value * getOperand(unsigned i) const
Definition User.h:207
LLVM Value Representation.
Definition Value.h:75
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:259
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
int getNumOccurrences() const
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
bool erase(const ValueT &V)
Definition DenseSet.h:100
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
CallInst * Call
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
const int ColdccPenalty
Definition InlineCost.h:52
const char FunctionInlineCostMultiplierAttributeName[]
Definition InlineCost.h:60
const int OptSizeThreshold
Use when optsize (-Os) is specified.
Definition InlineCost.h:40
const int OptMinSizeThreshold
Use when minsize (-Oz) is specified.
Definition InlineCost.h:43
const uint64_t MaxSimplifiedDynamicAllocaToInline
Do not inline dynamic allocas that have been constant propagated to be static allocas above this amou...
Definition InlineCost.h:58
const int IndirectCallThreshold
Definition InlineCost.h:50
const int OptAggressiveThreshold
Use when -O3 is specified.
Definition InlineCost.h:46
const char MaxInlineStackSizeAttributeName[]
Definition InlineCost.h:63
const unsigned TotalAllocaSizeRecursiveCaller
Do not inline functions which allocate this many bytes on the stack when the caller is recursive.
Definition InlineCost.h:55
LLVM_ABI int getInstrCost()
bool match(Val *V, const Pattern &P)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< InstrNode * > Instr
Definition RDFGraph.h:389
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
LLVM_ABI Constant * ConstantFoldSelectInstruction(Constant *Cond, Constant *V1, Constant *V2)
Attempt to constant fold a select instruction with the specified operands.
InstructionCost Cost
@ Dead
Unused definition.
LLVM_ABI bool isAssumeLikeIntrinsic(const Instruction *I)
Return true if it is an intrinsic that cannot be speculated but also cannot trap.
LLVM_ABI bool canConstantFoldCallTo(const CallBase *Call, const Function *F)
canConstantFoldCallTo - Return true if its even possible to fold a call to the specified function.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI std::optional< int > getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind)
auto successors(const MachineBasicBlock *BB)
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
LLVM_ABI Value * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
LLVM_ABI Value * simplifyInstructionWithOperands(Instruction *I, ArrayRef< Value * > NewOps, const SimplifyQuery &Q)
Like simplifyInstruction but the operands of I are replaced with NewOps.
LogicalResult failure(bool IsFailure=true)
Utility function to generate a LogicalResult.
gep_type_iterator gep_type_end(const User *GEP)
LLVM_ABI Constant * ConstantFoldCall(const CallBase *Call, Function *F, ArrayRef< Constant * > Operands, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldCall - Attempt to constant fold a call to the specified function with the specified argum...
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
LLVM_ABI InlineResult isInlineViable(Function &Callee)
Check if it is mechanically possible to inline the function Callee, based on the contents of the func...
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
LLVM_ABI Value * simplifyFNegInst(Value *Op, FastMathFlags FMF, const SimplifyQuery &Q)
Given operand for an FNeg, fold the result or return null.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
generic_gep_type_iterator<> gep_type_iterator
std::enable_if_t< std::is_unsigned_v< T >, T > SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed=nullptr)
Multiply two unsigned integers, X and Y, and add the unsigned integer, A to the product.
Definition MathExtras.h:684
Function::ProfileCount ProfileCount
LLVM_ABI std::optional< InlineCostFeatures > getInliningCostFeatures(CallBase &Call, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, function_ref< const TargetLibraryInfo &(Function &)> GetTLI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get the expanded cost features.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI Value * simplifyExtractValueInst(Value *Agg, ArrayRef< unsigned > Idxs, const SimplifyQuery &Q)
Given operands for an ExtractValueInst, fold the result or return null.
LLVM_ABI InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr, function_ref< EphemeralValuesCache &(Function &)> GetEphValuesCache=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
TargetTransformInfo TTI
LLVM_ABI std::optional< InlineResult > getAttributeBasedInliningDecision(CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI, function_ref< const TargetLibraryInfo &(Function &)> GetTLI)
Returns InlineResult::success() if the call site should be always inlined because of user directives,...
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
DWARFExpression::Operation Op
LLVM_ABI InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
LLVM_ABI int getCallsiteCost(const TargetTransformInfo &TTI, const CallBase &Call, const DataLayout &DL)
Return the cost associated with a callsite, including parameter passing and the call/return instructi...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
gep_type_iterator gep_type_begin(const User *GEP)
LLVM_ABI std::optional< int > getInliningCostEstimate(CallBase &Call, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, function_ref< const TargetLibraryInfo &(Function &)> GetTLI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
Get the cost estimate ignoring thresholds.
auto predecessors(const MachineBasicBlock *BB)
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI Constant * ConstantFoldInstOperands(const Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
std::enable_if_t< std::is_unsigned_v< T >, T > SaturatingAdd(T X, T Y, bool *ResultOverflowed=nullptr)
Add two unsigned integers, X and Y, of type T.
Definition MathExtras.h:609
std::array< int, static_cast< size_t >(InlineCostFeatureIndex::NumberOfFeatures)> InlineCostFeatures
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
Thresholds to tune inline cost analysis.
Definition InlineCost.h:207
std::optional< int > OptMinSizeThreshold
Threshold to use when the caller is optimized for minsize.
Definition InlineCost.h:221
std::optional< int > OptSizeThreshold
Threshold to use when the caller is optimized for size.
Definition InlineCost.h:218
std::optional< int > ColdCallSiteThreshold
Threshold to use when the callsite is considered cold.
Definition InlineCost.h:231
std::optional< int > ColdThreshold
Threshold to use for cold callees.
Definition InlineCost.h:215
std::optional< int > HotCallSiteThreshold
Threshold to use when the callsite is considered hot.
Definition InlineCost.h:224
int DefaultThreshold
The default threshold to start with for a callee.
Definition InlineCost.h:209
std::optional< int > HintThreshold
Threshold to use for callees with inline hint.
Definition InlineCost.h:212
std::optional< int > LocallyHotCallSiteThreshold
Threshold to use when the callsite is considered hot relative to function entry.
Definition InlineCost.h:228