// LLVM 23.0.0git — LoopPeel.cpp (extracted from a doxygen source listing;
// navigation text converted to a comment so the file remains valid C++).
1//===- LoopPeel.cpp -------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Loop Peeling Utilities.
10//===----------------------------------------------------------------------===//
11
13#include "llvm/ADT/DenseMap.h"
14#include "llvm/ADT/MapVector.h"
16#include "llvm/ADT/Statistic.h"
17#include "llvm/Analysis/Loads.h"
24#include "llvm/IR/BasicBlock.h"
25#include "llvm/IR/Dominators.h"
26#include "llvm/IR/Function.h"
27#include "llvm/IR/InstrTypes.h"
28#include "llvm/IR/Instruction.h"
30#include "llvm/IR/LLVMContext.h"
31#include "llvm/IR/MDBuilder.h"
36#include "llvm/Support/Debug.h"
44#include <algorithm>
45#include <cassert>
46#include <cstdint>
47#include <optional>
48
49using namespace llvm;
50using namespace llvm::PatternMatch;
51using namespace llvm::SCEVPatternMatch;
52
53#define DEBUG_TYPE "loop-peel"
54
55STATISTIC(NumPeeled, "Number of loops peeled");
56STATISTIC(NumPeeledEnd, "Number of loops peeled from end");
57
58namespace llvm {
60 "unroll-peel-count", cl::Hidden,
61 cl::desc("Set the unroll peeling count, for testing purposes"));
62
63static cl::opt<bool>
64 UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden,
65 cl::desc("Allows loops to be peeled when the dynamic "
66 "trip count is known to be low."));
67
68static cl::opt<bool>
69 UnrollAllowLoopNestsPeeling("unroll-allow-loop-nests-peeling",
70 cl::init(false), cl::Hidden,
71 cl::desc("Allows loop nests to be peeled."));
72
74 "unroll-peel-max-count", cl::init(7), cl::Hidden,
75 cl::desc("Max average trip count which will cause loop peeling."));
76
78 "unroll-force-peel-count", cl::init(0), cl::Hidden,
79 cl::desc("Force a peel count regardless of profiling information."));
80
82 "disable-advanced-peeling", cl::init(false), cl::Hidden,
84 "Disable advance peeling. Issues for convergent targets (D134803)."));
85
87 "enable-peeling-for-iv", cl::init(false), cl::Hidden,
88 cl::desc("Enable peeling to convert Phi nodes into IVs"));
89
90static const char *PeeledCountMetaData = "llvm.loop.peeled.count";
91
93} // namespace llvm
94
95// Check whether we are capable of peeling this loop.
96bool llvm::canPeel(const Loop *L) {
97 // Make sure the loop is in simplified form
98 if (!L->isLoopSimplifyForm())
99 return false;
101 return true;
102
104 L->getUniqueNonLatchExitBlocks(Exits);
105 // The latch must either be the only exiting block or all non-latch exit
106 // blocks have either a deopt or unreachable terminator or compose a chain of
107 // blocks where the last one is either deopt or unreachable terminated. Both
108 // deopt and unreachable terminators are a strong indication they are not
109 // taken. Note that this is a profitability check, not a legality check. Also
110 // note that LoopPeeling currently can only update the branch weights of latch
111 // blocks and branch weights to blocks with deopt or unreachable do not need
112 // updating.
114}
115
116namespace {
117
118// As a loop is peeled, it may be the case that Phi nodes become
119// loop-invariant (ie, known because there is only one choice).
120// For example, consider the following function:
121// void g(int);
122// void binary() {
123// int x = 0;
124// int y = 0;
125// int a = 0;
126// for(int i = 0; i <100000; ++i) {
127// g(x);
128// x = y;
129// g(a);
130// y = a + 1;
131// a = 5;
132// }
133// }
134// Peeling 3 iterations is beneficial because the values for x, y and a
135// become known. The IR for this loop looks something like the following:
136//
137// %i = phi i32 [ 0, %entry ], [ %inc, %if.end ]
138// %a = phi i32 [ 0, %entry ], [ 5, %if.end ]
139// %y = phi i32 [ 0, %entry ], [ %add, %if.end ]
140// %x = phi i32 [ 0, %entry ], [ %y, %if.end ]
141// ...
142// tail call void @_Z1gi(i32 signext %x)
143// tail call void @_Z1gi(i32 signext %a)
144// %add = add nuw nsw i32 %a, 1
145// %inc = add nuw nsw i32 %i, 1
146// %exitcond = icmp eq i32 %inc, 100000
147// br i1 %exitcond, label %for.cond.cleanup, label %for.body
148//
149// The arguments for the calls to g will become known after 3 iterations
150// of the loop, because the phi nodes values become known after 3 iterations
151// of the loop (ie, they are known on the 4th iteration, so peel 3 iterations).
152// The first iteration has g(0), g(0); the second has g(0), g(5); the
153// third has g(1), g(5) and the fourth (and all subsequent) have g(6), g(5).
154// Now consider the phi nodes:
155// %a is a phi with constants so it is determined after iteration 1.
156// %y is a phi based on a constant and %a so it is determined on
157// the iteration after %a is determined, so iteration 2.
158// %x is a phi based on a constant and %y so it is determined on
159// the iteration after %y, so iteration 3.
160// %i is based on itself (and is an induction variable) so it is
161// never determined.
162// This means that peeling off 3 iterations will result in being able to
163// remove the phi nodes for %a, %y, and %x. The arguments for the
164// corresponding calls to g are determined and the code for computing
165// x, y, and a can be removed.
166//
167// Similarly, there are cases where peeling makes Phi nodes loop-inductions
168// (i.e., the value is increased or decreased by a fixed amount on every
169// iteration). For example, consider the following function.
170//
171// #define N 100
172// void f(int a[], int b[]) {
173// int im = N - 1;
174// for (int i = 0; i < N; i++) {
175// a[i] = b[i] + b[im];
176// im = i;
177// }
178// }
179//
180// The IR of the loop will look something like the following.
181//
182// %i = phi i32 [ 0, %entry ], [ %i.next, %for.body ]
183// %im = phi i32 [ 99, %entry ], [ %i, %for.body ]
184// ...
185// %i.next = add nuw nsw i32 %i, 1
186// ...
187//
188// In this case, %im becomes a loop-induction variable by peeling 1 iteration,
189// because %i is a loop-induction one. The peeling count can be determined by
190// the same algorithm with loop-invariant case. Such peeling is profitable for
191// loop-vectorization.
192//
193// The PhiAnalyzer class calculates how many times a loop should be
194// peeled based on the above analysis of the phi nodes in the loop while
195// respecting the maximum specified.
196class PhiAnalyzer {
197public:
198 PhiAnalyzer(const Loop &L, unsigned MaxIterations, bool PeelForIV);
199
200 // Calculate the sufficient minimum number of iterations of the loop to peel
201 // such that phi instructions become determined (subject to allowable limits)
202 std::optional<unsigned> calculateIterationsToPeel();
203
204protected:
205 enum class PeelCounterType {
206 Invariant,
207 Induction,
208 };
209
210 using PeelCounterValue = std::pair<unsigned, PeelCounterType>;
211 using PeelCounter = std::optional<PeelCounterValue>;
212 const PeelCounter Unknown = std::nullopt;
213
214 // Add 1 respecting Unknown and return Unknown if result over MaxIterations
215 PeelCounter addOne(PeelCounter PC) const {
216 if (PC == Unknown)
217 return Unknown;
218 auto [Val, Ty] = *PC;
219 return (Val + 1 <= MaxIterations) ? PeelCounter({Val + 1, Ty}) : Unknown;
220 }
221
222 // Return a value representing zero for the given counter type.
223 PeelCounter makeZero(PeelCounterType Ty) const {
224 return PeelCounter({0, Ty});
225 }
226
227 // Calculate the number of iterations after which the given value becomes an
228 // invariant or an induction.
229 PeelCounter calculate(const Value &);
230
231 // Auxiliary function to calculate the number of iterations for a comparison
232 // instruction or a binary operator.
233 PeelCounter mergeTwoCounters(const Instruction &CmpOrBinaryOp,
234 const PeelCounterValue &LHS,
235 const PeelCounterValue &RHS) const;
236
237 // Returns true if the \p Phi is an induction in the target loop. This is a
238 // lightweight check and possible to detect an IV in some cases.
239 bool isInductionPHI(const PHINode *Phi) const;
240
241 const Loop &L;
242 const unsigned MaxIterations;
243 const bool PeelForIV;
244
245 // Map of Values to number of iterations to invariance or induction
246 SmallDenseMap<const Value *, PeelCounter> IterationsToInvarianceOrInduction;
247};
248
249PhiAnalyzer::PhiAnalyzer(const Loop &L, unsigned MaxIterations, bool PeelForIV)
250 : L(L), MaxIterations(MaxIterations), PeelForIV(PeelForIV) {
251 assert(canPeel(&L) && "loop is not suitable for peeling");
252 assert(MaxIterations > 0 && "no peeling is allowed?");
253}
254
255/// Test whether \p Phi is an induction variable. Although this can be
256/// determined using SCEV analysis, it is expensive to compute here. Instead,
257/// we perform cheaper checks that may not detect complex cases but are
258/// sufficient for some situations.
259bool PhiAnalyzer::isInductionPHI(const PHINode *Phi) const {
260 // Currently we only support a loop that has single latch.
261 BasicBlock *Latch = L.getLoopLatch();
262 if (Latch == nullptr)
263 return false;
264
265 Value *Cur = Phi->getIncomingValueForBlock(Latch);
266 SmallPtrSet<Value *, 4> Visited;
267 bool VisitBinOp = false;
268
269 // Starting from the incoming value of the Phi, we follow the use-def chain.
270 // We consider Phi to be an IV if we can reach it again by traversing only
271 // add, sub, or cast instructions.
272 while (true) {
273 if (Cur == Phi)
274 break;
275
276 // Avoid infinite loop.
277 if (!Visited.insert(Cur).second)
278 return false;
279
280 auto *I = dyn_cast<Instruction>(Cur);
281 if (!I || !L.contains(I))
282 return false;
283
284 if (auto *Cast = dyn_cast<CastInst>(I)) {
285 Cur = Cast->getOperand(0);
286 } else if (auto *BinOp = dyn_cast<BinaryOperator>(I)) {
287 if (BinOp->getOpcode() != Instruction::Add &&
288 BinOp->getOpcode() != Instruction::Sub)
289 return false;
290 if (!isa<ConstantInt>(BinOp->getOperand(1)))
291 return false;
292
293 VisitBinOp = true;
294 Cur = BinOp->getOperand(0);
295 } else {
296 return false;
297 }
298 }
299
300 // Ignore cases where no binary operations are visited.
301 return VisitBinOp;
302}
303
304/// When either \p LHS or \p RHS is an IV, the result of \p CmpOrBinaryOp is
305/// considered an IV only if it is an addition or a subtraction. Otherwise the
306/// result can be a value that is neither a loop-invariant nor an IV.
307///
308/// If both \p LHS and \p RHS are loop-invariants, then the result of
309/// \CmpOrBinaryOp is also a loop-invariant.
310PhiAnalyzer::PeelCounter
311PhiAnalyzer::mergeTwoCounters(const Instruction &CmpOrBinaryOp,
312 const PeelCounterValue &LHS,
313 const PeelCounterValue &RHS) const {
314 auto &[LVal, LTy] = LHS;
315 auto &[RVal, RTy] = RHS;
316 unsigned NewVal = std::max(LVal, RVal);
317
318 if (LTy == PeelCounterType::Induction || RTy == PeelCounterType::Induction) {
319 if (const auto *BinOp = dyn_cast<BinaryOperator>(&CmpOrBinaryOp)) {
320 if (BinOp->getOpcode() == Instruction::Add ||
321 BinOp->getOpcode() == Instruction::Sub)
322 return PeelCounter({NewVal, PeelCounterType::Induction});
323 }
324 return Unknown;
325 }
326 return PeelCounter({NewVal, PeelCounterType::Invariant});
327}
328
329// This function calculates the number of iterations after which the value
330// becomes an invariant. The pre-calculated values are memorized in a map.
331// N.B. This number will be Unknown or <= MaxIterations.
332// The function is calculated according to the following definition:
333// Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge].
334// F(%x) = G(%y) + 1 (N.B. [MaxIterations | Unknown] + 1 => Unknown)
335// G(%y) = 0 if %y is a loop invariant
336// G(%y) = G(%BackEdgeValue) if %y is a phi in the header block
337// G(%y) = TODO: if %y is an expression based on phis and loop invariants
338// The example looks like:
339// %x = phi(0, %a) <-- becomes invariant starting from 3rd iteration.
340// %y = phi(0, 5)
341// %a = %y + 1
342// G(%y) = Unknown otherwise (including phi not in header block)
343PhiAnalyzer::PeelCounter PhiAnalyzer::calculate(const Value &V) {
344 // If we already know the answer, take it from the map.
345 // Otherwise, place Unknown to map to avoid infinite recursion. Such
346 // cycles can never stop on an invariant.
347 auto [I, Inserted] =
348 IterationsToInvarianceOrInduction.try_emplace(&V, Unknown);
349 if (!Inserted)
350 return I->second;
351
352 if (L.isLoopInvariant(&V))
353 // Loop invariant so known at start.
354 return (IterationsToInvarianceOrInduction[&V] =
355 makeZero(PeelCounterType::Invariant));
356 if (const PHINode *Phi = dyn_cast<PHINode>(&V)) {
357 if (Phi->getParent() != L.getHeader()) {
358 // Phi is not in header block so Unknown.
359 assert(IterationsToInvarianceOrInduction[&V] == Unknown &&
360 "unexpected value saved");
361 return Unknown;
362 }
363
364 // If Phi is an induction, register it as a starting point.
365 if (PeelForIV && isInductionPHI(Phi))
366 return (IterationsToInvarianceOrInduction[&V] =
367 makeZero(PeelCounterType::Induction));
368
369 // We need to analyze the input from the back edge and add 1.
370 Value *Input = Phi->getIncomingValueForBlock(L.getLoopLatch());
371 PeelCounter Iterations = calculate(*Input);
372 assert(IterationsToInvarianceOrInduction[Input] == Iterations &&
373 "unexpected value saved");
374 return (IterationsToInvarianceOrInduction[Phi] = addOne(Iterations));
375 }
376 if (const Instruction *I = dyn_cast<Instruction>(&V)) {
377 if (isa<CmpInst>(I) || I->isBinaryOp()) {
378 // Binary instructions get the max of the operands.
379 PeelCounter LHS = calculate(*I->getOperand(0));
380 if (LHS == Unknown)
381 return Unknown;
382 PeelCounter RHS = calculate(*I->getOperand(1));
383 if (RHS == Unknown)
384 return Unknown;
385 return (IterationsToInvarianceOrInduction[I] =
386 mergeTwoCounters(*I, *LHS, *RHS));
387 }
388 if (I->isCast())
389 // Cast instructions get the value of the operand.
390 return (IterationsToInvarianceOrInduction[I] =
391 calculate(*I->getOperand(0)));
392 }
393 // TODO: handle more expressions
394
395 // Everything else is Unknown.
396 assert(IterationsToInvarianceOrInduction[&V] == Unknown &&
397 "unexpected value saved");
398 return Unknown;
399}
400
401std::optional<unsigned> PhiAnalyzer::calculateIterationsToPeel() {
402 unsigned Iterations = 0;
403 for (auto &PHI : L.getHeader()->phis()) {
404 PeelCounter ToInvarianceOrInduction = calculate(PHI);
405 if (ToInvarianceOrInduction != Unknown) {
406 unsigned Val = ToInvarianceOrInduction->first;
407 assert(Val <= MaxIterations && "bad result in phi analysis");
408 Iterations = std::max(Iterations, Val);
409 if (Iterations == MaxIterations)
410 break;
411 }
412 }
413 assert((Iterations <= MaxIterations) && "bad result in phi analysis");
414 return Iterations ? std::optional<unsigned>(Iterations) : std::nullopt;
415}
416
417} // unnamed namespace
418
419// Try to find any invariant memory reads that will become dereferenceable in
420// the remainder loop after peeling. The load must also be used (transitively)
421// by an exit condition. Returns the number of iterations to peel off (at the
422// moment either 0 or 1).
424 DominatorTree &DT,
425 AssumptionCache *AC) {
426 // Skip loops with a single exiting block, because there should be no benefit
427 // for the heuristic below.
428 if (L.getExitingBlock())
429 return 0;
430
431 // All non-latch exit blocks must have an UnreachableInst terminator.
432 // Otherwise the heuristic below may not be profitable.
434 L.getUniqueNonLatchExitBlocks(Exits);
435 if (any_of(Exits, [](const BasicBlock *BB) {
436 return !isa<UnreachableInst>(BB->getTerminator());
437 }))
438 return 0;
439
440 // Now look for invariant loads that dominate the latch and are not known to
441 // be dereferenceable. If there are such loads and no writes, they will become
442 // dereferenceable in the loop if the first iteration is peeled off. Also
443 // collect the set of instructions controlled by such loads. Only peel if an
444 // exit condition uses (transitively) such a load.
445 BasicBlock *Header = L.getHeader();
446 BasicBlock *Latch = L.getLoopLatch();
447 SmallPtrSet<Value *, 8> LoadUsers;
448 const DataLayout &DL = L.getHeader()->getDataLayout();
449 for (BasicBlock *BB : L.blocks()) {
450 for (Instruction &I : *BB) {
451 // Calls that only access inaccessible memory can never alias with loads.
452 if (I.mayWriteToMemory() &&
453 !(isa<CallBase>(I) &&
454 cast<CallBase>(I).onlyAccessesInaccessibleMemory()))
455 return 0;
456
457 if (LoadUsers.contains(&I))
458 LoadUsers.insert_range(I.users());
459 // Do not look for reads in the header; they can already be hoisted
460 // without peeling.
461 if (BB == Header)
462 continue;
463 if (auto *LI = dyn_cast<LoadInst>(&I)) {
464 Value *Ptr = LI->getPointerOperand();
465 if (DT.dominates(BB, Latch) && L.isLoopInvariant(Ptr) &&
466 !isDereferenceablePointer(Ptr, LI->getType(), DL, LI, AC, &DT))
467 LoadUsers.insert_range(I.users());
468 }
469 }
470 }
471 SmallVector<BasicBlock *> ExitingBlocks;
472 L.getExitingBlocks(ExitingBlocks);
473 if (any_of(ExitingBlocks, [&LoadUsers](BasicBlock *Exiting) {
474 return LoadUsers.contains(Exiting->getTerminator());
475 }))
476 return 1;
477 return 0;
478}
479
481 const SCEV *BTC = SE.getBackedgeTakenCount(&L);
483 return false;
484
485 // Check if the exit condition of the loop can be adjusted by the peeling
486 // codegen. For now, it must
487 // * exit via the latch,
488 // * the exit condition must be a NE/EQ compare of an induction with step
489 // of 1 and must only be used by the exiting branch.
490 BasicBlock *Latch = L.getLoopLatch();
491 Value *Inc;
492 Value *Bound;
493 CmpPredicate Pred;
494 BasicBlock *Succ1;
495 BasicBlock *Succ2;
496 return Latch && Latch == L.getExitingBlock() &&
497 match(Latch->getTerminator(),
498 m_Br(m_OneUse(m_ICmp(Pred, m_Value(Inc), m_Value(Bound))),
499 m_BasicBlock(Succ1), m_BasicBlock(Succ2))) &&
500 ((Pred == CmpInst::ICMP_EQ && Succ2 == L.getHeader()) ||
501 (Pred == CmpInst::ICMP_NE && Succ1 == L.getHeader())) &&
502 Bound->getType()->isIntegerTy() &&
503 SE.isLoopInvariant(SE.getSCEV(Bound), &L) &&
504 match(SE.getSCEV(Inc),
506}
507
508/// Returns true if the last iteration can be peeled off and the condition (Pred
509/// LeftAR, RightSCEV) is known at the last iteration and the inverse condition
510/// is known at the second-to-last.
512 const SCEVAddRecExpr *LeftAR,
513 const SCEV *RightSCEV, ScalarEvolution &SE,
514 const TargetTransformInfo &TTI) {
515 if (!canPeelLastIteration(L, SE))
516 return false;
517
518 const SCEV *BTC = SE.getBackedgeTakenCount(&L);
519 SCEVExpander Expander(SE, "loop-peel");
520 if (!SE.isKnownNonZero(BTC) &&
522 L.getLoopPredecessor()->getTerminator()))
523 return false;
524
525 auto Guards = ScalarEvolution::LoopGuards::collect(&L, SE);
526 BTC = SE.applyLoopGuards(BTC, Guards);
527 RightSCEV = SE.applyLoopGuards(RightSCEV, Guards);
528 const SCEV *ValAtLastIter = LeftAR->evaluateAtIteration(BTC, SE);
529 const SCEV *ValAtSecondToLastIter = LeftAR->evaluateAtIteration(
530 SE.getMinusSCEV(BTC, SE.getOne(BTC->getType())), SE);
531
532 return SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), ValAtLastIter,
533 RightSCEV) &&
534 SE.isKnownPredicate(Pred, ValAtSecondToLastIter, RightSCEV);
535}
536
537// Return the number of iterations to peel off from the beginning and end of the
538// loop respectively, that make conditions in the body true/false. For example,
539// if we peel 2 iterations off the loop below, the condition i < 2 can be
540// evaluated at compile time.
541//
542// for (i = 0; i < n; i++)
543// if (i < 2)
544// ..
545// else
546// ..
547// }
548static std::pair<unsigned, unsigned>
549countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE,
550 const TargetTransformInfo &TTI) {
551 assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form");
552 unsigned DesiredPeelCount = 0;
553 unsigned DesiredPeelCountLast = 0;
554
555 // Do not peel the entire loop.
556 const SCEV *BE = SE.getConstantMaxBackedgeTakenCount(&L);
557 if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(BE))
558 MaxPeelCount =
559 std::min((unsigned)SC->getAPInt().getLimitedValue() - 1, MaxPeelCount);
560
561 // Increase PeelCount while (IterVal Pred BoundSCEV) condition is satisfied;
562 // return true if inversed condition become known before reaching the
563 // MaxPeelCount limit.
564 auto PeelWhilePredicateIsKnown =
565 [&](unsigned &PeelCount, const SCEV *&IterVal, const SCEV *BoundSCEV,
566 const SCEV *Step, ICmpInst::Predicate Pred) {
567 while (PeelCount < MaxPeelCount &&
568 SE.isKnownPredicate(Pred, IterVal, BoundSCEV)) {
569 IterVal = SE.getAddExpr(IterVal, Step);
570 ++PeelCount;
571 }
572 return SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal,
573 BoundSCEV);
574 };
575
576 const unsigned MaxDepth = 4;
577 std::function<void(Value *, unsigned)> ComputePeelCount =
578 [&](Value *Condition, unsigned Depth) -> void {
579 if (!Condition->getType()->isIntegerTy() || Depth >= MaxDepth)
580 return;
581
582 Value *LeftVal, *RightVal;
583 if (match(Condition, m_And(m_Value(LeftVal), m_Value(RightVal))) ||
584 match(Condition, m_Or(m_Value(LeftVal), m_Value(RightVal)))) {
585 ComputePeelCount(LeftVal, Depth + 1);
586 ComputePeelCount(RightVal, Depth + 1);
587 return;
588 }
589
590 CmpPredicate Pred;
591 if (!match(Condition, m_ICmp(Pred, m_Value(LeftVal), m_Value(RightVal))))
592 return;
593
594 const SCEV *LeftSCEV = SE.getSCEV(LeftVal);
595 const SCEV *RightSCEV = SE.getSCEV(RightVal);
596
597 // Do not consider predicates that are known to be true or false
598 // independently of the loop iteration.
599 if (SE.evaluatePredicate(Pred, LeftSCEV, RightSCEV))
600 return;
601
602 // Check if we have a condition with one AddRec and one non AddRec
603 // expression. Normalize LeftSCEV to be the AddRec.
604 if (!isa<SCEVAddRecExpr>(LeftSCEV)) {
605 if (isa<SCEVAddRecExpr>(RightSCEV)) {
606 std::swap(LeftSCEV, RightSCEV);
608 } else
609 return;
610 }
611
612 const SCEVAddRecExpr *LeftAR = cast<SCEVAddRecExpr>(LeftSCEV);
613
614 // Avoid huge SCEV computations in the loop below, make sure we only
615 // consider AddRecs of the loop we are trying to peel.
616 if (!LeftAR->isAffine() || LeftAR->getLoop() != &L)
617 return;
618 if (!(ICmpInst::isEquality(Pred) && LeftAR->hasNoSelfWrap()) &&
619 !SE.getMonotonicPredicateType(LeftAR, Pred))
620 return;
621
622 // Check if extending the current DesiredPeelCount lets us evaluate Pred
623 // or !Pred in the loop body statically.
624 unsigned NewPeelCount = DesiredPeelCount;
625
626 const SCEV *IterVal = LeftAR->evaluateAtIteration(
627 SE.getConstant(LeftSCEV->getType(), NewPeelCount), SE);
628
629 // If the original condition is not known, get the negated predicate
630 // (which holds on the else branch) and check if it is known. This allows
631 // us to peel of iterations that make the original condition false.
632 if (!SE.isKnownPredicate(Pred, IterVal, RightSCEV))
634
635 const SCEV *Step = LeftAR->getStepRecurrence(SE);
636 if (!PeelWhilePredicateIsKnown(NewPeelCount, IterVal, RightSCEV, Step,
637 Pred)) {
638 if (shouldPeelLastIteration(L, Pred, LeftAR, RightSCEV, SE, TTI))
639 DesiredPeelCountLast = 1;
640 return;
641 }
642
643 // However, for equality comparisons, that isn't always sufficient to
644 // eliminate the comparsion in loop body, we may need to peel one more
645 // iteration. See if that makes !Pred become unknown again.
646 const SCEV *NextIterVal = SE.getAddExpr(IterVal, Step);
647 if (ICmpInst::isEquality(Pred) &&
648 !SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), NextIterVal,
649 RightSCEV) &&
650 !SE.isKnownPredicate(Pred, IterVal, RightSCEV) &&
651 SE.isKnownPredicate(Pred, NextIterVal, RightSCEV)) {
652 if (NewPeelCount >= MaxPeelCount)
653 return; // Need to peel one more iteration, but can't. Give up.
654 ++NewPeelCount; // Great!
655 }
656
657 DesiredPeelCount = std::max(DesiredPeelCount, NewPeelCount);
658 DesiredPeelCountLast = std::max(DesiredPeelCountLast, NewPeelCount);
659 };
660
661 auto ComputePeelCountMinMax = [&](MinMaxIntrinsic *MinMax) {
662 if (!MinMax->getType()->isIntegerTy())
663 return;
664 Value *LHS = MinMax->getLHS(), *RHS = MinMax->getRHS();
665 const SCEV *BoundSCEV, *IterSCEV;
666 if (L.isLoopInvariant(LHS)) {
667 BoundSCEV = SE.getSCEV(LHS);
668 IterSCEV = SE.getSCEV(RHS);
669 } else if (L.isLoopInvariant(RHS)) {
670 BoundSCEV = SE.getSCEV(RHS);
671 IterSCEV = SE.getSCEV(LHS);
672 } else
673 return;
674 const auto *AddRec = dyn_cast<SCEVAddRecExpr>(IterSCEV);
675 // For simplicity, we support only affine recurrences.
676 if (!AddRec || !AddRec->isAffine() || AddRec->getLoop() != &L)
677 return;
678 const SCEV *Step = AddRec->getStepRecurrence(SE);
679 bool IsSigned = MinMax->isSigned();
680 // To minimize number of peeled iterations, we use strict relational
681 // predicates here.
683 if (SE.isKnownPositive(Step))
684 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
685 else if (SE.isKnownNegative(Step))
686 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
687 else
688 return;
689 // Check that AddRec is not wrapping.
690 if (!(IsSigned ? AddRec->hasNoSignedWrap() : AddRec->hasNoUnsignedWrap()))
691 return;
692 unsigned NewPeelCount = DesiredPeelCount;
693 const SCEV *IterVal = AddRec->evaluateAtIteration(
694 SE.getConstant(AddRec->getType(), NewPeelCount), SE);
695 if (!PeelWhilePredicateIsKnown(NewPeelCount, IterVal, BoundSCEV, Step,
696 Pred)) {
697 if (shouldPeelLastIteration(L, Pred, AddRec, BoundSCEV, SE, TTI))
698 DesiredPeelCountLast = 1;
699 return;
700 }
701 DesiredPeelCount = NewPeelCount;
702 };
703
704 for (BasicBlock *BB : L.blocks()) {
705 for (Instruction &I : *BB) {
707 ComputePeelCount(SI->getCondition(), 0);
709 ComputePeelCountMinMax(MinMax);
710 }
711
712 auto *BI = dyn_cast<CondBrInst>(BB->getTerminator());
713 if (!BI)
714 continue;
715
716 // Ignore loop exit condition.
717 if (L.getLoopLatch() == BB)
718 continue;
719
720 ComputePeelCount(BI->getCondition(), 0);
721 }
722
723 return {DesiredPeelCount, DesiredPeelCountLast};
724}
725
726/// This "heuristic" exactly matches implicit behavior which used to exist
727/// inside getLoopEstimatedTripCount. It was added here to keep an
728/// improvement inside that API from causing peeling to become more aggressive.
729/// This should probably be removed.
731 BasicBlock *Latch = L->getLoopLatch();
732 if (!Latch)
733 return true;
734
735 CondBrInst *LatchBR = dyn_cast<CondBrInst>(Latch->getTerminator());
736 if (!LatchBR || !L->isLoopExiting(Latch))
737 return true;
738
739 assert((LatchBR->getSuccessor(0) == L->getHeader() ||
740 LatchBR->getSuccessor(1) == L->getHeader()) &&
741 "At least one edge out of the latch must go to the header");
742
744 L->getUniqueNonLatchExitBlocks(ExitBlocks);
745 return any_of(ExitBlocks, [](const BasicBlock *EB) {
746 return !EB->getTerminatingDeoptimizeCall();
747 });
748}
749
750
751// Return the number of iterations we want to peel off.
752void llvm::computePeelCount(Loop *L, unsigned LoopSize,
754 unsigned TripCount, DominatorTree &DT,
756 AssumptionCache *AC, unsigned Threshold) {
757 assert(LoopSize > 0 && "Zero loop size is not allowed!");
758 // Save the PP.PeelCount value set by the target in
759 // TTI.getPeelingPreferences or by the flag -unroll-peel-count.
760 unsigned TargetPeelCount = PP.PeelCount;
761 PP.PeelCount = 0;
762 PP.PeelLast = false;
763 if (!canPeel(L))
764 return;
765
766 // Only try to peel innermost loops by default.
767 // The constraint can be relaxed by the target in TTI.getPeelingPreferences
768 // or by the flag -unroll-allow-loop-nests-peeling.
769 if (!PP.AllowLoopNestsPeeling && !L->isInnermost())
770 return;
771
772 // If the user provided a peel count, use that.
773 bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0;
774 if (UserPeelCount) {
775 LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
776 << " iterations.\n");
778 PP.PeelProfiledIterations = true;
779 return;
780 }
781
782 // Skip peeling if it's disabled.
783 if (!PP.AllowPeeling)
784 return;
785
786 // Check that we can peel at least one iteration.
787 if (2 * LoopSize > Threshold)
788 return;
789
790 unsigned AlreadyPeeled = 0;
792 AlreadyPeeled = *Peeled;
793 // Stop if we already peeled off the maximum number of iterations.
794 if (AlreadyPeeled >= UnrollPeelMaxCount)
795 return;
796
797 // Pay respect to limitations implied by loop size and the max peel count.
798 unsigned MaxPeelCount = UnrollPeelMaxCount;
799 MaxPeelCount = std::min(MaxPeelCount, Threshold / LoopSize - 1);
800
801 // Start the max computation with the PP.PeelCount value set by the target
802 // in TTI.getPeelingPreferences or by the flag -unroll-peel-count.
803 unsigned DesiredPeelCount = TargetPeelCount;
804
805 // Here we try to get rid of Phis which become invariants or inductions after
806 // 1, 2, ..., N iterations of the loop. For this we compute the number for
807 // iterations after which every Phi is guaranteed to become an invariant or an
808 // induction, and try to peel the maximum number of iterations among these
809 // values, thus turning all those Phis into invariants or inductions.
810 if (MaxPeelCount > DesiredPeelCount) {
811 // Check how many iterations are useful for resolving Phis
812 auto NumPeels = PhiAnalyzer(*L, MaxPeelCount, EnablePeelingForIV)
813 .calculateIterationsToPeel();
814 if (NumPeels)
815 DesiredPeelCount = std::max(DesiredPeelCount, *NumPeels);
816 }
817
818 const auto &[CountToEliminateCmps, CountToEliminateCmpsLast] =
819 countToEliminateCompares(*L, MaxPeelCount, SE, TTI);
820 DesiredPeelCount = std::max(DesiredPeelCount, CountToEliminateCmps);
821
822 if (DesiredPeelCount == 0)
823 DesiredPeelCount = peelToTurnInvariantLoadsDereferenceable(*L, DT, AC);
824
825 if (DesiredPeelCount > 0) {
826 DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
827 // Consider max peel count limitation.
828 assert(DesiredPeelCount > 0 && "Wrong loop size estimation?");
829 if (DesiredPeelCount + AlreadyPeeled <= UnrollPeelMaxCount) {
830 LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
831 << " iteration(s) to turn"
832 << " some Phis into invariants or inductions.\n");
833 PP.PeelCount = DesiredPeelCount;
834 PP.PeelProfiledIterations = false;
835 PP.PeelLast = false;
836 return;
837 }
838 }
839
840 if (CountToEliminateCmpsLast > 0) {
841 unsigned DesiredPeelCountLast =
842 std::min(CountToEliminateCmpsLast, MaxPeelCount);
843 // Consider max peel count limitation.
844 assert(DesiredPeelCountLast > 0 && "Wrong loop size estimation?");
845 if (DesiredPeelCountLast + AlreadyPeeled <= UnrollPeelMaxCount) {
846 LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
847 << " iteration(s) to turn"
848 << " some Phis into invariants.\n");
849 PP.PeelCount = DesiredPeelCountLast;
850 PP.PeelProfiledIterations = false;
851 PP.PeelLast = true;
852 return;
853 }
854 }
855
856 // Bail if we know the statically calculated trip count.
857 // In this case we rather prefer partial unrolling.
858 if (TripCount)
859 return;
860
861 // Do not apply profile base peeling if it is disabled.
863 return;
864 // If we don't know the trip count, but have reason to believe the average
865 // trip count is low, peeling should be beneficial, since we will usually
866 // hit the peeled section.
867 // We only do this in the presence of profile information, since otherwise
868 // our estimates of the trip count are not reliable enough.
869 if (L->getHeader()->getParent()->hasProfileData()) {
871 return;
872 std::optional<unsigned> EstimatedTripCount = getLoopEstimatedTripCount(L);
873 if (!EstimatedTripCount)
874 return;
875
876 LLVM_DEBUG(dbgs() << "Profile-based estimated trip count is "
877 << *EstimatedTripCount << "\n");
878
879 if (*EstimatedTripCount + AlreadyPeeled <= MaxPeelCount) {
880 unsigned PeelCount = *EstimatedTripCount;
881 LLVM_DEBUG(dbgs() << "Peeling first " << PeelCount << " iterations.\n");
882 PP.PeelCount = PeelCount;
883 return;
884 }
885 LLVM_DEBUG(dbgs() << "Already peel count: " << AlreadyPeeled << "\n");
886 LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n");
887 LLVM_DEBUG(dbgs() << "Loop cost: " << LoopSize << "\n");
888 LLVM_DEBUG(dbgs() << "Max peel cost: " << Threshold << "\n");
889 LLVM_DEBUG(dbgs() << "Max peel count by cost: "
890 << (Threshold / LoopSize - 1) << "\n");
891 }
892}
893
894/// Clones the body of the loop L, putting it between \p InsertTop and \p
895/// InsertBot.
896/// \param IterNumber The serial number of the iteration currently being
897/// peeled off.
898/// \param PeelLast Peel off the last iterations from \p L.
899/// \param ExitEdges The exit edges of the original loop.
900/// \param[out] NewBlocks A list of the blocks in the newly created clone
901/// \param[out] VMap The value map between the loop and the new clone.
902/// \param LoopBlocks A helper for DFS-traversal of the loop.
903/// \param LVMap A value-map that maps instructions from the original loop to
904/// instructions in the last peeled-off iteration.
905static void cloneLoopBlocks(
906 Loop *L, unsigned IterNumber, bool PeelLast, BasicBlock *InsertTop,
907 BasicBlock *InsertBot, BasicBlock *OrigPreHeader,
908 SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges,
909 SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
911 LoopInfo *LI, ArrayRef<MDNode *> LoopLocalNoAliasDeclScopes,
912 ScalarEvolution &SE) {
913 BasicBlock *Header = L->getHeader();
914 BasicBlock *Latch = L->getLoopLatch();
915 BasicBlock *PreHeader = L->getLoopPreheader();
916
917 Function *F = Header->getParent();
918 LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
919 LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
920 Loop *ParentLoop = L->getParentLoop();
921
922 // For each block in the original loop, create a new copy,
923 // and update the value map with the newly created values.
924 for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
925 BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".peel", F);
926 NewBlocks.push_back(NewBB);
927
928 // If an original block is an immediate child of the loop L, its copy
929 // is a child of a ParentLoop after peeling. If a block is a child of
930 // a nested loop, it is handled in the cloneLoop() call below.
931 if (ParentLoop && LI->getLoopFor(*BB) == L)
932 ParentLoop->addBasicBlockToLoop(NewBB, *LI);
933
934 VMap[*BB] = NewBB;
935
936 // If dominator tree is available, insert nodes to represent cloned blocks.
937 if (DT) {
938 if (Header == *BB)
939 DT->addNewBlock(NewBB, InsertTop);
940 else {
941 DomTreeNode *IDom = DT->getNode(*BB)->getIDom();
942 // VMap must contain entry for IDom, as the iteration order is RPO.
943 DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDom->getBlock()]));
944 }
945 }
946 }
947
948 {
949 // Identify what other metadata depends on the cloned version. After
950 // cloning, replace the metadata with the corrected version for both
951 // memory instructions and noalias intrinsics.
952 std::string Ext = (Twine("Peel") + Twine(IterNumber)).str();
953 cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks,
954 Header->getContext(), Ext);
955 }
956
957 // Recursively create the new Loop objects for nested loops, if any,
958 // to preserve LoopInfo.
959 for (Loop *ChildLoop : *L) {
960 cloneLoop(ChildLoop, ParentLoop, VMap, LI, nullptr);
961 }
962
963 // Hook-up the control flow for the newly inserted blocks.
964 // The new header is hooked up directly to the "top", which is either
965 // the original loop preheader (for the first iteration) or the previous
966 // iteration's exiting block (for every other iteration)
967 InsertTop->getTerminator()->setSuccessor(0, cast<BasicBlock>(VMap[Header]));
968
969 // Similarly, for the latch:
970 // The original exiting edge is still hooked up to the loop exit.
971 BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
972 if (PeelLast) {
973 // This is the last iteration and we definitely will go to the exit. Just
974 // set both successors to InsertBot and let the branch be simplified later.
975 assert(IterNumber == 0 && "Only peeling a single iteration implemented.");
976 auto *LatchTerm = cast<CondBrInst>(NewLatch->getTerminator());
977 LatchTerm->setSuccessor(0, InsertBot);
978 LatchTerm->setSuccessor(1, InsertBot);
979 } else {
980 auto *LatchTerm = cast<Instruction>(NewLatch->getTerminator());
981 // The backedge now goes to the "bottom", which is either the loop's real
982 // header (for the last peeled iteration) or the copied header of the next
983 // iteration (for every other iteration)
984 for (unsigned idx = 0, e = LatchTerm->getNumSuccessors(); idx < e; ++idx) {
985 if (LatchTerm->getSuccessor(idx) == Header) {
986 LatchTerm->setSuccessor(idx, InsertBot);
987 break;
988 }
989 }
990 }
991 if (DT)
992 DT->changeImmediateDominator(InsertBot, NewLatch);
993
994 // The new copy of the loop body starts with a bunch of PHI nodes
995 // that pick an incoming value from either the preheader, or the previous
996 // loop iteration. Since this copy is no longer part of the loop, we
997 // resolve this statically:
998 if (PeelLast) {
999 // For the last iteration, we introduce new phis for each header phi in
1000 // InsertTop, using the incoming value from the preheader for the original
1001 // preheader (when skipping the main loop) and the incoming value from the
1002 // latch for the latch (when continuing from the main loop).
1003 IRBuilder<> B(InsertTop, InsertTop->getFirstNonPHIIt());
1004 for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
1005 PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
1006 PHINode *PN = B.CreatePHI(NewPHI->getType(), 2);
1007 NewPHI->eraseFromParent();
1008 if (OrigPreHeader)
1009 PN->addIncoming(cast<PHINode>(&*I)->getIncomingValueForBlock(PreHeader),
1010 OrigPreHeader);
1011
1012 PN->addIncoming(cast<PHINode>(&*I)->getIncomingValueForBlock(Latch),
1013 Latch);
1014 VMap[&*I] = PN;
1015 }
1016 } else {
1017 // For the first iteration, we use the value from the preheader directly.
1018 // For any other iteration, we replace the phi with the value generated by
1019 // the immediately preceding clone of the loop body (which represents
1020 // the previous iteration).
1021 for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
1022 PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
1023 if (IterNumber == 0) {
1024 VMap[&*I] = NewPHI->getIncomingValueForBlock(PreHeader);
1025 } else {
1026 Value *LatchVal = NewPHI->getIncomingValueForBlock(Latch);
1027 Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
1028 if (LatchInst && L->contains(LatchInst))
1029 VMap[&*I] = LVMap[LatchInst];
1030 else
1031 VMap[&*I] = LatchVal;
1032 }
1033 NewPHI->eraseFromParent();
1034 }
1035 }
1036
1037 // Fix up the outgoing values - we need to add a value for the iteration
1038 // we've just created. Note that this must happen *after* the incoming
1039 // values are adjusted, since the value going out of the latch may also be
1040 // a value coming into the header.
1041 for (auto Edge : ExitEdges)
1042 for (PHINode &PHI : Edge.second->phis()) {
1043 Value *LatchVal = PHI.getIncomingValueForBlock(Edge.first);
1044 Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
1045 if (LatchInst && L->contains(LatchInst))
1046 LatchVal = VMap[LatchVal];
1047 PHI.addIncoming(LatchVal, cast<BasicBlock>(VMap[Edge.first]));
1049 }
1050
1051 // LastValueMap is updated with the values for the current loop
1052 // which are used the next time this function is called.
1053 for (auto KV : VMap)
1054 LVMap[KV.first] = KV.second;
1055}
1056
1057TargetTransformInfo::PeelingPreferences
1059 const TargetTransformInfo &TTI,
1060 std::optional<bool> UserAllowPeeling,
1061 std::optional<bool> UserAllowProfileBasedPeeling,
1062 bool UnrollingSpecficValues) {
1064
1065 // Set the default values.
1066 PP.PeelCount = 0;
1067 PP.AllowPeeling = true;
1068 PP.AllowLoopNestsPeeling = false;
1069 PP.PeelLast = false;
1070 PP.PeelProfiledIterations = true;
1071
1072 // Get the target specifc values.
1073 TTI.getPeelingPreferences(L, SE, PP);
1074
1075 // User specified values using cl::opt.
1076 if (UnrollingSpecficValues) {
1077 if (UnrollPeelCount.getNumOccurrences() > 0)
1079 if (UnrollAllowPeeling.getNumOccurrences() > 0)
1081 if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0)
1083 }
1084
1085 // User specifed values provided by argument.
1086 if (UserAllowPeeling)
1087 PP.AllowPeeling = *UserAllowPeeling;
1088 if (UserAllowProfileBasedPeeling)
1089 PP.PeelProfiledIterations = *UserAllowProfileBasedPeeling;
1090
1091 return PP;
1092}
1093
/// Peel off the first \p PeelCount iterations of loop \p L (or, when
/// \p PeelLast is set, the last iteration; the asserts below require
/// PeelCount == 1 in that case).
///
/// Note that this does not peel them off as a single straight-line block.
/// Rather, each iteration is peeled off separately, and needs to check the
/// exit condition.
/// For loops that dynamically execute \p PeelCount iterations or less
/// this provides a benefit, since the peeled off iterations, which account
/// for the bulk of dynamic execution, can be further simplified by scalar
/// optimizations.
///
/// \param[out] LVMap On return, maps instructions of the original loop to
/// their clones in the last peeled-off iteration.
void llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
                    bool PreserveLCSSA, ValueToValueMapTy &LVMap) {
  assert(PeelCount > 0 && "Attempt to peel out zero iterations?");
  assert(canPeel(L) && "Attempt to peel a loop which is not peelable?");
  assert((!PeelLast || (canPeelLastIteration(*L, *SE) && PeelCount == 1)) &&
         "when peeling the last iteration, the loop must be supported and can "
         "only peel a single iteration");

  // Cache an RPO traversal of the loop body; cloneLoopBlocks relies on this
  // order so a block's idom is always cloned (and in VMap) before the block.
  LoopBlocksDFS LoopBlocks(L);
  LoopBlocks.perform(LI);

  BasicBlock *Header = L->getHeader();
  BasicBlock *PreHeader = L->getLoopPreheader();
  BasicBlock *Latch = L->getLoopLatch();
  L->getExitEdges(ExitEdges);

  // Remember dominators of blocks we might reach through exits to change them
  // later. Immediate dominator of such block might change, because we add more
  // routes which can lead to the exit: we can reach it from the peeled
  // iterations too.
  MapVector<BasicBlock *, BasicBlock *> NonLoopBlocksIDom;
  for (auto *BB : L->blocks()) {
    auto *BBDomNode = DT.getNode(BB);
    SmallVector<BasicBlock *, 16> ChildrenToUpdate;
    for (auto *ChildDomNode : BBDomNode->children()) {
      auto *ChildBB = ChildDomNode->getBlock();
      if (!L->contains(ChildBB))
        ChildrenToUpdate.push_back(ChildBB);
    }
    // The new idom of the block will be the nearest common dominator
    // of all copies of the previous idom. This is equivalent to the
    // nearest common dominator of the previous idom and the first latch,
    // which dominates all copies of the previous idom.
    BasicBlock *NewIDom = DT.findNearestCommonDominator(BB, Latch);
    for (auto *ChildBB : ChildrenToUpdate)
      NonLoopBlocksIDom[ChildBB] = NewIDom;
  }

  Function *F = Header->getParent();

  // Set up all the necessary basic blocks.
  BasicBlock *InsertTop;
  BasicBlock *InsertBot;
  BasicBlock *NewPreHeader = nullptr;

  if (PeelLast) {
    // It is convenient to split the single exit block from the latch
    // into 3 parts - two blocks to anchor the peeled copy of the loop body,
    // and a new final exit block.

    // Peeling the last iteration transforms.
    //
    // PreHeader:
    // ...
    // Header:
    //   LoopBody
    //   If (cond) goto Header
    // Exit:
    //
    // into
    //
    // Header:
    //   LoopBody
    //   If (cond) goto Header
    // InsertTop:
    //   LoopBody
    //   If (!cond) goto InsertBot
    // InsertBot:
    // Exit:
    //   ...

    // Record the exit values flowing out of the latch before the CFG is
    // rewritten; they are fixed up after peeling.
    BasicBlock *Exit = L->getExitBlock();
    for (PHINode &P : Exit->phis())
      ExitValues[&P] = P.getIncomingValueForBlock(Latch);

    const SCEV *BTC = SE->getBackedgeTakenCount(L);

    InsertTop = SplitEdge(Latch, Exit, &DT, LI);
    InsertBot = SplitBlock(InsertTop, InsertTop->getTerminator(), &DT, LI);

    InsertTop->setName(Exit->getName() + ".peel.begin");
    InsertBot->setName(Exit->getName() + ".peel.next");
    NewPreHeader = nullptr;

    // If the original loop may only execute a single iteration we need to
    // insert a trip count check and skip the original loop with the last
    // iteration peeled off if necessary. Either way, we must update branch
    // weights to maintain the loop body frequency.
    if (SE->isKnownNonZero(BTC)) {
      // We have just proven that, when reached, the original loop always
      // executes at least two iterations. Thus, we unconditionally execute
      // both the remaining loop's initial iteration and the peeled iteration.
      // But that increases the latter's frequency above its frequency in the
      // original loop. To maintain the total frequency, we compensate by
      // decreasing the remaining loop body's frequency to indicate one less
      // iteration.
      //
      // We use this formula to convert probability to/from frequency:
      //   Sum(i=0..inf)(P^i) = 1/(1-P) = Freq.
      if (BranchProbability P = getLoopProbability(L); !P.isUnknown()) {
        // Trying to subtract one from an infinite loop is pointless, and our
        // formulas then produce division by zero, so skip that case.
        if (BranchProbability ExitP = P.getCompl(); !ExitP.isZero()) {
          double Freq = 1 / ExitP.toDouble();
          // No branch weights can produce a frequency of less than one given
          // the initial iteration, and our formulas produce a negative
          // probability if we try.
          assert(Freq >= 1.0 && "expected freq >= 1 due to initial iteration");
          double NewFreq = std::max(Freq - 1, 1.0);
              L, BranchProbability::getBranchProbability(1 - 1 / NewFreq));
        }
      }
    } else {
      // The loop may execute only one iteration: guard the remaining loop
      // with a check that the backedge-taken count is non-zero, otherwise
      // branch directly to the peeled (last) iteration.
      NewPreHeader = SplitEdge(PreHeader, Header, &DT, LI);
      SCEVExpander Expander(*SE, "loop-peel");

      Instruction *PreHeaderBR = PreHeader->getTerminator();
      Value *BTCValue =
          Expander.expandCodeFor(BTC, BTC->getType(), PreHeaderBR);
      IRBuilder<> B(PreHeaderBR);
      Value *Cond =
          B.CreateICmpNE(BTCValue, ConstantInt::get(BTCValue->getType(), 0));
      auto *BI = B.CreateCondBr(Cond, NewPreHeader, InsertTop);
      SmallVector<uint32_t> Weights;
      auto *OrigLatchBr = Latch->getTerminator();
      auto HasBranchWeights = !ProfcheckDisableMetadataFixes &&
                              extractBranchWeights(*OrigLatchBr, Weights);
      if (HasBranchWeights) {
        // The probability that the new guard skips the loop to execute just one
        // iteration is the original loop's probability of exiting at the latch
        // after any iteration. That should maintain the original loop body
        // frequency. Upon arriving at the loop, due to the guard, the
        // probability of reaching iteration i of the new loop is the
        // probability of reaching iteration i+1 of the original loop. The
        // probability of reaching the peeled iteration is 1, which is the
        // probability of reaching iteration 0 of the original loop.
        if (L->getExitBlock() == OrigLatchBr->getSuccessor(0))
          std::swap(Weights[0], Weights[1]);
        setBranchWeights(*BI, Weights, /*IsExpected=*/false);
      }
      PreHeaderBR->eraseFromParent();

      // PreHeader now dominates InsertTop.
      DT.changeImmediateDominator(InsertTop, PreHeader);
    }
  } else {
    // It is convenient to split the preheader into 3 parts - two blocks to
    // anchor the peeled copy of the loop body, and a new preheader for the
    // "real" loop.

    // Peeling the first iteration transforms.
    //
    // PreHeader:
    // ...
    // Header:
    //   LoopBody
    //   If (cond) goto Header
    // Exit:
    //
    // into
    //
    // InsertTop:
    //   LoopBody
    //   If (!cond) goto Exit
    // InsertBot:
    // NewPreHeader:
    // ...
    // Header:
    //   LoopBody
    //   If (cond) goto Header
    // Exit:
    //
    // Each following iteration will split the current bottom anchor in two,
    // and put the new copy of the loop body between these two blocks. That
    // is, after peeling another iteration from the example above, we'll
    // split InsertBot, and get:
    //
    // InsertTop:
    //   LoopBody
    //   If (!cond) goto Exit
    // InsertBot:
    //   LoopBody
    //   If (!cond) goto Exit
    // InsertBot.next:
    // NewPreHeader:
    // ...
    // Header:
    //  LoopBody
    //   If (cond) goto Header
    // Exit:
    //
    InsertTop = SplitEdge(PreHeader, Header, &DT, LI);
    InsertBot = SplitBlock(InsertTop, InsertTop->getTerminator(), &DT, LI);
    NewPreHeader = SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI);

    InsertTop->setName(Header->getName() + ".peel.begin");
    InsertBot->setName(Header->getName() + ".peel.next");
    NewPreHeader->setName(PreHeader->getName() + ".peel.newph");
  }

  // Capture the latch terminator now; each iteration's clone is looked up
  // through VMap below to strip its loop metadata.
  Instruction *LatchTerm =
      cast<Instruction>(cast<BasicBlock>(Latch)->getTerminator());

  // Identify what noalias metadata is inside the loop: if it is inside the
  // loop, the associated metadata must be cloned for each iteration.
  SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes;
  identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes);

  // For each peeled-off iteration, make a copy of the loop.
  ValueToValueMapTy VMap;
  for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {

    cloneLoopBlocks(L, Iter, PeelLast, InsertTop, InsertBot,
                    NewPreHeader ? PreHeader : nullptr, ExitEdges, NewBlocks,
                    LoopBlocks, VMap, LVMap, &DT, LI,
                    LoopLocalNoAliasDeclScopes, *SE);

    // Remap to use values from the current iteration instead of the
    // previous one.
    remapInstructionsInBlocks(NewBlocks, VMap);

    if (Iter == 0) {
      if (PeelLast) {
        // Adjust the exit condition so the loop exits one iteration early.
        // For now we simply subtract one from the second operand of the
        // exit condition. This relies on the peel count computation to
        // check that this is actually legal. In particular, it ensures that
        // the first operand of the compare is an AddRec with step 1 and we
        // execute more than one iteration.
        auto *Cmp =
            cast<ICmpInst>(L->getLoopLatch()->getTerminator()->getOperand(0));
        IRBuilder B(Cmp);
        Cmp->setOperand(
            1, B.CreateSub(Cmp->getOperand(1),
                           ConstantInt::get(Cmp->getOperand(1)->getType(), 1)));
      } else {
        // Update IDoms of the blocks reachable through exits.
        for (auto BBIDom : NonLoopBlocksIDom)
          DT.changeImmediateDominator(BBIDom.first,
                                      cast<BasicBlock>(LVMap[BBIDom.second]));
      }
    }

#ifdef EXPENSIVE_CHECKS
    assert(DT.verify(DominatorTree::VerificationLevel::Fast));
#endif

    // Remove Loop metadata from the latch branch instruction
    // because it is not the Loop's latch branch anymore.
    auto *LatchTermCopy = cast<Instruction>(VMap[LatchTerm]);
    LatchTermCopy->setMetadata(LLVMContext::MD_loop, nullptr);

    // Advance the anchors so the next peeled iteration is inserted between
    // the one just emitted and the remaining loop.
    InsertTop = InsertBot;
    InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI);
    InsertBot->setName(Header->getName() + ".peel.next");

    // Move the cloned blocks into textual position after the anchor.
    F->splice(InsertTop->getIterator(), F, NewBlocks[0]->getIterator(),
              F->end());
  }

  if (PeelLast) {
    // Now adjust users of the original exit values by replacing them with the
    // exit value from the peeled iteration and remove them.
    for (const auto &[P, E] : ExitValues) {
      Instruction *ExitInst = dyn_cast<Instruction>(E);
      if (ExitInst && L->contains(ExitInst))
        P->replaceAllUsesWith(&*VMap[ExitInst]);
      else
        P->replaceAllUsesWith(E);
      P->eraseFromParent();
    }
    formLCSSA(*L, DT, LI, SE);
  } else {
    // Now adjust the phi nodes in the loop header to get their initial values
    // from the last peeled-off iteration instead of the preheader.
    for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
      Value *NewVal = PHI->getIncomingValueForBlock(Latch);
      Instruction *LatchInst = dyn_cast<Instruction>(NewVal);
      if (LatchInst && L->contains(LatchInst))
        NewVal = LVMap[LatchInst];

      PHI->setIncomingValueForBlock(NewPreHeader, NewVal);
    }
  }

  // Update Metadata for count of peeled off iterations.
  unsigned AlreadyPeeled = 0;
    AlreadyPeeled = *Peeled;
  unsigned TotalPeeled = AlreadyPeeled + PeelCount;

  // Update metadata for the estimated trip count. The original branch weight
  // metadata is already correct for both the remaining loop and the peeled loop
  // iterations, so do not adjust it.
  //
  // For example, consider what happens when peeling 2 iterations from a loop
  // with an estimated trip count of 10 and inserting them before the remaining
  // loop. Each of the peeled iterations and each iteration in the remaining
  // loop still has the same probability of exiting the *entire original* loop
  // as it did when in the original loop, and thus it should still have the same
  // branch weights. The peeled iterations' non-zero probabilities of exiting
  // already appropriately reduce the probability of reaching the remaining
  // iterations just as they did in the original loop. Trying to also adjust
  // the remaining loop's branch weights to reflect its new trip count of 8 will
  // erroneously further reduce its block frequencies. However, in case an
  // analysis later needs to determine the trip count of the remaining loop
  // while examining it in isolation without considering the probability of
  // actually reaching it, we store the new trip count as separate metadata.
  if (auto EstimatedTripCount = getLoopEstimatedTripCount(L)) {
    unsigned EstimatedTripCountNew = *EstimatedTripCount;
    if (EstimatedTripCountNew < TotalPeeled)
      EstimatedTripCountNew = 0;
    else
      EstimatedTripCountNew -= TotalPeeled;
    setLoopEstimatedTripCount(L, EstimatedTripCountNew);
  }

  // Invalidate SCEV for the outermost affected loop.
  if (Loop *ParentLoop = L->getParentLoop())
    L = ParentLoop;

  // We modified the loop, update SE.
  SE->forgetTopmostLoop(L);

#ifdef EXPENSIVE_CHECKS
  // Finally DomTree must be correct.
  assert(DT.verify(DominatorTree::VerificationLevel::Fast));
#endif

  // FIXME: Incrementally update loop-simplify
  simplifyLoop(L, &DT, LI, SE, AC, nullptr, PreserveLCSSA);

  NumPeeled++;
  // NumPeeledEnd additionally counts last-iteration peels.
  NumPeeledEnd += PeelLast;
}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file defines the DenseMap class.
static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred, const SCEVAddRecExpr *LeftAR, const SCEV *RightSCEV, ScalarEvolution &SE, const TargetTransformInfo &TTI)
Returns true if the last iteration can be peeled off and the condition (Pred LeftAR,...
Definition LoopPeel.cpp:511
static bool violatesLegacyMultiExitLoopCheck(Loop *L)
This "heuristic" exactly matches implicit behavior which used to exist inside getLoopEstimatedTripCou...
Definition LoopPeel.cpp:730
static std::pair< unsigned, unsigned > countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE, const TargetTransformInfo &TTI)
Definition LoopPeel.cpp:549
static void cloneLoopBlocks(Loop *L, unsigned IterNumber, bool PeelLast, BasicBlock *InsertTop, BasicBlock *InsertBot, BasicBlock *OrigPreHeader, SmallVectorImpl< std::pair< BasicBlock *, BasicBlock * > > &ExitEdges, SmallVectorImpl< BasicBlock * > &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT, LoopInfo *LI, ArrayRef< MDNode * > LoopLocalNoAliasDeclScopes, ScalarEvolution &SE)
Clones the body of the loop L, putting it between InsertTop and InsertBot.
Definition LoopPeel.cpp:905
static unsigned peelToTurnInvariantLoadsDereferenceable(Loop &L, DominatorTree &DT, AssumptionCache *AC)
Definition LoopPeel.cpp:423
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file implements a map that provides insertion order iteration.
#define P(N)
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ ICMP_NE
not equal
Definition InstrTypes.h:698
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:827
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Conditional Branch instruction.
BasicBlock * getSuccessor(unsigned i) const
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
DomTreeNodeBase * getIDom() const
NodeT * getBlock() const
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
LLVM_ABI Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2811
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Store the result of a depth first search within basic blocks contained by a single loop.
RPOIterator beginRPO() const
Reverse iterate over the cached postorder blocks.
std::vector< BasicBlock * >::const_reverse_iterator RPOIterator
void perform(const LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
RPOIterator endRPO() const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:36
This class represents min/max intrinsics.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Value * getIncomingValueForBlock(const BasicBlock *BB) const
This node represents a polynomial recurrence on the trip count of the specified loop.
LLVM_ABI const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
SCEVUse getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This class represents a constant integer value.
This class uses information about analyze scalars to rewrite expressions in canonical form.
bool isHighCostExpansion(ArrayRef< const SCEV * > Exprs, Loop *L, unsigned Budget, const TargetTransformInfo *TTI, const Instruction *At)
Return true for expressions that can't be evaluated at runtime within given Budget.
LLVM_ABI Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
static LLVM_ABI LoopGuards collect(const Loop *L, ScalarEvolution &SE)
Collect rewrite map for loop guards for loop L, together with flags indicating if NUW and NSW can be ...
The main scalar evolution driver.
const SCEV * getConstantMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEVConstant that is greater than or equal to (i.e.
LLVM_ABI bool isKnownNegative(const SCEV *S)
Test if the given expression is known to be negative.
LLVM_ABI bool isKnownNonZero(const SCEV *S)
Test if the given expression is known to be non-zero.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getMinusSCEV(SCEVUse LHS, SCEVUse RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
LLVM_ABI void forgetTopmostLoop(const Loop *L)
LLVM_ABI void forgetBlockAndLoopDispositions(Value *V=nullptr)
Called when the client has changed the disposition of values in a loop or block.
LLVM_ABI void forgetLcssaPhiWithNewPredecessor(Loop *L, PHINode *V)
Forget LCSSA phi node V of loop L to which a new predecessor was added, such that it may no longer be trivial.
LLVM_ABI std::optional< MonotonicPredicateType > getMonotonicPredicateType(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred)
If, for all loop invariant X, the predicate "LHS `Pred` X" is monotonically increasing or decreasing, returns Some(MonotonicallyIncreasing) or Some(MonotonicallyDecreasing) respectively.
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, SCEVUse LHS, SCEVUse RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
LLVM_ABI const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)
Try to apply information from loop guards for L to Expr.
LLVM_ABI std::optional< bool > evaluatePredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Check whether the condition described by Pred, LHS, and RHS is true or false.
This class represents the LLVM 'select' instruction.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
Definition Twine.h:82
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:397
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
self_iterator getIterator()
Definition ilist_node.h:123
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
cst_pred_ty< is_one > m_scev_One()
Match an integer 1.
specificloop_ty m_SpecificLoop(const Loop *L)
SCEVAffineAddRec_match< Op0_t, Op1_t, class_match< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
bool match(const SCEV *S, const Pattern &P)
class_match< const SCEV > m_SCEV()
initializer< Ty > init(const Ty &Val)
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
cl::opt< bool > ProfcheckDisableMetadataFixes
Definition LoopInfo.cpp:60
LLVM_ABI std::optional< unsigned > getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight=nullptr)
Return either:
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
static cl::opt< bool > DisableAdvancedPeeling("disable-advanced-peeling", cl::init(false), cl::Hidden, cl::desc("Disable advance peeling. Issues for convergent targets (D134803)."))
static cl::opt< bool > UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden, cl::desc("Allows loops to be peeled when the dynamic " "trip count is known to be low."))
LLVM_ABI bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB)
Check if we can prove that all paths starting from this block converge to a block that either has a @llvm.experimental.deoptimize call prior to its terminating return instruction or is terminated by unreachable.
static cl::opt< bool > EnablePeelingForIV("enable-peeling-for-iv", cl::init(false), cl::Hidden, cl::desc("Enable peeling to convert Phi nodes into IVs"))
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular function.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool canPeel(const Loop *L)
Definition LoopPeel.cpp:96
bool canPeelLastIteration(const Loop &L, ScalarEvolution &SE)
Returns true if the last iteration of L can be peeled off.
Definition LoopPeel.cpp:480
LLVM_ABI void addStringMetadataToLoop(Loop *TheLoop, const char *MDString, unsigned V=0)
Set input string into loop metadata by keeping other values intact.
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
static const char * PeeledCountMetaData
Definition LoopPeel.cpp:90
DomTreeNodeBase< BasicBlock > DomTreeNode
Definition Dominators.h:94
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::PeelingPreferences &PP, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache *AC=nullptr, unsigned Threshold=UINT_MAX)
Definition LoopPeel.cpp:752
LLVM_ABI cl::opt< unsigned > SCEVCheapExpansionBudget
BranchProbability getLoopProbability(Loop *L)
Based on branch weight metadata, return either:
static cl::opt< unsigned > UnrollForcePeelCount("unroll-force-peel-count", cl::init(0), cl::Hidden, cl::desc("Force a peel count regardless of profiling information."))
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
void peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, ScalarEvolution *SE, DominatorTree &DT, AssumptionCache *AC, bool PreserveLCSSA, ValueToValueMapTy &VMap)
VMap is the value-map that maps instructions from the original loop to instructions in the last peeled-off iteration.
LLVM_ABI std::optional< int > getOptionalIntLoopAttribute(const Loop *TheLoop, StringRef Name)
Find named metadata for a loop with an integer value.
bool setLoopProbability(Loop *L, BranchProbability P)
Set branch weight metadata for the latch of L to indicate that, at the end of any iteration,...
TargetTransformInfo TTI
static cl::opt< bool > UnrollAllowLoopNestsPeeling("unroll-allow-loop-nests-peeling", cl::init(false), cl::Hidden, cl::desc("Allows loop nests to be peeled."))
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
static cl::opt< unsigned > UnrollPeelMaxCount("unroll-peel-max-count", cl::init(7), cl::Hidden, cl::desc("Max average trip count which will cause loop peeling."))
LLVM_ABI void cloneAndAdaptNoAliasScopes(ArrayRef< MDNode * > NoAliasDeclScopes, ArrayRef< BasicBlock * > NewBlocks, LLVMContext &Context, StringRef Ext)
Clone the specified noalias decl scopes.
LLVM_ABI void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if this is always a dereferenceable pointer.
Definition Loads.cpp:249
LLVM_ABI bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, std::optional< unsigned > EstimatedLoopInvocationWeight=std::nullopt)
Set llvm.loop.estimated_trip_count with the value EstimatedTripCount in the loop metadata of L.
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI void identifyNoAliasScopesToClone(ArrayRef< BasicBlock * > BBs, SmallVectorImpl< MDNode * > &NoAliasDeclScopes)
Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified basic blocks and extract ...
LLVM_ABI BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
LLVM_ABI bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put loop into LCSSA form.
Definition LCSSA.cpp:427
static cl::opt< unsigned > UnrollPeelCount("unroll-peel-count", cl::Hidden, cl::desc("Set the unroll peeling count, for testing purposes"))
LLVM_ABI Loop * cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI, LPPassManager *LPM)
Recursively clone the specified loop and all of its children, mapping the blocks with the specified m...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
bool AllowPeeling
Allow peeling off loop iterations.
bool AllowLoopNestsPeeling
Allow peeling off loop iterations for loop nests.
bool PeelLast
Peel off the last PeelCount loop iterations.
bool PeelProfiledIterations
Allow peeling basing on profile.
unsigned PeelCount
A forced peeling factor (the number of bodies of the original loop that should be peeled off before the transformation).