LLVM 23.0.0git
SimpleLoopUnswitch.cpp
Go to the documentation of this file.
1///===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "llvm/ADT/DenseMap.h"
11#include "llvm/ADT/STLExtras.h"
12#include "llvm/ADT/Sequence.h"
13#include "llvm/ADT/SetVector.h"
16#include "llvm/ADT/Statistic.h"
17#include "llvm/ADT/Twine.h"
20#include "llvm/Analysis/CFG.h"
33#include "llvm/IR/BasicBlock.h"
34#include "llvm/IR/Constant.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/Dominators.h"
37#include "llvm/IR/Function.h"
38#include "llvm/IR/IRBuilder.h"
39#include "llvm/IR/InstrTypes.h"
40#include "llvm/IR/Instruction.h"
43#include "llvm/IR/MDBuilder.h"
44#include "llvm/IR/Module.h"
47#include "llvm/IR/Use.h"
48#include "llvm/IR/Value.h"
51#include "llvm/Support/Debug.h"
62#include <algorithm>
63#include <cassert>
64#include <iterator>
65#include <numeric>
66#include <optional>
67#include <utility>
68
69#define DEBUG_TYPE "simple-loop-unswitch"
70
71using namespace llvm;
72using namespace llvm::PatternMatch;
73
74STATISTIC(NumBranches, "Number of branches unswitched");
75STATISTIC(NumSwitches, "Number of switches unswitched");
76STATISTIC(NumSelects, "Number of selects turned into branches for unswitching");
77STATISTIC(NumGuards, "Number of guards turned into branches for unswitching");
78STATISTIC(NumTrivial, "Number of unswitches that are trivial");
80 NumCostMultiplierSkipped,
81 "Number of unswitch candidates that had their cost multiplier skipped");
82STATISTIC(NumInvariantConditionsInjected,
83 "Number of invariant conditions injected and unswitched");
84
85namespace llvm {
87 "enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
88 cl::desc("Forcibly enables non-trivial loop unswitching rather than "
89 "following the configuration passed into the pass."));
90
91static cl::opt<int>
92 UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
93 cl::desc("The cost threshold for unswitching a loop."));
94
96 "enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden,
97 cl::desc("Enable unswitch cost multiplier that prohibits exponential "
98 "explosion in nontrivial unswitch."));
100 "unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden,
101 cl::desc("Toplevel siblings divisor for cost multiplier."));
103 "unswitch-parent-blocks-div", cl::init(8), cl::Hidden,
104 cl::desc("Outer loop size divisor for cost multiplier."));
106 "unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden,
107 cl::desc("Number of unswitch candidates that are ignored when calculating "
108 "cost multiplier."));
110 "simple-loop-unswitch-guards", cl::init(true), cl::Hidden,
111 cl::desc("If enabled, simple loop unswitching will also consider "
112 "llvm.experimental.guard intrinsics as unswitch candidates."));
114 "simple-loop-unswitch-drop-non-trivial-implicit-null-checks",
115 cl::init(false), cl::Hidden,
116 cl::desc("If enabled, drop make.implicit metadata in unswitched implicit "
117 "null checks to save time analyzing if we can keep it."));
119 MSSAThreshold("simple-loop-unswitch-memoryssa-threshold",
120 cl::desc("Max number of memory uses to explore during "
121 "partial unswitching analysis"),
122 cl::init(100), cl::Hidden);
124 "freeze-loop-unswitch-cond", cl::init(true), cl::Hidden,
125 cl::desc("If enabled, the freeze instruction will be added to condition "
126 "of loop unswitch to prevent miscompilation."));
127
129 "simple-loop-unswitch-inject-invariant-conditions", cl::Hidden,
130 cl::desc("Whether we should inject new invariants and unswitch them to "
131 "eliminate some existing (non-invariant) conditions."),
132 cl::init(true));
133
135 "simple-loop-unswitch-inject-invariant-condition-hotness-threshold",
137 cl::desc("Only try to inject loop invariant conditions and "
138 "unswitch on them to eliminate branches that are "
139 "not-taken 1/<this option> times or less."),
140 cl::init(16));
141
142static cl::opt<bool> EstimateProfile("simple-loop-unswitch-estimate-profile",
143 cl::Hidden, cl::init(true));
145} // namespace llvm
146
148namespace {
149struct CompareDesc {
150 CondBrInst *Term;
151 Value *Invariant;
152 BasicBlock *InLoopSucc;
153
154 CompareDesc(CondBrInst *Term, Value *Invariant, BasicBlock *InLoopSucc)
155 : Term(Term), Invariant(Invariant), InLoopSucc(InLoopSucc) {}
156};
157
158struct InjectedInvariant {
159 ICmpInst::Predicate Pred;
160 Value *LHS;
161 Value *RHS;
162 BasicBlock *InLoopSucc;
163
164 InjectedInvariant(ICmpInst::Predicate Pred, Value *LHS, Value *RHS,
165 BasicBlock *InLoopSucc)
166 : Pred(Pred), LHS(LHS), RHS(RHS), InLoopSucc(InLoopSucc) {}
167};
168
169struct NonTrivialUnswitchCandidate {
170 Instruction *TI = nullptr;
171 TinyPtrVector<Value *> Invariants;
172 std::optional<InstructionCost> Cost;
173 std::optional<InjectedInvariant> PendingInjection;
174 NonTrivialUnswitchCandidate(
175 Instruction *TI, ArrayRef<Value *> Invariants,
176 std::optional<InstructionCost> Cost = std::nullopt,
177 std::optional<InjectedInvariant> PendingInjection = std::nullopt)
178 : TI(TI), Invariants(Invariants), Cost(Cost),
179 PendingInjection(PendingInjection) {};
180
181 bool hasPendingInjection() const { return PendingInjection.has_value(); }
182};
183} // end anonymous namespace.
184
185// Helper to skip (select x, true, false), which matches both a logical AND and
186// OR and can confuse code that tries to determine if \p Cond is either a
187// logical AND or OR but not both.
189 Value *CondNext;
190 while (match(Cond, m_Select(m_Value(CondNext), m_One(), m_Zero())))
191 Cond = CondNext;
192 return Cond;
193}
194
195/// Collect all of the loop invariant input values transitively used by the
196/// homogeneous instruction graph from a given root.
197///
198/// This essentially walks from a root recursively through loop variant operands
199/// which have perform the same logical operation (AND or OR) and finds all
200/// inputs which are loop invariant. For some operations these can be
201/// re-associated and unswitched out of the loop entirely.
204 const LoopInfo &LI) {
205 assert(!L.isLoopInvariant(&Root) &&
206 "Only need to walk the graph if root itself is not invariant.");
207 TinyPtrVector<Value *> Invariants;
208
209 bool IsRootAnd = match(&Root, m_LogicalAnd());
210 bool IsRootOr = match(&Root, m_LogicalOr());
211
212 // Build a worklist and recurse through operators collecting invariants.
215 Worklist.push_back(&Root);
216 Visited.insert(&Root);
217 do {
218 Instruction &I = *Worklist.pop_back_val();
219 for (Value *OpV : I.operand_values()) {
220 // Skip constants as unswitching isn't interesting for them.
221 if (isa<Constant>(OpV))
222 continue;
223
224 // Add it to our result if loop invariant.
225 if (L.isLoopInvariant(OpV)) {
226 Invariants.push_back(OpV);
227 continue;
228 }
229
230 // If not an instruction with the same opcode, nothing we can do.
232
233 if (OpI && ((IsRootAnd && match(OpI, m_LogicalAnd())) ||
234 (IsRootOr && match(OpI, m_LogicalOr())))) {
235 // Visit this operand.
236 if (Visited.insert(OpI).second)
237 Worklist.push_back(OpI);
238 }
239 }
240 } while (!Worklist.empty());
241
242 return Invariants;
243}
244
245static void replaceLoopInvariantUses(const Loop &L, Value *Invariant,
246 Constant &Replacement) {
247 assert(!isa<Constant>(Invariant) && "Why are we unswitching on a constant?");
248
249 // Replace uses of LIC in the loop with the given constant.
250 // We use make_early_inc_range as set invalidates the iterator.
251 for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
252 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
253
254 // Replace this use within the loop body.
255 if (UserI && L.contains(UserI))
256 U.set(&Replacement);
257 }
258}
259
260/// Check that all the LCSSA PHI nodes in the loop exit block have trivial
261/// incoming values along this edge.
263 const BasicBlock &ExitingBB,
264 const BasicBlock &ExitBB) {
265 for (const Instruction &I : ExitBB) {
266 auto *PN = dyn_cast<PHINode>(&I);
267 if (!PN)
268 // No more PHIs to check.
269 return true;
270
271 // If the incoming value for this edge isn't loop invariant the unswitch
272 // won't be trivial.
273 if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
274 return false;
275 }
276 llvm_unreachable("Basic blocks should never be empty!");
277}
278
279/// Copy a set of loop invariant values \p Invariants and insert them at the
280/// end of \p BB and conditionally branch on the copied condition. We only
281/// branch on a single value.
282/// We attempt to estimate the profile of the resulting conditional branch from
283/// \p ComputeProfFrom, which is the original conditional branch we're
284/// unswitching.
285/// When \p Direction is true, the \p Invariants form a disjunction, and the
286/// branch conditioned on it exits the loop on the "true" case. When \p
287/// Direction is false, the \p Invariants form a conjunction and the branch
288/// exits on the "false" case.
290 BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction,
291 BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze,
292 const Instruction *I, AssumptionCache *AC, const DominatorTree &DT,
293 const CondBrInst &ComputeProfFrom) {
294
295 SmallVector<uint32_t> BranchWeights;
296 bool HasBranchWeights = EstimateProfile && !ProfcheckDisableMetadataFixes &&
297 extractBranchWeights(ComputeProfFrom, BranchWeights);
298 // If Direction is true, that means we had a disjunction and that the "true"
299 // case exits. The probability of the disjunction of the subset of terms is at
300 // most as high as the original one. So, if the probability is higher than the
301 // one we'd assign in absence of a profile (i.e. 0.5), we will use 0.5,
302 // but if it's lower, we will use the original probability.
303 // Conversely, if Direction is false, that means we had a conjunction, and the
304 // probability of exiting is captured in the second branch weight. That
305 // probability is a disjunction (of the negation of the original terms). The
306 // same reasoning applies as above.
307 // Issue #165649: should we expect BFI to conserve, and use that to calculate
308 // the branch weights?
309 if (HasBranchWeights &&
310 static_cast<double>(BranchWeights[Direction ? 0 : 1]) /
311 static_cast<double>(sum_of(BranchWeights)) >
312 0.5)
313 HasBranchWeights = false;
314
315 IRBuilder<> IRB(&BB);
317
318 SmallVector<Value *> FrozenInvariants;
319 for (Value *Inv : Invariants) {
320 if (InsertFreeze && !isGuaranteedNotToBeUndefOrPoison(Inv, AC, I, &DT))
321 Inv = IRB.CreateFreeze(Inv, Inv->getName() + ".fr");
322 FrozenInvariants.push_back(Inv);
323 }
324
325 Value *Cond = Direction ? IRB.CreateOr(FrozenInvariants)
326 : IRB.CreateAnd(FrozenInvariants);
327 auto *BR = IRB.CreateCondBr(
328 Cond, Direction ? &UnswitchedSucc : &NormalSucc,
329 Direction ? &NormalSucc : &UnswitchedSucc,
330 HasBranchWeights ? ComputeProfFrom.getMetadata(LLVMContext::MD_prof)
331 : nullptr);
332 if (!HasBranchWeights)
334}
335
336/// Copy a set of loop invariant values, and conditionally branch on them.
338 BasicBlock &BB, ArrayRef<Value *> ToDuplicate, bool Direction,
339 BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L,
340 MemorySSAUpdater *MSSAU, const CondBrInst &OriginalBranch) {
342 for (auto *Val : reverse(ToDuplicate)) {
343 Instruction *Inst = cast<Instruction>(Val);
344 Instruction *NewInst = Inst->clone();
345
346 if (const DebugLoc &DL = Inst->getDebugLoc())
347 mapAtomInstance(DL, VMap);
348
349 NewInst->insertInto(&BB, BB.end());
350 RemapInstruction(NewInst, VMap,
352 VMap[Val] = NewInst;
353
354 if (!MSSAU)
355 continue;
356
357 MemorySSA *MSSA = MSSAU->getMemorySSA();
358 if (auto *MemUse =
360 auto *DefiningAccess = MemUse->getDefiningAccess();
361 // Get the first defining access before the loop.
362 while (L.contains(DefiningAccess->getBlock())) {
363 // If the defining access is a MemoryPhi, get the incoming
364 // value for the pre-header as defining access.
365 if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess))
366 DefiningAccess =
367 MemPhi->getIncomingValueForBlock(L.getLoopPreheader());
368 else
369 DefiningAccess = cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
370 }
371 MSSAU->createMemoryAccessInBB(NewInst, DefiningAccess,
372 NewInst->getParent(),
374 }
375 }
376
377 IRBuilder<> IRB(&BB);
379 Value *Cond = VMap[ToDuplicate[0]];
380 // The expectation is that ToDuplicate[0] is the condition used by the
381 // OriginalBranch, case in which we can clone the profile metadata from there.
382 auto *ProfData =
384 ToDuplicate[0] == skipTrivialSelect(OriginalBranch.getCondition())
385 ? OriginalBranch.getMetadata(LLVMContext::MD_prof)
386 : nullptr;
387 auto *BR =
388 IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
389 Direction ? &NormalSucc : &UnswitchedSucc, ProfData);
390 if (!ProfData)
392}
393
394/// Rewrite the PHI nodes in an unswitched loop exit basic block.
395///
396/// Requires that the loop exit and unswitched basic block are the same, and
397/// that the exiting block was a unique predecessor of that block. Rewrites the
398/// PHI nodes in that block such that what were LCSSA PHI nodes become trivial
399/// PHI nodes from the old preheader that now contains the unswitched
400/// terminator.
402 BasicBlock &OldExitingBB,
403 BasicBlock &OldPH) {
404 for (PHINode &PN : UnswitchedBB.phis()) {
405 // When the loop exit is directly unswitched we just need to update the
406 // incoming basic block. We loop to handle weird cases with repeated
407 // incoming blocks, but expect to typically only have one operand here.
408 for (auto i : seq<int>(0, PN.getNumOperands())) {
409 assert(PN.getIncomingBlock(i) == &OldExitingBB &&
410 "Found incoming block different from unique predecessor!");
411 PN.setIncomingBlock(i, &OldPH);
412 }
413 }
414}
415
416/// Rewrite the PHI nodes in the loop exit basic block and the split off
417/// unswitched block.
418///
419/// Because the exit block remains an exit from the loop, this rewrites the
420/// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI
421/// nodes into the unswitched basic block to select between the value in the
422/// old preheader and the loop exit.
424 BasicBlock &UnswitchedBB,
425 BasicBlock &OldExitingBB,
426 BasicBlock &OldPH,
427 bool FullUnswitch) {
428 assert(&ExitBB != &UnswitchedBB &&
429 "Must have different loop exit and unswitched blocks!");
430 BasicBlock::iterator InsertPt = UnswitchedBB.begin();
431 for (PHINode &PN : ExitBB.phis()) {
432 auto *NewPN = PHINode::Create(PN.getType(), /*NumReservedValues*/ 2,
433 PN.getName() + ".split");
434 NewPN->insertBefore(InsertPt);
435
436 // Walk backwards over the old PHI node's inputs to minimize the cost of
437 // removing each one. We have to do this weird loop manually so that we
438 // create the same number of new incoming edges in the new PHI as we expect
439 // each case-based edge to be included in the unswitched switch in some
440 // cases.
441 // FIXME: This is really, really gross. It would be much cleaner if LLVM
442 // allowed us to create a single entry for a predecessor block without
443 // having separate entries for each "edge" even though these edges are
444 // required to produce identical results.
445 for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
446 if (PN.getIncomingBlock(i) != &OldExitingBB)
447 continue;
448
449 Value *Incoming = PN.getIncomingValue(i);
450 if (FullUnswitch)
451 // No more edge from the old exiting block to the exit block.
452 PN.removeIncomingValue(i);
453
454 NewPN->addIncoming(Incoming, &OldPH);
455 }
456
457 // Now replace the old PHI with the new one and wire the old one in as an
458 // input to the new one.
459 PN.replaceAllUsesWith(NewPN);
460 NewPN->addIncoming(&PN, &ExitBB);
461 }
462}
463
464/// Hoist the current loop up to the innermost loop containing a remaining exit.
465///
466/// Because we've removed an exit from the loop, we may have changed the set of
467/// loops reachable and need to move the current loop up the loop nest or even
468/// to an entirely separate nest.
469static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
470 DominatorTree &DT, LoopInfo &LI,
471 MemorySSAUpdater *MSSAU, ScalarEvolution *SE) {
472 // If the loop is already at the top level, we can't hoist it anywhere.
473 Loop *OldParentL = L.getParentLoop();
474 if (!OldParentL)
475 return;
476
478 L.getExitBlocks(Exits);
479 Loop *NewParentL = nullptr;
480 for (auto *ExitBB : Exits)
481 if (Loop *ExitL = LI.getLoopFor(ExitBB))
482 if (!NewParentL || NewParentL->contains(ExitL))
483 NewParentL = ExitL;
484
485 if (NewParentL == OldParentL)
486 return;
487
488 // The new parent loop (if different) should always contain the old one.
489 if (NewParentL)
490 assert(NewParentL->contains(OldParentL) &&
491 "Can only hoist this loop up the nest!");
492
493 // The preheader will need to move with the body of this loop. However,
494 // because it isn't in this loop we also need to update the primary loop map.
495 assert(OldParentL == LI.getLoopFor(&Preheader) &&
496 "Parent loop of this loop should contain this loop's preheader!");
497 LI.changeLoopFor(&Preheader, NewParentL);
498
499 // Remove this loop from its old parent.
500 OldParentL->removeChildLoop(&L);
501
502 // Add the loop either to the new parent or as a top-level loop.
503 if (NewParentL)
504 NewParentL->addChildLoop(&L);
505 else
506 LI.addTopLevelLoop(&L);
507
508 // Remove this loops blocks from the old parent and every other loop up the
509 // nest until reaching the new parent. Also update all of these
510 // no-longer-containing loops to reflect the nesting change.
511 for (Loop *OldContainingL = OldParentL; OldContainingL != NewParentL;
512 OldContainingL = OldContainingL->getParentLoop()) {
513 llvm::erase_if(OldContainingL->getBlocksVector(),
514 [&](const BasicBlock *BB) {
515 return BB == &Preheader || L.contains(BB);
516 });
517
518 OldContainingL->getBlocksSet().erase(&Preheader);
519 for (BasicBlock *BB : L.blocks())
520 OldContainingL->getBlocksSet().erase(BB);
521
522 // Because we just hoisted a loop out of this one, we have essentially
523 // created new exit paths from it. That means we need to form LCSSA PHI
524 // nodes for values used in the no-longer-nested loop.
525 formLCSSA(*OldContainingL, DT, &LI, SE);
526
527 // We shouldn't need to form dedicated exits because the exit introduced
528 // here is the (just split by unswitching) preheader. However, after trivial
529 // unswitching it is possible to get new non-dedicated exits out of parent
530 // loop so let's conservatively form dedicated exit blocks and figure out
531 // if we can optimize later.
532 formDedicatedExitBlocks(OldContainingL, &DT, &LI, MSSAU,
533 /*PreserveLCSSA*/ true);
534 }
535}
536
537// Return the top-most loop containing ExitBB and having ExitBB as exiting block
538// or the loop containing ExitBB, if there is no parent loop containing ExitBB
539// as exiting block.
541 const LoopInfo &LI) {
542 Loop *TopMost = LI.getLoopFor(ExitBB);
543 Loop *Current = TopMost;
544 while (Current) {
545 if (Current->isLoopExiting(ExitBB))
546 TopMost = Current;
547 Current = Current->getParentLoop();
548 }
549 return TopMost;
550}
551
552/// Unswitch a trivial branch if the condition is loop invariant.
553///
554/// This routine should only be called when loop code leading to the branch has
555/// been validated as trivial (no side effects). This routine checks if the
556/// condition is invariant and one of the successors is a loop exit or a loop
557/// latch with no side-effects. This allows us to unswitch without duplicating
558/// the loop, making it trivial.
559///
560/// If this routine fails to unswitch the branch it returns false.
561///
562/// If the branch can be unswitched, this routine splits the preheader and
563/// hoists the branch above that split. Preserves loop simplified form
564/// (splitting the exit block as necessary). It simplifies the branch within
565/// the loop to an unconditional branch but doesn't remove it entirely. Further
566/// cleanup can be done with some simplifycfg like pass.
567///
568/// If `SE` is not null, it will be updated based on the potential loop SCEVs
569/// invalidated by this.
571 LoopInfo &LI, ScalarEvolution *SE,
572 MemorySSAUpdater *MSSAU) {
573 LLVM_DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n");
574
575 // The loop invariant values that we want to unswitch.
576 TinyPtrVector<Value *> Invariants;
577
578 // When true, we're fully unswitching the branch rather than just unswitching
579 // some input conditions to the branch.
580 bool FullUnswitch = false;
581
583 if (L.isLoopInvariant(Cond)) {
584 Invariants.push_back(Cond);
585 FullUnswitch = true;
586 } else {
587 if (auto *CondInst = dyn_cast<Instruction>(Cond))
588 Invariants = collectHomogenousInstGraphLoopInvariants(L, *CondInst, LI);
589 if (Invariants.empty()) {
590 LLVM_DEBUG(dbgs() << " Couldn't find invariant inputs!\n");
591 return false;
592 }
593 }
594
595 std::optional<int> LatchIdx = std::nullopt;
596 auto *LoopLatch = L.getLoopLatch();
597 auto *ULExit = L.getUniqueLatchExitBlock();
598 if (SE && FullUnswitch && ULExit) {
599 if (BI.getSuccessor(0) == LoopLatch && L.contains(BI.getSuccessor(1)))
600 LatchIdx = 0;
601 else if (BI.getSuccessor(1) == LoopLatch && L.contains(BI.getSuccessor(0)))
602 LatchIdx = 1;
603 }
604
605 bool ModifiedBranch = false;
606 if (LatchIdx && areLoopExitPHIsLoopInvariant(L, *LoopLatch, *ULExit) &&
607 !llvm::any_of(*LoopLatch,
608 [](Instruction &I) { return I.mayHaveSideEffects(); })) {
609
610 // We need to prove the loop is finite, otherwise this change will convert
611 // it to a finite loop. This conservative check is good enough as we are
612 // mostly interested in perfect countable loop nests that perform
613 // calculations on arrays.
614 const SCEV *MaxBECount = SE->getConstantMaxBackedgeTakenCount(&L);
615 if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
617 Updates.push_back({cfg::UpdateKind::Delete, BI.getParent(),
618 BI.getSuccessor(*LatchIdx)});
619 Updates.push_back({cfg::UpdateKind::Insert, BI.getParent(), ULExit});
620 LoopLatch->removePredecessor(BI.getParent());
621 BI.setSuccessor(*LatchIdx, ULExit);
622 for (PHINode &PN : ULExit->phis()) {
623 Value *V = PN.getIncomingValueForBlock(LoopLatch);
624 PN.addIncoming(V, BI.getParent());
625 }
626 if (MSSAU)
627 MSSAU->applyUpdates(Updates, DT, /*UpdateDTFirst=*/true);
628 else
629 DT.applyUpdates(Updates);
630
631 ModifiedBranch = true;
632 }
633 }
634
635 // Check that one of the branch's successors exits, and which one.
636 bool ExitDirection = true;
637 int LoopExitSuccIdx = 0;
638 auto *LoopExitBB = BI.getSuccessor(0);
639 if (L.contains(LoopExitBB)) {
640 ExitDirection = false;
641 LoopExitSuccIdx = 1;
642 LoopExitBB = BI.getSuccessor(1);
643 if (L.contains(LoopExitBB)) {
644 LLVM_DEBUG(dbgs() << " Branch doesn't exit the loop!\n");
645 assert(!ModifiedBranch && "Modified the branch but didn't unswitch");
646 return false;
647 }
648 }
649 auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);
650 auto *ParentBB = BI.getParent();
651 if (!ModifiedBranch &&
652 !areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB)) {
653 LLVM_DEBUG(dbgs() << " Loop exit PHI's aren't loop-invariant!\n");
654 return false;
655 }
656
657 // When unswitching only part of the branch's condition, we need the exit
658 // block to be reached directly from the partially unswitched input. This can
659 // be done when the exit block is along the true edge and the branch condition
660 // is a graph of `or` operations, or the exit block is along the false edge
661 // and the condition is a graph of `and` operations.
662 if (!FullUnswitch) {
663 if (ExitDirection ? !match(Cond, m_LogicalOr())
664 : !match(Cond, m_LogicalAnd())) {
665 LLVM_DEBUG(dbgs() << " Branch condition is in improper form for "
666 "non-full unswitch!\n");
667 assert(!ModifiedBranch && "Modified the branch but didn't unswitch");
668 return false;
669 }
670 }
671
672 LLVM_DEBUG({
673 dbgs() << " unswitching trivial invariant conditions for: " << BI
674 << "\n";
675 for (Value *Invariant : Invariants) {
676 dbgs() << " " << *Invariant << " == true";
677 if (Invariant != Invariants.back())
678 dbgs() << " ||";
679 dbgs() << "\n";
680 }
681 });
682
683 // If we have scalar evolutions, we need to invalidate them including this
684 // loop, the loop containing the exit block and the topmost parent loop
685 // exiting via LoopExitBB.
686 if (SE) {
687 if (const Loop *ExitL = getTopMostExitingLoop(LoopExitBB, LI))
688 SE->forgetLoop(ExitL);
689 else
690 // Forget the entire nest as this exits the entire nest.
691 SE->forgetTopmostLoop(&L);
693 }
694
695 if (MSSAU && VerifyMemorySSA)
696 MSSAU->getMemorySSA()->verifyMemorySSA();
697
698 // Split the preheader, so that we know that there is a safe place to insert
699 // the conditional branch. We will change the preheader to have a conditional
700 // branch on LoopCond.
701 BasicBlock *OldPH = L.getLoopPreheader();
702 BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
703
704 // Now that we have a place to insert the conditional branch, create a place
705 // to branch to: this is the exit block out of the loop that we are
706 // unswitching. We need to split this if there are other loop predecessors.
707 // Because the loop is in simplified form, *any* other predecessor is enough.
708 BasicBlock *UnswitchedBB;
709 if (FullUnswitch && LoopExitBB->getUniquePredecessor()) {
710 assert(LoopExitBB->getUniquePredecessor() == BI.getParent() &&
711 "A branch's parent isn't a predecessor!");
712 UnswitchedBB = LoopExitBB;
713 } else {
714 UnswitchedBB =
715 SplitBlock(LoopExitBB, LoopExitBB->begin(), &DT, &LI, MSSAU, "");
716 }
717
718 if (MSSAU && VerifyMemorySSA)
719 MSSAU->getMemorySSA()->verifyMemorySSA();
720
721 // Actually move the invariant uses into the unswitched position. If possible,
722 // we do this by moving the instructions, but when doing partial unswitching
723 // we do it by building a new merge of the values in the unswitched position.
724 OldPH->getTerminator()->eraseFromParent();
725 if (FullUnswitch) {
726 // If fully unswitching, we can use the existing branch instruction.
727 // Splice it into the old PH to gate reaching the new preheader and re-point
728 // its successors.
729 BI.moveBefore(*OldPH, OldPH->end());
730 BI.setCondition(Cond);
731 if (MSSAU) {
732 // Temporarily clone the terminator, to make MSSA update cheaper by
733 // separating "insert edge" updates from "remove edge" ones.
734 BI.clone()->insertInto(ParentBB, ParentBB->end());
735 } else {
736 // Create a new unconditional branch that will continue the loop as a new
737 // terminator.
738 Instruction *NewBI = UncondBrInst::Create(ContinueBB, ParentBB);
739 NewBI->setDebugLoc(BI.getDebugLoc());
740 }
741 BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
742 BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
743 } else {
744 // Only unswitching a subset of inputs to the condition, so we will need to
745 // build a new branch that merges the invariant inputs.
746 if (ExitDirection)
748 "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "
749 "condition!");
750 else
752 "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
753 " condition!");
755 *OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH,
756 FreezeLoopUnswitchCond, OldPH->getTerminatorOrNull(), nullptr, DT, BI);
757 }
758
759 // Update the dominator tree with the added edge.
760 DT.insertEdge(OldPH, UnswitchedBB);
761
762 // After the dominator tree was updated with the added edge, update MemorySSA
763 // if available.
764 if (MSSAU) {
766 Updates.push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB});
767 MSSAU->applyInsertUpdates(Updates, DT);
768 }
769
770 // Finish updating dominator tree and memory ssa for full unswitch.
771 if (FullUnswitch) {
772 if (MSSAU) {
773 Instruction *Term = ParentBB->getTerminator();
774 // Remove the cloned branch instruction and create unconditional branch
775 // now.
776 Instruction *NewBI = UncondBrInst::Create(ContinueBB, ParentBB);
777 NewBI->setDebugLoc(Term->getDebugLoc());
778 Term->eraseFromParent();
779 MSSAU->removeEdge(ParentBB, LoopExitBB);
780 }
781 DT.deleteEdge(ParentBB, LoopExitBB);
782 }
783
784 if (MSSAU && VerifyMemorySSA)
785 MSSAU->getMemorySSA()->verifyMemorySSA();
786
787 // Rewrite the relevant PHI nodes.
788 if (UnswitchedBB == LoopExitBB)
789 rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);
790 else
791 rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,
792 *ParentBB, *OldPH, FullUnswitch);
793
794 // The constant we can replace all of our invariants with inside the loop
795 // body. If any of the invariants have a value other than this the loop won't
796 // be entered.
797 ConstantInt *Replacement = ExitDirection
798 ? ConstantInt::getFalse(BI.getContext())
799 : ConstantInt::getTrue(BI.getContext());
800
801 // Since this is an i1 condition we can also trivially replace uses of it
802 // within the loop with a constant.
803 for (Value *Invariant : Invariants)
804 replaceLoopInvariantUses(L, Invariant, *Replacement);
805
806 // If this was full unswitching, we may have changed the nesting relationship
807 // for this loop so hoist it to its correct parent if needed.
808 if (FullUnswitch)
809 hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
810
811 if (MSSAU && VerifyMemorySSA)
812 MSSAU->getMemorySSA()->verifyMemorySSA();
813
814 LLVM_DEBUG(dbgs() << " done: unswitching trivial branch...\n");
815 ++NumTrivial;
816 ++NumBranches;
817 return true;
818}
819
820/// Unswitch a trivial switch if the condition is loop invariant.
821///
822/// This routine should only be called when loop code leading to the switch has
823/// been validated as trivial (no side effects). This routine checks if the
824/// condition is invariant and that at least one of the successors is a loop
825/// exit. This allows us to unswitch without duplicating the loop, making it
826/// trivial.
827///
828/// If this routine fails to unswitch the switch it returns false.
829///
830/// If the switch can be unswitched, this routine splits the preheader and
831/// copies the switch above that split. If the default case is one of the
832/// exiting cases, it copies the non-exiting cases and points them at the new
833/// preheader. If the default case is not exiting, it copies the exiting cases
834/// and points the default at the preheader. It preserves loop simplified form
835/// (splitting the exit blocks as necessary). It simplifies the switch within
836/// the loop by removing now-dead cases. If the default case is one of those
837/// unswitched, it replaces its destination with a new basic block containing
838/// only unreachable. Such basic blocks, while technically loop exits, are not
839/// considered for unswitching so this is a stable transform and the same
840/// switch will not be revisited. If after unswitching there is only a single
841/// in-loop successor, the switch is further simplified to an unconditional
842/// branch. Still more cleanup can be done with some simplifycfg like pass.
843///
844/// If `SE` is not null, it will be updated based on the potential loop SCEVs
845/// invalidated by this.
847 LoopInfo &LI, ScalarEvolution *SE,
848 MemorySSAUpdater *MSSAU) {
849 LLVM_DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n");
850 Value *LoopCond = SI.getCondition();
851
852 // If this isn't switching on an invariant condition, we can't unswitch it.
853 if (!L.isLoopInvariant(LoopCond))
854 return false;
855
856 auto *ParentBB = SI.getParent();
857
858 // The same check must be used both for the default and the exit cases. We
859 // should never leave edges from the switch instruction to a basic block that
860 // we are unswitching, hence the condition used to determine the default case
861 // needs to also be used to populate ExitCaseIndices, which is then used to
862 // remove cases from the switch.
863 auto IsTriviallyUnswitchableExitBlock = [&](BasicBlock &BBToCheck) {
864 // BBToCheck is not an exit block if it is inside loop L.
865 if (L.contains(&BBToCheck))
866 return false;
867 // BBToCheck is not trivial to unswitch if its phis aren't loop invariant.
868 if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, BBToCheck))
869 return false;
870 // We do not unswitch a block that only has an unreachable statement, as
871 // it's possible this is a previously unswitched block. Only unswitch if
872 // either the terminator is not unreachable, or, if it is, it's not the only
873 // instruction in the block.
874 auto *TI = BBToCheck.getTerminator();
875 bool isUnreachable = isa<UnreachableInst>(TI);
876 return !isUnreachable || &*BBToCheck.getFirstNonPHIOrDbg() != TI;
877 };
878
879 SmallVector<int, 4> ExitCaseIndices;
880 for (auto Case : SI.cases())
881 if (IsTriviallyUnswitchableExitBlock(*Case.getCaseSuccessor()))
882 ExitCaseIndices.push_back(Case.getCaseIndex());
883 BasicBlock *DefaultExitBB = nullptr;
886 if (IsTriviallyUnswitchableExitBlock(*SI.getDefaultDest())) {
887 DefaultExitBB = SI.getDefaultDest();
888 } else if (ExitCaseIndices.empty())
889 return false;
890
891 LLVM_DEBUG(dbgs() << " unswitching trivial switch...\n");
892
893 if (MSSAU && VerifyMemorySSA)
894 MSSAU->getMemorySSA()->verifyMemorySSA();
895
896 // We may need to invalidate SCEVs for the outermost loop reached by any of
897 // the exits.
898 Loop *OuterL = &L;
899
900 if (DefaultExitBB) {
901 // Check the loop containing this exit.
902 Loop *ExitL = getTopMostExitingLoop(DefaultExitBB, LI);
903 if (!ExitL || ExitL->contains(OuterL))
904 OuterL = ExitL;
905 }
906 for (unsigned Index : ExitCaseIndices) {
907 auto CaseI = SI.case_begin() + Index;
908 // Compute the outer loop from this exit.
909 Loop *ExitL = getTopMostExitingLoop(CaseI->getCaseSuccessor(), LI);
910 if (!ExitL || ExitL->contains(OuterL))
911 OuterL = ExitL;
912 }
913
914 if (SE) {
915 if (OuterL)
916 SE->forgetLoop(OuterL);
917 else
918 SE->forgetTopmostLoop(&L);
919 }
920
921 if (DefaultExitBB) {
922 // Clear out the default destination temporarily to allow accurate
923 // predecessor lists to be examined below.
924 SI.setDefaultDest(nullptr);
925 }
926
927 // Store the exit cases into a separate data structure and remove them from
928 // the switch.
929 SmallVector<std::tuple<ConstantInt *, BasicBlock *,
931 4> ExitCases;
932 ExitCases.reserve(ExitCaseIndices.size());
934 // We walk the case indices backwards so that we remove the last case first
935 // and don't disrupt the earlier indices.
936 for (unsigned Index : reverse(ExitCaseIndices)) {
937 auto CaseI = SI.case_begin() + Index;
938 // Save the value of this case.
939 auto W = SIW.getSuccessorWeight(CaseI->getSuccessorIndex());
940 ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);
941 // Delete the unswitched cases.
942 SIW.removeCase(CaseI);
943 }
944
945 // Check if after this all of the remaining cases point at the same
946 // successor.
947 BasicBlock *CommonSuccBB = nullptr;
948 if (SI.getNumCases() > 0 &&
949 all_of(drop_begin(SI.cases()), [&SI](const SwitchInst::CaseHandle &Case) {
950 return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor();
951 }))
952 CommonSuccBB = SI.case_begin()->getCaseSuccessor();
953 if (!DefaultExitBB) {
954 // If we're not unswitching the default, we need it to match any cases to
955 // have a common successor or if we have no cases it is the common
956 // successor.
957 if (SI.getNumCases() == 0)
958 CommonSuccBB = SI.getDefaultDest();
959 else if (SI.getDefaultDest() != CommonSuccBB)
960 CommonSuccBB = nullptr;
961 }
962
963 // Split the preheader, so that we know that there is a safe place to insert
964 // the switch.
965 BasicBlock *OldPH = L.getLoopPreheader();
966 BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
967 OldPH->getTerminator()->eraseFromParent();
968
969 // Now add the unswitched switch. This new switch instruction inherits the
970 // debug location of the old switch, because it semantically replace the old
971 // one.
972 auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
973 NewSI->setDebugLoc(SIW->getDebugLoc());
974 SwitchInstProfUpdateWrapper NewSIW(*NewSI);
975
976 // Rewrite the IR for the unswitched basic blocks. This requires two steps.
977 // First, we split any exit blocks with remaining in-loop predecessors. Then
978 // we update the PHIs in one of two ways depending on if there was a split.
979 // We walk in reverse so that we split in the same order as the cases
980 // appeared. This is purely for convenience of reading the resulting IR, but
981 // it doesn't cost anything really.
982 SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs;
984 // Handle the default exit if necessary.
985 // FIXME: It'd be great if we could merge this with the loop below but LLVM's
986 // ranges aren't quite powerful enough yet.
987 if (DefaultExitBB) {
988 if (pred_empty(DefaultExitBB)) {
989 UnswitchedExitBBs.insert(DefaultExitBB);
990 rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
991 } else {
992 auto *SplitBB =
993 SplitBlock(DefaultExitBB, DefaultExitBB->begin(), &DT, &LI, MSSAU);
994 rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
995 *ParentBB, *OldPH,
996 /*FullUnswitch*/ true);
997 DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
998 }
999 }
1000 // Note that we must use a reference in the for loop so that we update the
1001 // container.
1002 for (auto &ExitCase : reverse(ExitCases)) {
1003 // Grab a reference to the exit block in the pair so that we can update it.
1004 BasicBlock *ExitBB = std::get<1>(ExitCase);
1005
1006 // If this case is the last edge into the exit block, we can simply reuse it
1007 // as it will no longer be a loop exit. No mapping necessary.
1008 if (pred_empty(ExitBB)) {
1009 // Only rewrite once.
1010 if (UnswitchedExitBBs.insert(ExitBB).second)
1011 rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH);
1012 continue;
1013 }
1014
1015 // Otherwise we need to split the exit block so that we retain an exit
1016 // block from the loop and a target for the unswitched condition.
1017 BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
1018 if (!SplitExitBB) {
1019 // If this is the first time we see this, do the split and remember it.
1020 SplitExitBB = SplitBlock(ExitBB, ExitBB->begin(), &DT, &LI, MSSAU);
1021 rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
1022 *ParentBB, *OldPH,
1023 /*FullUnswitch*/ true);
1024 }
1025 // Update the case pair to point to the split block.
1026 std::get<1>(ExitCase) = SplitExitBB;
1027 }
1028
1029 // Now add the unswitched cases. We do this in reverse order as we built them
1030 // in reverse order.
1031 for (auto &ExitCase : reverse(ExitCases)) {
1032 ConstantInt *CaseVal = std::get<0>(ExitCase);
1033 BasicBlock *UnswitchedBB = std::get<1>(ExitCase);
1034
1035 NewSIW.addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));
1036 }
1037
1038 // If the default was unswitched, re-point it and add explicit cases for
1039 // entering the loop.
1040 if (DefaultExitBB) {
1041 NewSIW->setDefaultDest(DefaultExitBB);
1042 NewSIW.setSuccessorWeight(0, DefaultCaseWeight);
1043
1044 // We removed all the exit cases, so we just copy the cases to the
1045 // unswitched switch.
1046 for (const auto &Case : SI.cases())
1047 NewSIW.addCase(Case.getCaseValue(), NewPH,
1049 } else if (DefaultCaseWeight) {
1050 // We have to set branch weight of the default case.
1051 uint64_t SW = *DefaultCaseWeight;
1052 for (const auto &Case : SI.cases()) {
1053 auto W = SIW.getSuccessorWeight(Case.getSuccessorIndex());
1054 assert(W &&
1055 "case weight must be defined as default case weight is defined");
1056 SW += *W;
1057 }
1058 NewSIW.setSuccessorWeight(0, SW);
1059 }
1060
1061 // If we ended up with a common successor for every path through the switch
1062 // after unswitching, rewrite it to an unconditional branch to make it easy
1063 // to recognize. Otherwise we potentially have to recognize the default case
1064 // pointing at unreachable and other complexity.
1065 if (CommonSuccBB) {
1066 BasicBlock *BB = SI.getParent();
1067 // We may have had multiple edges to this common successor block, so remove
1068 // them as predecessors. We skip the first one, either the default or the
1069 // actual first case.
1070 bool SkippedFirst = DefaultExitBB == nullptr;
1071 for (auto Case : SI.cases()) {
1072 assert(Case.getCaseSuccessor() == CommonSuccBB &&
1073 "Non-common successor!");
1074 (void)Case;
1075 if (!SkippedFirst) {
1076 SkippedFirst = true;
1077 continue;
1078 }
1079 CommonSuccBB->removePredecessor(BB,
1080 /*KeepOneInputPHIs*/ true);
1081 }
1082 // Now nuke the switch and replace it with a direct branch.
1083 Instruction *NewBI = UncondBrInst::Create(CommonSuccBB, BB);
1084 NewBI->setDebugLoc(SIW->getDebugLoc());
1085 SIW.eraseFromParent();
1086 } else if (DefaultExitBB) {
1087 assert(SI.getNumCases() > 0 &&
1088 "If we had no cases we'd have a common successor!");
1089 // Move the last case to the default successor. This is valid as if the
1090 // default got unswitched it cannot be reached. This has the advantage of
1091 // being simple and keeping the number of edges from this switch to
1092 // successors the same, and avoiding any PHI update complexity.
1093 auto LastCaseI = std::prev(SI.case_end());
1094
1095 SI.setDefaultDest(LastCaseI->getCaseSuccessor());
1097 0, SIW.getSuccessorWeight(LastCaseI->getSuccessorIndex()));
1098 SIW.removeCase(LastCaseI);
1099 }
1100
1101 // Walk the unswitched exit blocks and the unswitched split blocks and update
1102 // the dominator tree based on the CFG edits. While we are walking unordered
1103 // containers here, the API for applyUpdates takes an unordered list of
1104 // updates and requires them to not contain duplicates.
1106 for (auto *UnswitchedExitBB : UnswitchedExitBBs) {
1107 DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedExitBB});
1108 DTUpdates.push_back({DT.Insert, OldPH, UnswitchedExitBB});
1109 }
1110 for (auto SplitUnswitchedPair : SplitExitBBMap) {
1111 DTUpdates.push_back({DT.Delete, ParentBB, SplitUnswitchedPair.first});
1112 DTUpdates.push_back({DT.Insert, OldPH, SplitUnswitchedPair.second});
1113 }
1114
1115 if (MSSAU) {
1116 MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true);
1117 if (VerifyMemorySSA)
1118 MSSAU->getMemorySSA()->verifyMemorySSA();
1119 } else {
1120 DT.applyUpdates(DTUpdates);
1121 }
1122
1123 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
1124
1125 // We may have changed the nesting relationship for this loop so hoist it to
1126 // its correct parent if needed.
1127 hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
1128
1129 if (MSSAU && VerifyMemorySSA)
1130 MSSAU->getMemorySSA()->verifyMemorySSA();
1131
1132 ++NumTrivial;
1133 ++NumSwitches;
1134 LLVM_DEBUG(dbgs() << " done: unswitching trivial switch...\n");
1135 return true;
1136}
1137
1138/// This routine scans the loop to find a branch or switch which occurs before
1139/// any side effects occur. These can potentially be unswitched without
1140/// duplicating the loop. If a branch or switch is successfully unswitched the
1141/// scanning continues to see if subsequent branches or switches have become
1142/// trivial. Once all trivial candidates have been unswitched, this routine
1143/// returns.
1144///
1145/// The return value indicates whether anything was unswitched (and therefore
1146/// changed).
1147///
1148/// If `SE` is not null, it will be updated based on the potential loop SCEVs
1149/// invalidated by this.
1151 LoopInfo &LI, ScalarEvolution *SE,
1152 MemorySSAUpdater *MSSAU) {
1153 bool Changed = false;
1154
1155 // If loop header has only one reachable successor we should keep looking for
1156 // trivial condition candidates in the successor as well. An alternative is
1157 // to constant fold conditions and merge successors into loop header (then we
1158 // only need to check header's terminator). The reason for not doing this in
1159 // LoopUnswitch pass is that it could potentially break LoopPassManager's
1160 // invariants. Folding dead branches could either eliminate the current loop
1161 // or make other loops unreachable. LCSSA form might also not be preserved
1162 // after deleting branches. The following code keeps traversing loop header's
1163 // successors until it finds the trivial condition candidate (condition that
1164 // is not a constant). Since unswitching generates branches with constant
1165 // conditions, this scenario could be very common in practice.
1166 BasicBlock *CurrentBB = L.getHeader();
1168 Visited.insert(CurrentBB);
1169 do {
1170 // Check if there are any side-effecting instructions (e.g. stores, calls,
1171 // volatile loads) in the part of the loop that the code *would* execute
1172 // without unswitching.
1173 if (MSSAU) // Possible early exit with MSSA
1174 if (auto *Defs = MSSAU->getMemorySSA()->getBlockDefs(CurrentBB))
1175 if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end()))
1176 return Changed;
1177 if (llvm::any_of(*CurrentBB,
1178 [](Instruction &I) { return I.mayHaveSideEffects(); }))
1179 return Changed;
1180
1181 Instruction *CurrentTerm = CurrentBB->getTerminator();
1182
1183 if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
1184 // Don't bother trying to unswitch past a switch with a constant
1185 // condition. This should be removed prior to running this pass by
1186 // simplifycfg.
1187 if (isa<Constant>(SI->getCondition()))
1188 return Changed;
1189
1190 if (!unswitchTrivialSwitch(L, *SI, DT, LI, SE, MSSAU))
1191 // Couldn't unswitch this one so we're done.
1192 return Changed;
1193
1194 // Mark that we managed to unswitch something.
1195 Changed = true;
1196
1197 // If unswitching turned the terminator into an unconditional branch then
1198 // we can continue. The unswitching logic specifically works to fold any
1199 // cases it can into an unconditional branch to make it easier to
1200 // recognize here.
1201 auto *BI = dyn_cast<UncondBrInst>(CurrentBB->getTerminator());
1202 if (!BI)
1203 return Changed;
1204
1205 CurrentBB = BI->getSuccessor();
1206 continue;
1207 }
1208
1209 auto *BI = dyn_cast<CondBrInst>(CurrentTerm);
1210 if (!BI)
1211 // We do not understand other terminator instructions.
1212 return Changed;
1213
1214 // Don't bother trying to unswitch past an unconditional branch or a branch
1215 // with a constant value. These should be removed by simplifycfg prior to
1216 // running this pass.
1217 if (isa<Constant>(skipTrivialSelect(BI->getCondition())))
1218 return Changed;
1219
1220 // Found a trivial condition candidate: non-foldable conditional branch. If
1221 // we fail to unswitch this, we can't do anything else that is trivial.
1222 if (!unswitchTrivialBranch(L, *BI, DT, LI, SE, MSSAU))
1223 return Changed;
1224
1225 // Mark that we managed to unswitch something.
1226 Changed = true;
1227
1228 // If we only unswitched some of the conditions feeding the branch, we won't
1229 // have collapsed it to a single successor.
1230 if (isa<CondBrInst>(CurrentBB->getTerminator()))
1231 return Changed;
1232
1233 // Follow the newly unconditional branch into its successor.
1234 CurrentBB = cast<UncondBrInst>(CurrentBB->getTerminator())->getSuccessor();
1235
1236 // When continuing, if we exit the loop or reach a previous visited block,
1237 // then we can not reach any trivial condition candidates (unfoldable
1238 // branch instructions or switch instructions) and no unswitch can happen.
1239 } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);
1240
1241 return Changed;
1242}
1243
1244/// Build the cloned blocks for an unswitched copy of the given loop.
1245///
1246/// The cloned blocks are inserted before the loop preheader (`LoopPH`) and
1247/// after the split block (`SplitBB`) that will be used to select between the
1248/// cloned and original loop.
1249///
1250/// This routine handles cloning all of the necessary loop blocks and exit
1251/// blocks including rewriting their instructions and the relevant PHI nodes.
1252/// Any loop blocks or exit blocks which are dominated by a different successor
1253/// than the one for this clone of the loop blocks can be trivially skipped. We
1254/// use the `DominatingSucc` map to determine whether a block satisfies that
1255/// property with a simple map lookup.
1256///
1257/// It also correctly creates the unconditional branch in the cloned
1258/// unswitched parent block to only point at the unswitched successor.
1259///
1260/// This does not handle most of the necessary updates to `LoopInfo`. Only exit
1261/// block splitting is correctly reflected in `LoopInfo`, essentially all of
1262/// the cloned blocks (and their loops) are left without full `LoopInfo`
1263/// updates. This also doesn't fully update `DominatorTree`. It adds the cloned
1264/// blocks to them but doesn't create the cloned `DominatorTree` structure and
1265/// instead the caller must recompute an accurate DT. It *does* correctly
1266/// update the `AssumptionCache` provided in `AC`.
1268 Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB,
1269 ArrayRef<BasicBlock *> ExitBlocks, BasicBlock *ParentBB,
1270 BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB,
1272 ValueToValueMapTy &VMap,
1274 DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU,
1275 ScalarEvolution *SE) {
1277 NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());
1278
1279 // We will need to clone a bunch of blocks, wrap up the clone operation in
1280 // a helper.
1281 auto CloneBlock = [&](BasicBlock *OldBB) {
1282 // Clone the basic block and insert it before the new preheader.
1283 BasicBlock *NewBB = CloneBasicBlock(OldBB, VMap, ".us", OldBB->getParent());
1284 NewBB->moveBefore(LoopPH);
1285
1286 // Record this block and the mapping.
1287 NewBlocks.push_back(NewBB);
1288 VMap[OldBB] = NewBB;
1289
1290 return NewBB;
1291 };
1292
1293 // We skip cloning blocks when they have a dominating succ that is not the
1294 // succ we are cloning for.
1295 auto SkipBlock = [&](BasicBlock *BB) {
1296 auto It = DominatingSucc.find(BB);
1297 return It != DominatingSucc.end() && It->second != UnswitchedSuccBB;
1298 };
1299
1300 // First, clone the preheader.
1301 auto *ClonedPH = CloneBlock(LoopPH);
1302
1303 // Then clone all the loop blocks, skipping the ones that aren't necessary.
1304 for (auto *LoopBB : L.blocks())
1305 if (!SkipBlock(LoopBB))
1306 CloneBlock(LoopBB);
1307
1308 // Split all the loop exit edges so that when we clone the exit blocks, if
1309 // any of the exit blocks are *also* a preheader for some other loop, we
1310 // don't create multiple predecessors entering the loop header.
1311 for (auto *ExitBB : ExitBlocks) {
1312 if (SkipBlock(ExitBB))
1313 continue;
1314
1315 // When we are going to clone an exit, we don't need to clone all the
1316 // instructions in the exit block and we want to ensure we have an easy
1317 // place to merge the CFG, so split the exit first. This is always safe to
1318 // do because there cannot be any non-loop predecessors of a loop exit in
1319 // loop simplified form.
1320 auto *MergeBB = SplitBlock(ExitBB, ExitBB->begin(), &DT, &LI, MSSAU);
1321
1322 // Rearrange the names to make it easier to write test cases by having the
1323 // exit block carry the suffix rather than the merge block carrying the
1324 // suffix.
1325 MergeBB->takeName(ExitBB);
1326 ExitBB->setName(Twine(MergeBB->getName()) + ".split");
1327
1328 // Now clone the original exit block.
1329 auto *ClonedExitBB = CloneBlock(ExitBB);
1330 assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&
1331 "Exit block should have been split to have one successor!");
1332 assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&
1333 "Cloned exit block has the wrong successor!");
1334
1335 // Remap any cloned instructions and create a merge phi node for them.
1336 for (auto ZippedInsts : llvm::zip_first(
1337 llvm::make_range(ExitBB->begin(), std::prev(ExitBB->end())),
1338 llvm::make_range(ClonedExitBB->begin(),
1339 std::prev(ClonedExitBB->end())))) {
1340 Instruction &I = std::get<0>(ZippedInsts);
1341 Instruction &ClonedI = std::get<1>(ZippedInsts);
1342
1343 // The only instructions in the exit block should be PHI nodes and
1344 // potentially a landing pad.
1345 assert(
1347 "Bad instruction in exit block!");
1348 // We should have a value map between the instruction and its clone.
1349 assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!");
1350
1351 // Forget SCEVs based on exit phis in case SCEV looked through the phi.
1352 if (SE)
1353 if (auto *PN = dyn_cast<PHINode>(&I))
1355
1356 BasicBlock::iterator InsertPt = MergeBB->getFirstInsertionPt();
1357
1358 auto *MergePN =
1359 PHINode::Create(I.getType(), /*NumReservedValues*/ 2, ".us-phi");
1360 MergePN->insertBefore(InsertPt);
1361 MergePN->setDebugLoc(InsertPt->getDebugLoc());
1362 I.replaceAllUsesWith(MergePN);
1363 MergePN->addIncoming(&I, ExitBB);
1364 MergePN->addIncoming(&ClonedI, ClonedExitBB);
1365 }
1366 }
1367
1368 // Rewrite the instructions in the cloned blocks to refer to the instructions
1369 // in the cloned blocks. We have to do this as a second pass so that we have
1370 // everything available. Also, we have inserted new instructions which may
1371 // include assume intrinsics, so we update the assumption cache while
1372 // processing this.
1373 Module *M = ClonedPH->getParent()->getParent();
1374 for (auto *ClonedBB : NewBlocks)
1375 for (Instruction &I : *ClonedBB) {
1376 RemapDbgRecordRange(M, I.getDbgRecordRange(), VMap,
1378 RemapInstruction(&I, VMap,
1380 if (auto *II = dyn_cast<AssumeInst>(&I))
1382 }
1383
1384 // Update any PHI nodes in the cloned successors of the skipped blocks to not
1385 // have spurious incoming values.
1386 for (auto *LoopBB : L.blocks())
1387 if (SkipBlock(LoopBB))
1388 for (auto *SuccBB : successors(LoopBB))
1389 if (auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB)))
1390 for (PHINode &PN : ClonedSuccBB->phis())
1391 PN.removeIncomingValue(LoopBB, /*DeletePHIIfEmpty*/ false);
1392
1393 // Remove the cloned parent as a predecessor of any successor we ended up
1394 // cloning other than the unswitched one.
1395 auto *ClonedParentBB = cast<BasicBlock>(VMap.lookup(ParentBB));
1396 for (auto *SuccBB : successors(ParentBB)) {
1397 if (SuccBB == UnswitchedSuccBB)
1398 continue;
1399
1400 auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB));
1401 if (!ClonedSuccBB)
1402 continue;
1403
1404 ClonedSuccBB->removePredecessor(ClonedParentBB,
1405 /*KeepOneInputPHIs*/ true);
1406 }
1407
1408 // Replace the cloned branch with an unconditional branch to the cloned
1409 // unswitched successor.
1410 auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB));
1411 Instruction *ClonedTerminator = ClonedParentBB->getTerminator();
1412 // Trivial Simplification. If Terminator is a conditional branch and
1413 // condition becomes dead - erase it.
1414 Value *ClonedConditionToErase = nullptr;
1415 if (auto *BI = dyn_cast<CondBrInst>(ClonedTerminator))
1416 ClonedConditionToErase = BI->getCondition();
1417 else if (auto *SI = dyn_cast<SwitchInst>(ClonedTerminator))
1418 ClonedConditionToErase = SI->getCondition();
1419
1420 Instruction *BI = UncondBrInst::Create(ClonedSuccBB, ClonedParentBB);
1421 BI->setDebugLoc(ClonedTerminator->getDebugLoc());
1422 ClonedTerminator->eraseFromParent();
1423
1424 if (ClonedConditionToErase)
1425 RecursivelyDeleteTriviallyDeadInstructions(ClonedConditionToErase, nullptr,
1426 MSSAU);
1427
1428 // If there are duplicate entries in the PHI nodes because of multiple edges
1429 // to the unswitched successor, we need to nuke all but one as we replaced it
1430 // with a direct branch.
1431 for (PHINode &PN : ClonedSuccBB->phis()) {
1432 bool Found = false;
1433 // Loop over the incoming operands backwards so we can easily delete as we
1434 // go without invalidating the index.
1435 for (int i = PN.getNumOperands() - 1; i >= 0; --i) {
1436 if (PN.getIncomingBlock(i) != ClonedParentBB)
1437 continue;
1438 if (!Found) {
1439 Found = true;
1440 continue;
1441 }
1442 PN.removeIncomingValue(i, /*DeletePHIIfEmpty*/ false);
1443 }
1444 }
1445
1446 // Record the domtree updates for the new blocks.
1448 for (auto *ClonedBB : NewBlocks) {
1449 for (auto *SuccBB : successors(ClonedBB))
1450 if (SuccSet.insert(SuccBB).second)
1451 DTUpdates.push_back({DominatorTree::Insert, ClonedBB, SuccBB});
1452 SuccSet.clear();
1453 }
1454
1455 return ClonedPH;
1456}
1457
1458/// Recursively clone the specified loop and all of its children.
1459///
1460/// The target parent loop for the clone should be provided, or can be null if
1461/// the clone is a top-level loop. While cloning, all the blocks are mapped
1462/// with the provided value map. The entire original loop must be present in
1463/// the value map. The cloned loop is returned.
1464static Loop *cloneLoopNest(Loop &OrigRootL, Loop *RootParentL,
1465 const ValueToValueMapTy &VMap, LoopInfo &LI) {
1466 auto AddClonedBlocksToLoop = [&](Loop &OrigL, Loop &ClonedL) {
1467 assert(ClonedL.getBlocks().empty() && "Must start with an empty loop!");
1468 ClonedL.reserveBlocks(OrigL.getNumBlocks());
1469 for (auto *BB : OrigL.blocks()) {
1470 auto *ClonedBB = cast<BasicBlock>(VMap.lookup(BB));
1471 ClonedL.addBlockEntry(ClonedBB);
1472 if (LI.getLoopFor(BB) == &OrigL)
1473 LI.changeLoopFor(ClonedBB, &ClonedL);
1474 }
1475 };
1476
1477 // We specially handle the first loop because it may get cloned into
1478 // a different parent and because we most commonly are cloning leaf loops.
1479 Loop *ClonedRootL = LI.AllocateLoop();
1480 if (RootParentL)
1481 RootParentL->addChildLoop(ClonedRootL);
1482 else
1483 LI.addTopLevelLoop(ClonedRootL);
1484 AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
1485
1486 if (OrigRootL.isInnermost())
1487 return ClonedRootL;
1488
1489 // If we have a nest, we can quickly clone the entire loop nest using an
1490 // iterative approach because it is a tree. We keep the cloned parent in the
1491 // data structure to avoid repeatedly querying through a map to find it.
1492 SmallVector<std::pair<Loop *, Loop *>, 16> LoopsToClone;
1493 // Build up the loops to clone in reverse order as we'll clone them from the
1494 // back.
1495 for (Loop *ChildL : llvm::reverse(OrigRootL))
1496 LoopsToClone.push_back({ClonedRootL, ChildL});
1497 do {
1498 Loop *ClonedParentL, *L;
1499 std::tie(ClonedParentL, L) = LoopsToClone.pop_back_val();
1500 Loop *ClonedL = LI.AllocateLoop();
1501 ClonedParentL->addChildLoop(ClonedL);
1502 AddClonedBlocksToLoop(*L, *ClonedL);
1503 for (Loop *ChildL : llvm::reverse(*L))
1504 LoopsToClone.push_back({ClonedL, ChildL});
1505 } while (!LoopsToClone.empty());
1506
1507 return ClonedRootL;
1508}
1509
1510/// Build the cloned loops of an original loop from unswitching.
1511///
1512/// Because unswitching simplifies the CFG of the loop, this isn't a trivial
1513/// operation. We need to re-verify that there even is a loop (as the backedge
1514/// may not have been cloned), and even if there are remaining backedges the
1515/// backedge set may be different. However, we know that each child loop is
1516/// undisturbed, we only need to find where to place each child loop within
1517/// either any parent loop or within a cloned version of the original loop.
1518///
1519/// Because child loops may end up cloned outside of any cloned version of the
1520/// original loop, multiple cloned sibling loops may be created. All of them
1521/// are returned so that the newly introduced loop nest roots can be
1522/// identified.
1523static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks,
1524 const ValueToValueMapTy &VMap, LoopInfo &LI,
1525 SmallVectorImpl<Loop *> &NonChildClonedLoops) {
1526 Loop *ClonedL = nullptr;
1527
1528 auto *OrigPH = OrigL.getLoopPreheader();
1529 auto *OrigHeader = OrigL.getHeader();
1530
1531 auto *ClonedPH = cast<BasicBlock>(VMap.lookup(OrigPH));
1532 auto *ClonedHeader = cast<BasicBlock>(VMap.lookup(OrigHeader));
1533
1534 // We need to know the loops of the cloned exit blocks to even compute the
1535 // accurate parent loop. If we only clone exits to some parent of the
1536 // original parent, we want to clone into that outer loop. We also keep track
1537 // of the loops that our cloned exit blocks participate in.
1538 Loop *ParentL = nullptr;
1539 SmallVector<BasicBlock *, 4> ClonedExitsInLoops;
1541 ClonedExitsInLoops.reserve(ExitBlocks.size());
1542 for (auto *ExitBB : ExitBlocks)
1543 if (auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.lookup(ExitBB)))
1544 if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1545 ExitLoopMap[ClonedExitBB] = ExitL;
1546 ClonedExitsInLoops.push_back(ClonedExitBB);
1547 if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1548 ParentL = ExitL;
1549 }
1550 assert((!ParentL || ParentL == OrigL.getParentLoop() ||
1551 ParentL->contains(OrigL.getParentLoop())) &&
1552 "The computed parent loop should always contain (or be) the parent of "
1553 "the original loop.");
1554
1555 // We build the set of blocks dominated by the cloned header from the set of
1556 // cloned blocks out of the original loop. While not all of these will
1557 // necessarily be in the cloned loop, it is enough to establish that they
1558 // aren't in unreachable cycles, etc.
1559 SmallSetVector<BasicBlock *, 16> ClonedLoopBlocks;
1560 for (auto *BB : OrigL.blocks())
1561 if (auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB)))
1562 ClonedLoopBlocks.insert(ClonedBB);
1563
1564 // Rebuild the set of blocks that will end up in the cloned loop. We may have
1565 // skipped cloning some region of this loop which can in turn skip some of
1566 // the backedges so we have to rebuild the blocks in the loop based on the
1567 // backedges that remain after cloning.
1569 SmallPtrSet<BasicBlock *, 16> BlocksInClonedLoop;
1570 for (auto *Pred : predecessors(ClonedHeader)) {
1571 // The only possible non-loop header predecessor is the preheader because
1572 // we know we cloned the loop in simplified form.
1573 if (Pred == ClonedPH)
1574 continue;
1575
1576 // Because the loop was in simplified form, the only non-loop predecessor
1577 // should be the preheader.
1578 assert(ClonedLoopBlocks.count(Pred) && "Found a predecessor of the loop "
1579 "header other than the preheader "
1580 "that is not part of the loop!");
1581
1582 // Insert this block into the loop set and on the first visit (and if it
1583 // isn't the header we're currently walking) put it into the worklist to
1584 // recurse through.
1585 if (BlocksInClonedLoop.insert(Pred).second && Pred != ClonedHeader)
1586 Worklist.push_back(Pred);
1587 }
1588
1589 // If we had any backedges then there *is* a cloned loop. Put the header into
1590 // the loop set and then walk the worklist backwards to find all the blocks
1591 // that remain within the loop after cloning.
1592 if (!BlocksInClonedLoop.empty()) {
1593 BlocksInClonedLoop.insert(ClonedHeader);
1594
1595 while (!Worklist.empty()) {
1596 BasicBlock *BB = Worklist.pop_back_val();
1597 assert(BlocksInClonedLoop.count(BB) &&
1598 "Didn't put block into the loop set!");
1599
1600 // Insert any predecessors that are in the possible set into the cloned
1601 // set, and if the insert is successful, add them to the worklist. Note
1602 // that we filter on the blocks that are definitely reachable via the
1603 // backedge to the loop header so we may prune out dead code within the
1604 // cloned loop.
1605 for (auto *Pred : predecessors(BB))
1606 if (ClonedLoopBlocks.count(Pred) &&
1607 BlocksInClonedLoop.insert(Pred).second)
1608 Worklist.push_back(Pred);
1609 }
1610
1611 ClonedL = LI.AllocateLoop();
1612 if (ParentL) {
1613 ParentL->addBasicBlockToLoop(ClonedPH, LI);
1614 ParentL->addChildLoop(ClonedL);
1615 } else {
1616 LI.addTopLevelLoop(ClonedL);
1617 }
1618 NonChildClonedLoops.push_back(ClonedL);
1619
1620 ClonedL->reserveBlocks(BlocksInClonedLoop.size());
1621 // We don't want to just add the cloned loop blocks based on how we
1622 // discovered them. The original order of blocks was carefully built in
1623 // a way that doesn't rely on predecessor ordering. Rather than re-invent
1624 // that logic, we just re-walk the original blocks (and those of the child
1625 // loops) and filter them as we add them into the cloned loop.
1626 for (auto *BB : OrigL.blocks()) {
1627 auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB));
1628 if (!ClonedBB || !BlocksInClonedLoop.count(ClonedBB))
1629 continue;
1630
1631 // Directly add the blocks that are only in this loop.
1632 if (LI.getLoopFor(BB) == &OrigL) {
1633 ClonedL->addBasicBlockToLoop(ClonedBB, LI);
1634 continue;
1635 }
1636
1637 // We want to manually add it to this loop and parents.
1638 // Registering it with LoopInfo will happen when we clone the top
1639 // loop for this block.
1640 for (Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
1641 PL->addBlockEntry(ClonedBB);
1642 }
1643
1644 // Now add each child loop whose header remains within the cloned loop. All
1645 // of the blocks within the loop must satisfy the same constraints as the
1646 // header so once we pass the header checks we can just clone the entire
1647 // child loop nest.
1648 for (Loop *ChildL : OrigL) {
1649 auto *ClonedChildHeader =
1650 cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1651 if (!ClonedChildHeader || !BlocksInClonedLoop.count(ClonedChildHeader))
1652 continue;
1653
1654#ifndef NDEBUG
1655 // We should never have a cloned child loop header but fail to have
1656 // all of the blocks for that child loop.
1657 for (auto *ChildLoopBB : ChildL->blocks())
1658 assert(BlocksInClonedLoop.count(
1659 cast<BasicBlock>(VMap.lookup(ChildLoopBB))) &&
1660 "Child cloned loop has a header within the cloned outer "
1661 "loop but not all of its blocks!");
1662#endif
1663
1664 cloneLoopNest(*ChildL, ClonedL, VMap, LI);
1665 }
1666 }
1667
1668 // Now that we've handled all the components of the original loop that were
1669 // cloned into a new loop, we still need to handle anything from the original
1670 // loop that wasn't in a cloned loop.
1671
1672 // Figure out what blocks are left to place within any loop nest containing
1673 // the unswitched loop. If we never formed a loop, the cloned PH is one of
1674 // them.
1675 SmallPtrSet<BasicBlock *, 16> UnloopedBlockSet;
1676 if (BlocksInClonedLoop.empty())
1677 UnloopedBlockSet.insert(ClonedPH);
1678 for (auto *ClonedBB : ClonedLoopBlocks)
1679 if (!BlocksInClonedLoop.count(ClonedBB))
1680 UnloopedBlockSet.insert(ClonedBB);
1681
1682 // Copy the cloned exits and sort them in ascending loop depth, we'll work
1683 // backwards across these to process them inside out. The order shouldn't
1684 // matter as we're just trying to build up the map from inside-out; we use
1685 // the map in a more stably ordered way below.
1686 auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
1687 llvm::sort(OrderedClonedExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1688 return ExitLoopMap.lookup(LHS)->getLoopDepth() <
1689 ExitLoopMap.lookup(RHS)->getLoopDepth();
1690 });
1691
1692 // Populate the existing ExitLoopMap with everything reachable from each
1693 // exit, starting from the inner most exit.
1694 while (!UnloopedBlockSet.empty() && !OrderedClonedExitsInLoops.empty()) {
1695 assert(Worklist.empty() && "Didn't clear worklist!");
1696
1697 BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
1698 Loop *ExitL = ExitLoopMap.lookup(ExitBB);
1699
1700 // Walk the CFG back until we hit the cloned PH adding everything reachable
1701 // and in the unlooped set to this exit block's loop.
1702 Worklist.push_back(ExitBB);
1703 do {
1704 BasicBlock *BB = Worklist.pop_back_val();
1705 // We can stop recursing at the cloned preheader (if we get there).
1706 if (BB == ClonedPH)
1707 continue;
1708
1709 for (BasicBlock *PredBB : predecessors(BB)) {
1710 // If this pred has already been moved to our set or is part of some
1711 // (inner) loop, no update needed.
1712 if (!UnloopedBlockSet.erase(PredBB)) {
1713 assert(
1714 (BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) &&
1715 "Predecessor not mapped to a loop!");
1716 continue;
1717 }
1718
1719 // We just insert into the loop set here. We'll add these blocks to the
1720 // exit loop after we build up the set in an order that doesn't rely on
1721 // predecessor order (which in turn relies on use list order).
1722 bool Inserted = ExitLoopMap.insert({PredBB, ExitL}).second;
1723 (void)Inserted;
1724 assert(Inserted && "Should only visit an unlooped block once!");
1725
1726 // And recurse through to its predecessors.
1727 Worklist.push_back(PredBB);
1728 }
1729 } while (!Worklist.empty());
1730 }
1731
1732 // Now that the ExitLoopMap gives as mapping for all the non-looping cloned
1733 // blocks to their outer loops, walk the cloned blocks and the cloned exits
1734 // in their original order adding them to the correct loop.
1735
1736 // We need a stable insertion order. We use the order of the original loop
1737 // order and map into the correct parent loop.
1738 for (auto *BB : llvm::concat<BasicBlock *const>(
1739 ArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
1740 if (Loop *OuterL = ExitLoopMap.lookup(BB))
1741 OuterL->addBasicBlockToLoop(BB, LI);
1742
1743#ifndef NDEBUG
1744 for (auto &BBAndL : ExitLoopMap) {
1745 auto *BB = BBAndL.first;
1746 auto *OuterL = BBAndL.second;
1747 assert(LI.getLoopFor(BB) == OuterL &&
1748 "Failed to put all blocks into outer loops!");
1749 }
1750#endif
1751
1752 // Now that all the blocks are placed into the correct containing loop in the
1753 // absence of child loops, find all the potentially cloned child loops and
1754 // clone them into whatever outer loop we placed their header into.
1755 for (Loop *ChildL : OrigL) {
1756 auto *ClonedChildHeader =
1757 cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1758 if (!ClonedChildHeader || BlocksInClonedLoop.count(ClonedChildHeader))
1759 continue;
1760
1761#ifndef NDEBUG
1762 for (auto *ChildLoopBB : ChildL->blocks())
1763 assert(VMap.count(ChildLoopBB) &&
1764 "Cloned a child loop header but not all of that loops blocks!");
1765#endif
1766
1767 NonChildClonedLoops.push_back(cloneLoopNest(
1768 *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI));
1769 }
1770}
1771
1772static void
1774 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
1775 DominatorTree &DT, MemorySSAUpdater *MSSAU) {
1776 // Find all the dead clones, and remove them from their successors.
1778 for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
1779 for (const auto &VMap : VMaps)
1780 if (BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB)))
1781 if (!DT.isReachableFromEntry(ClonedBB)) {
1782 for (BasicBlock *SuccBB : successors(ClonedBB))
1783 SuccBB->removePredecessor(ClonedBB);
1784 DeadBlocks.push_back(ClonedBB);
1785 }
1786
1787 // Remove all MemorySSA in the dead blocks
1788 if (MSSAU) {
1789 SmallSetVector<BasicBlock *, 8> DeadBlockSet(DeadBlocks.begin(),
1790 DeadBlocks.end());
1791 MSSAU->removeBlocks(DeadBlockSet);
1792 }
1793
1794 // Drop any remaining references to break cycles.
1795 for (BasicBlock *BB : DeadBlocks)
1796 BB->dropAllReferences();
1797 // Erase them from the IR.
1798 for (BasicBlock *BB : DeadBlocks)
1799 BB->eraseFromParent();
1800}
1801
1804 DominatorTree &DT, LoopInfo &LI,
1805 MemorySSAUpdater *MSSAU,
1806 ScalarEvolution *SE,
1807 LPMUpdater &LoopUpdater) {
1808 // Find all the dead blocks tied to this loop, and remove them from their
1809 // successors.
1811
1812 // Start with loop/exit blocks and get a transitive closure of reachable dead
1813 // blocks.
1814 SmallVector<BasicBlock *, 16> DeathCandidates(ExitBlocks.begin(),
1815 ExitBlocks.end());
1816 DeathCandidates.append(L.blocks().begin(), L.blocks().end());
1817 while (!DeathCandidates.empty()) {
1818 auto *BB = DeathCandidates.pop_back_val();
1819 if (!DeadBlockSet.count(BB) && !DT.isReachableFromEntry(BB)) {
1820 for (BasicBlock *SuccBB : successors(BB)) {
1821 SuccBB->removePredecessor(BB);
1822 DeathCandidates.push_back(SuccBB);
1823 }
1824 DeadBlockSet.insert(BB);
1825 }
1826 }
1827
1828 // Remove all MemorySSA in the dead blocks
1829 if (MSSAU)
1830 MSSAU->removeBlocks(DeadBlockSet);
1831
1832 // Filter out the dead blocks from the exit blocks list so that it can be
1833 // used in the caller.
1834 llvm::erase_if(ExitBlocks,
1835 [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1836
1837 // Walk from this loop up through its parents removing all of the dead blocks.
1838 for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
1839 for (auto *BB : DeadBlockSet)
1840 ParentL->getBlocksSet().erase(BB);
1841 llvm::erase_if(ParentL->getBlocksVector(),
1842 [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1843 }
1844
1845 // Now delete the dead child loops. This raw delete will clear them
1846 // recursively.
1847 llvm::erase_if(L.getSubLoopsVector(), [&](Loop *ChildL) {
1848 if (!DeadBlockSet.count(ChildL->getHeader()))
1849 return false;
1850
1851 assert(llvm::all_of(ChildL->blocks(),
1852 [&](BasicBlock *ChildBB) {
1853 return DeadBlockSet.count(ChildBB);
1854 }) &&
1855 "If the child loop header is dead all blocks in the child loop must "
1856 "be dead as well!");
1857 LoopUpdater.markLoopAsDeleted(*ChildL, ChildL->getName());
1858 if (SE)
1860 LI.destroy(ChildL);
1861 return true;
1862 });
1863
1864 // Remove the loop mappings for the dead blocks and drop all the references
1865 // from these blocks to others to handle cyclic references as we start
1866 // deleting the blocks themselves.
1867 for (auto *BB : DeadBlockSet) {
1868 // Check that the dominator tree has already been updated.
1869 assert(!DT.getNode(BB) && "Should already have cleared domtree!");
1870 LI.changeLoopFor(BB, nullptr);
1871 // Drop all uses of the instructions to make sure we won't have dangling
1872 // uses in other blocks.
1873 for (auto &I : *BB)
1874 if (!I.use_empty())
1875 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
1876 BB->dropAllReferences();
1877 }
1878
1879 // Actually delete the blocks now that they've been fully unhooked from the
1880 // IR.
1881 for (auto *BB : DeadBlockSet)
1882 BB->eraseFromParent();
1883}
1884
1885/// Recompute the set of blocks in a loop after unswitching.
1886///
1887/// This walks from the original headers predecessors to rebuild the loop. We
1888/// take advantage of the fact that new blocks can't have been added, and so we
1889/// filter by the original loop's blocks. This also handles potentially
1890/// unreachable code that we don't want to explore but might be found examining
1891/// the predecessors of the header.
1892///
1893/// If the original loop is no longer a loop, this will return an empty set. If
1894/// it remains a loop, all the blocks within it will be added to the set
1895/// (including those blocks in inner loops).
1897 LoopInfo &LI) {
1899
1900 auto *PH = L.getLoopPreheader();
1901 auto *Header = L.getHeader();
1902
1903 // A worklist to use while walking backwards from the header.
1905
1906 // First walk the predecessors of the header to find the backedges. This will
1907 // form the basis of our walk.
1908 for (auto *Pred : predecessors(Header)) {
1909 // Skip the preheader.
1910 if (Pred == PH)
1911 continue;
1912
1913 // Because the loop was in simplified form, the only non-loop predecessor
1914 // is the preheader.
1915 assert(L.contains(Pred) && "Found a predecessor of the loop header other "
1916 "than the preheader that is not part of the "
1917 "loop!");
1918
1919 // Insert this block into the loop set and on the first visit and, if it
1920 // isn't the header we're currently walking, put it into the worklist to
1921 // recurse through.
1922 if (LoopBlockSet.insert(Pred).second && Pred != Header)
1923 Worklist.push_back(Pred);
1924 }
1925
1926 // If no backedges were found, we're done.
1927 if (LoopBlockSet.empty())
1928 return LoopBlockSet;
1929
1930 // We found backedges, recurse through them to identify the loop blocks.
1931 while (!Worklist.empty()) {
1932 BasicBlock *BB = Worklist.pop_back_val();
1933 assert(LoopBlockSet.count(BB) && "Didn't put block into the loop set!");
1934
1935 // No need to walk past the header.
1936 if (BB == Header)
1937 continue;
1938
1939 // Because we know the inner loop structure remains valid we can use the
1940 // loop structure to jump immediately across the entire nested loop.
1941 // Further, because it is in loop simplified form, we can directly jump
1942 // to its preheader afterward.
1943 if (Loop *InnerL = LI.getLoopFor(BB))
1944 if (InnerL != &L) {
1945 assert(L.contains(InnerL) &&
1946 "Should not reach a loop *outside* this loop!");
1947 // The preheader is the only possible predecessor of the loop so
1948 // insert it into the set and check whether it was already handled.
1949 auto *InnerPH = InnerL->getLoopPreheader();
1950 assert(L.contains(InnerPH) && "Cannot contain an inner loop block "
1951 "but not contain the inner loop "
1952 "preheader!");
1953 if (!LoopBlockSet.insert(InnerPH).second)
1954 // The only way to reach the preheader is through the loop body
1955 // itself so if it has been visited the loop is already handled.
1956 continue;
1957
1958 // Insert all of the blocks (other than those already present) into
1959 // the loop set. We expect at least the block that led us to find the
1960 // inner loop to be in the block set, but we may also have other loop
1961 // blocks if they were already enqueued as predecessors of some other
1962 // outer loop block.
1963 for (auto *InnerBB : InnerL->blocks()) {
1964 if (InnerBB == BB) {
1965 assert(LoopBlockSet.count(InnerBB) &&
1966 "Block should already be in the set!");
1967 continue;
1968 }
1969
1970 LoopBlockSet.insert(InnerBB);
1971 }
1972
1973 // Add the preheader to the worklist so we will continue past the
1974 // loop body.
1975 Worklist.push_back(InnerPH);
1976 continue;
1977 }
1978
1979 // Insert any predecessors that were in the original loop into the new
1980 // set, and if the insert is successful, add them to the worklist.
1981 for (auto *Pred : predecessors(BB))
1982 if (L.contains(Pred) && LoopBlockSet.insert(Pred).second)
1983 Worklist.push_back(Pred);
1984 }
1985
1986 assert(LoopBlockSet.count(Header) && "Cannot fail to add the header!");
1987
1988 // We've found all the blocks participating in the loop, return our completed
1989 // set.
1990 return LoopBlockSet;
1991}
1992
1993/// Rebuild a loop after unswitching removes some subset of blocks and edges.
1994///
1995/// The removal may have removed some child loops entirely but cannot have
1996/// disturbed any remaining child loops. However, they may need to be hoisted
1997/// to the parent loop (or to be top-level loops). The original loop may be
1998/// completely removed.
1999///
2000/// The sibling loops resulting from this update are returned. If the original
2001/// loop remains a valid loop, it will be the first entry in this list with all
2002/// of the newly sibling loops following it.
2003///
2004/// Returns true if the loop remains a loop after unswitching, and false if it
2005/// is no longer a loop after unswitching (and should not continue to be
2006/// referenced).
2008 LoopInfo &LI,
2009 SmallVectorImpl<Loop *> &HoistedLoops,
2010 ScalarEvolution *SE) {
2011 auto *PH = L.getLoopPreheader();
2012
2013 // Compute the actual parent loop from the exit blocks. Because we may have
2014 // pruned some exits the loop may be different from the original parent.
2015 Loop *ParentL = nullptr;
2016 SmallVector<Loop *, 4> ExitLoops;
2017 SmallVector<BasicBlock *, 4> ExitsInLoops;
2018 ExitsInLoops.reserve(ExitBlocks.size());
2019 for (auto *ExitBB : ExitBlocks)
2020 if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
2021 ExitLoops.push_back(ExitL);
2022 ExitsInLoops.push_back(ExitBB);
2023 if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
2024 ParentL = ExitL;
2025 }
2026
2027 // Recompute the blocks participating in this loop. This may be empty if it
2028 // is no longer a loop.
2029 auto LoopBlockSet = recomputeLoopBlockSet(L, LI);
2030
2031 // If we still have a loop, we need to re-set the loop's parent as the exit
2032 // block set changing may have moved it within the loop nest. Note that this
2033 // can only happen when this loop has a parent as it can only hoist the loop
2034 // *up* the nest.
2035 if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
2036 // Remove this loop's (original) blocks from all of the intervening loops.
2037 for (Loop *IL = L.getParentLoop(); IL != ParentL;
2038 IL = IL->getParentLoop()) {
2039 IL->getBlocksSet().erase(PH);
2040 for (auto *BB : L.blocks())
2041 IL->getBlocksSet().erase(BB);
2042 llvm::erase_if(IL->getBlocksVector(), [&](BasicBlock *BB) {
2043 return BB == PH || L.contains(BB);
2044 });
2045 }
2046
2047 LI.changeLoopFor(PH, ParentL);
2048 L.getParentLoop()->removeChildLoop(&L);
2049 if (ParentL)
2050 ParentL->addChildLoop(&L);
2051 else
2052 LI.addTopLevelLoop(&L);
2053 }
2054
2055 // Now we update all the blocks which are no longer within the loop.
2056 auto &Blocks = L.getBlocksVector();
2057 auto BlocksSplitI =
2058 LoopBlockSet.empty()
2059 ? Blocks.begin()
2060 : std::stable_partition(
2061 Blocks.begin(), Blocks.end(),
2062 [&](BasicBlock *BB) { return LoopBlockSet.count(BB); });
2063
2064 // Before we erase the list of unlooped blocks, build a set of them.
2065 SmallPtrSet<BasicBlock *, 16> UnloopedBlocks(BlocksSplitI, Blocks.end());
2066 if (LoopBlockSet.empty())
2067 UnloopedBlocks.insert(PH);
2068
2069 // Now erase these blocks from the loop.
2070 for (auto *BB : make_range(BlocksSplitI, Blocks.end()))
2071 L.getBlocksSet().erase(BB);
2072 Blocks.erase(BlocksSplitI, Blocks.end());
2073
2074 // Sort the exits in ascending loop depth, we'll work backwards across these
2075 // to process them inside out.
2076 llvm::stable_sort(ExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
2077 return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
2078 });
2079
2080 // We'll build up a set for each exit loop.
2081 SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks;
2082 Loop *PrevExitL = L.getParentLoop(); // The deepest possible exit loop.
2083
2084 auto RemoveUnloopedBlocksFromLoop =
2085 [](Loop &L, SmallPtrSetImpl<BasicBlock *> &UnloopedBlocks) {
2086 for (auto *BB : UnloopedBlocks)
2087 L.getBlocksSet().erase(BB);
2088 llvm::erase_if(L.getBlocksVector(), [&](BasicBlock *BB) {
2089 return UnloopedBlocks.count(BB);
2090 });
2091 };
2092
2094 while (!UnloopedBlocks.empty() && !ExitsInLoops.empty()) {
2095 assert(Worklist.empty() && "Didn't clear worklist!");
2096 assert(NewExitLoopBlocks.empty() && "Didn't clear loop set!");
2097
2098 // Grab the next exit block, in decreasing loop depth order.
2099 BasicBlock *ExitBB = ExitsInLoops.pop_back_val();
2100 Loop &ExitL = *LI.getLoopFor(ExitBB);
2101 assert(ExitL.contains(&L) && "Exit loop must contain the inner loop!");
2102
2103 // Erase all of the unlooped blocks from the loops between the previous
2104 // exit loop and this exit loop. This works because the ExitInLoops list is
2105 // sorted in increasing order of loop depth and thus we visit loops in
2106 // decreasing order of loop depth.
2107 for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->getParentLoop())
2108 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
2109
2110 // Walk the CFG back until we hit the cloned PH adding everything reachable
2111 // and in the unlooped set to this exit block's loop.
2112 Worklist.push_back(ExitBB);
2113 do {
2114 BasicBlock *BB = Worklist.pop_back_val();
2115 // We can stop recursing at the cloned preheader (if we get there).
2116 if (BB == PH)
2117 continue;
2118
2119 for (BasicBlock *PredBB : predecessors(BB)) {
2120 // If this pred has already been moved to our set or is part of some
2121 // (inner) loop, no update needed.
2122 if (!UnloopedBlocks.erase(PredBB)) {
2123 assert((NewExitLoopBlocks.count(PredBB) ||
2124 ExitL.contains(LI.getLoopFor(PredBB))) &&
2125 "Predecessor not in a nested loop (or already visited)!");
2126 continue;
2127 }
2128
2129 // We just insert into the loop set here. We'll add these blocks to the
2130 // exit loop after we build up the set in a deterministic order rather
2131 // than the predecessor-influenced visit order.
2132 bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
2133 (void)Inserted;
2134 assert(Inserted && "Should only visit an unlooped block once!");
2135
2136 // And recurse through to its predecessors.
2137 Worklist.push_back(PredBB);
2138 }
2139 } while (!Worklist.empty());
2140
2141 // If blocks in this exit loop were directly part of the original loop (as
2142 // opposed to a child loop) update the map to point to this exit loop. This
2143 // just updates a map and so the fact that the order is unstable is fine.
2144 for (auto *BB : NewExitLoopBlocks)
2145 if (Loop *BBL = LI.getLoopFor(BB))
2146 if (BBL == &L || !L.contains(BBL))
2147 LI.changeLoopFor(BB, &ExitL);
2148
2149 // We will remove the remaining unlooped blocks from this loop in the next
2150 // iteration or below.
2151 NewExitLoopBlocks.clear();
2152 }
2153
2154 // Any remaining unlooped blocks are no longer part of any loop unless they
2155 // are part of some child loop.
2156 for (; PrevExitL; PrevExitL = PrevExitL->getParentLoop())
2157 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
2158 for (auto *BB : UnloopedBlocks)
2159 if (Loop *BBL = LI.getLoopFor(BB))
2160 if (BBL == &L || !L.contains(BBL))
2161 LI.changeLoopFor(BB, nullptr);
2162
2163 // Sink all the child loops whose headers are no longer in the loop set to
2164 // the parent (or to be top level loops). We reach into the loop and directly
2165 // update its subloop vector to make this batch update efficient.
2166 auto &SubLoops = L.getSubLoopsVector();
2167 auto SubLoopsSplitI =
2168 LoopBlockSet.empty()
2169 ? SubLoops.begin()
2170 : std::stable_partition(
2171 SubLoops.begin(), SubLoops.end(), [&](Loop *SubL) {
2172 return LoopBlockSet.count(SubL->getHeader());
2173 });
2174 for (auto *HoistedL : make_range(SubLoopsSplitI, SubLoops.end())) {
2175 HoistedLoops.push_back(HoistedL);
2176 HoistedL->setParentLoop(nullptr);
2177
2178 // To compute the new parent of this hoisted loop we look at where we
2179 // placed the preheader above. We can't lookup the header itself because we
2180 // retained the mapping from the header to the hoisted loop. But the
2181 // preheader and header should have the exact same new parent computed
2182 // based on the set of exit blocks from the original loop as the preheader
2183 // is a predecessor of the header and so reached in the reverse walk. And
2184 // because the loops were all in simplified form the preheader of the
2185 // hoisted loop can't be part of some *other* loop.
2186 if (auto *NewParentL = LI.getLoopFor(HoistedL->getLoopPreheader()))
2187 NewParentL->addChildLoop(HoistedL);
2188 else
2189 LI.addTopLevelLoop(HoistedL);
2190 }
2191 SubLoops.erase(SubLoopsSplitI, SubLoops.end());
2192
2193 // Actually delete the loop if nothing remained within it.
2194 if (Blocks.empty()) {
2195 assert(SubLoops.empty() &&
2196 "Failed to remove all subloops from the original loop!");
2197 if (Loop *ParentL = L.getParentLoop())
2198 ParentL->removeChildLoop(llvm::find(*ParentL, &L));
2199 else
2200 LI.removeLoop(llvm::find(LI, &L));
2201 // markLoopAsDeleted for L should be triggered by the caller (it is
2202 // typically done within postUnswitch).
2203 if (SE)
2205 LI.destroy(&L);
2206 return false;
2207 }
2208
2209 return true;
2210}
2211
2212/// Helper to visit a dominator subtree, invoking a callable on each node.
2213///
2214/// Returning false at any point will stop walking past that node of the tree.
2215template <typename CallableT>
2216void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
2218 DomWorklist.push_back(DT[BB]);
2219#ifndef NDEBUG
2221 Visited.insert(DT[BB]);
2222#endif
2223 do {
2224 DomTreeNode *N = DomWorklist.pop_back_val();
2225
2226 // Visit this node.
2227 if (!Callable(N->getBlock()))
2228 continue;
2229
2230 // Accumulate the child nodes.
2231 for (DomTreeNode *ChildN : *N) {
2232 assert(Visited.insert(ChildN).second &&
2233 "Cannot visit a node twice when walking a tree!");
2234 DomWorklist.push_back(ChildN);
2235 }
2236 } while (!DomWorklist.empty());
2237}
2238
2240 bool CurrentLoopValid, bool PartiallyInvariant,
2241 bool InjectedCondition, ArrayRef<Loop *> NewLoops) {
2242 // If we did a non-trivial unswitch, we have added new (cloned) loops.
2243 if (!NewLoops.empty())
2244 U.addSiblingLoops(NewLoops);
2245
2246 // If the current loop remains valid, we should revisit it to catch any
2247 // other unswitch opportunities. Otherwise, we need to mark it as deleted.
2248 if (CurrentLoopValid) {
2249 if (PartiallyInvariant) {
2250 // Mark the new loop as partially unswitched, to avoid unswitching on
2251 // the same condition again.
2252 L.addStringLoopAttribute("llvm.loop.unswitch.partial.disable",
2253 {"llvm.loop.unswitch.partial"});
2254 } else if (InjectedCondition) {
2255 // Do the same for injection of invariant conditions.
2256 L.addStringLoopAttribute("llvm.loop.unswitch.injection.disable",
2257 {"llvm.loop.unswitch.injection"});
2258 } else
2259 U.revisitCurrentLoop();
2260 } else
2261 U.markLoopAsDeleted(L, LoopName);
2262}
2263
2265 Loop &L, Instruction &TI, ArrayRef<Value *> Invariants,
2266 IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI,
2268 LPMUpdater &LoopUpdater, bool InsertFreeze, bool InjectedCondition) {
2269 auto *ParentBB = TI.getParent();
2271 SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
2272
2273 // Save the current loop name in a variable so that we can report it even
2274 // after it has been deleted.
2275 std::string LoopName(L.getName());
2276
2277 // We can only unswitch switches, conditional branches with an invariant
2278 // condition, or combining invariant conditions with an instruction or
2279 // partially invariant instructions.
2280 assert((SI || BI) && "Can only unswitch switches and conditional branch!");
2281 bool PartiallyInvariant = !PartialIVInfo.InstToDuplicate.empty();
2282 bool FullUnswitch =
2283 SI || (skipTrivialSelect(BI->getCondition()) == Invariants[0] &&
2284 !PartiallyInvariant);
2285 if (FullUnswitch)
2286 assert(Invariants.size() == 1 &&
2287 "Cannot have other invariants with full unswitching!");
2288 else
2290 "Partial unswitching requires an instruction as the condition!");
2291
2292 if (MSSAU && VerifyMemorySSA)
2293 MSSAU->getMemorySSA()->verifyMemorySSA();
2294
2295 // Constant and BBs tracking the cloned and continuing successor. When we are
2296 // unswitching the entire condition, this can just be trivially chosen to
2297 // unswitch towards `true`. However, when we are unswitching a set of
2298 // invariants combined with `and` or `or` or partially invariant instructions,
2299 // the combining operation determines the best direction to unswitch: we want
2300 // to unswitch the direction that will collapse the branch.
2301 bool Direction = true;
2302 int ClonedSucc = 0;
2303 if (!FullUnswitch) {
2305 (void)Cond;
2307 PartiallyInvariant) &&
2308 "Only `or`, `and`, an `select`, partially invariant instructions "
2309 "can combine invariants being unswitched.");
2310 if (!match(Cond, m_LogicalOr())) {
2311 if (match(Cond, m_LogicalAnd()) ||
2312 (PartiallyInvariant && !PartialIVInfo.KnownValue->isOneValue())) {
2313 Direction = false;
2314 ClonedSucc = 1;
2315 }
2316 }
2317 }
2318
2319 BasicBlock *RetainedSuccBB =
2320 BI ? BI->getSuccessor(1 - ClonedSucc) : SI->getDefaultDest();
2321 SmallSetVector<BasicBlock *, 4> UnswitchedSuccBBs;
2322 if (BI)
2323 UnswitchedSuccBBs.insert(BI->getSuccessor(ClonedSucc));
2324 else
2325 for (auto Case : SI->cases())
2326 if (Case.getCaseSuccessor() != RetainedSuccBB)
2327 UnswitchedSuccBBs.insert(Case.getCaseSuccessor());
2328
2329 assert(!UnswitchedSuccBBs.count(RetainedSuccBB) &&
2330 "Should not unswitch the same successor we are retaining!");
2331
2332 // The branch should be in this exact loop. Any inner loop's invariant branch
2333 // should be handled by unswitching that inner loop. The caller of this
2334 // routine should filter out any candidates that remain (but were skipped for
2335 // whatever reason).
2336 assert(LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!");
2337
2338 // Compute the parent loop now before we start hacking on things.
2339 Loop *ParentL = L.getParentLoop();
2340 // Get blocks in RPO order for MSSA update, before changing the CFG.
2341 LoopBlocksRPO LBRPO(&L);
2342 if (MSSAU)
2343 LBRPO.perform(&LI);
2344
2345 // Compute the outer-most loop containing one of our exit blocks. This is the
2346 // furthest up our loopnest which can be mutated, which we will use below to
2347 // update things.
2348 Loop *OuterExitL = &L;
2350 L.getUniqueExitBlocks(ExitBlocks);
2351 for (auto *ExitBB : ExitBlocks) {
2352 // ExitBB can be an exit block for several levels in the loop nest. Make
2353 // sure we find the top most.
2354 Loop *NewOuterExitL = getTopMostExitingLoop(ExitBB, LI);
2355 if (!NewOuterExitL) {
2356 // We exited the entire nest with this block, so we're done.
2357 OuterExitL = nullptr;
2358 break;
2359 }
2360 if (NewOuterExitL != OuterExitL && NewOuterExitL->contains(OuterExitL))
2361 OuterExitL = NewOuterExitL;
2362 }
2363
2364 // At this point, we're definitely going to unswitch something so invalidate
2365 // any cached information in ScalarEvolution for the outer most loop
2366 // containing an exit block and all nested loops.
2367 if (SE) {
2368 if (OuterExitL)
2369 SE->forgetLoop(OuterExitL);
2370 else
2371 SE->forgetTopmostLoop(&L);
2373 }
2374
2375 // If the edge from this terminator to a successor dominates that successor,
2376 // store a map from each block in its dominator subtree to it. This lets us
2377 // tell when cloning for a particular successor if a block is dominated by
2378 // some *other* successor with a single data structure. We use this to
2379 // significantly reduce cloning.
2381 for (auto *SuccBB : llvm::concat<BasicBlock *const>(ArrayRef(RetainedSuccBB),
2382 UnswitchedSuccBBs))
2383 if (SuccBB->getUniquePredecessor() ||
2384 llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
2385 return PredBB == ParentBB || DT.dominates(SuccBB, PredBB);
2386 }))
2387 visitDomSubTree(DT, SuccBB, [&](BasicBlock *BB) {
2388 DominatingSucc[BB] = SuccBB;
2389 return true;
2390 });
2391
2392 // Split the preheader, so that we know that there is a safe place to insert
2393 // the conditional branch. We will change the preheader to have a conditional
2394 // branch on LoopCond. The original preheader will become the split point
2395 // between the unswitched versions, and we will have a new preheader for the
2396 // original loop.
2397 BasicBlock *SplitBB = L.getLoopPreheader();
2398 BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI, MSSAU);
2399
2400 // Keep track of the dominator tree updates needed.
2402
2403 // Clone the loop for each unswitched successor.
2405 VMaps.reserve(UnswitchedSuccBBs.size());
2407 for (auto *SuccBB : UnswitchedSuccBBs) {
2408 VMaps.emplace_back(new ValueToValueMapTy());
2409 ClonedPHs[SuccBB] = buildClonedLoopBlocks(
2410 L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
2411 DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU, SE);
2412 }
2413
2414 // Drop metadata if we may break its semantics by moving this instr into the
2415 // split block.
2416 if (TI.getMetadata(LLVMContext::MD_make_implicit)) {
2418 // Do not spend time trying to understand if we can keep it, just drop it
2419 // to save compile time.
2420 TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2421 else {
2422 // It is only legal to preserve make.implicit metadata if we are
2423 // guaranteed no reach implicit null check after following this branch.
2424 ICFLoopSafetyInfo SafetyInfo;
2425 SafetyInfo.computeLoopSafetyInfo(&L);
2426 if (!SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
2427 TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2428 }
2429 }
2430
2431 // The stitching of the branched code back together depends on whether we're
2432 // doing full unswitching or not with the exception that we always want to
2433 // nuke the initial terminator placed in the split block.
2434 SplitBB->getTerminator()->eraseFromParent();
2435 if (FullUnswitch) {
2436 // Keep a clone of the terminator for MSSA updates.
2437 Instruction *NewTI = TI.clone();
2438 NewTI->insertInto(ParentBB, ParentBB->end());
2439
2440 // Splice the terminator from the original loop and rewrite its
2441 // successors.
2442 TI.moveBefore(*SplitBB, SplitBB->end());
2443 TI.dropLocation();
2444
2445 // First wire up the moved terminator to the preheaders.
2446 if (BI) {
2447 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2448 BI->setSuccessor(ClonedSucc, ClonedPH);
2449 BI->setSuccessor(1 - ClonedSucc, LoopPH);
2451 if (InsertFreeze) {
2452 // We don't give any debug location to the new freeze, because the
2453 // BI (`dyn_cast<CondBrInst>(TI)`) is an in-loop instruction hoisted
2454 // out of the loop.
2455 Cond = new FreezeInst(Cond, Cond->getName() + ".fr", BI->getIterator());
2457 }
2458 BI->setCondition(Cond);
2459 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2460 } else {
2461 assert(SI && "Must either be a branch or switch!");
2462
2463 // Walk the cases and directly update their successors.
2464 assert(SI->getDefaultDest() == RetainedSuccBB &&
2465 "Not retaining default successor!");
2466 SI->setDefaultDest(LoopPH);
2467 for (const auto &Case : SI->cases())
2468 if (Case.getCaseSuccessor() == RetainedSuccBB)
2469 Case.setSuccessor(LoopPH);
2470 else
2471 Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
2472
2473 if (InsertFreeze)
2474 SI->setCondition(new FreezeInst(SI->getCondition(),
2475 SI->getCondition()->getName() + ".fr",
2476 SI->getIterator()));
2477
2478 // We need to use the set to populate domtree updates as even when there
2479 // are multiple cases pointing at the same successor we only want to
2480 // remove and insert one edge in the domtree.
2481 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2482 DTUpdates.push_back(
2483 {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second});
2484 }
2485
2486 if (MSSAU) {
2487 DT.applyUpdates(DTUpdates);
2488 DTUpdates.clear();
2489
2490 // Remove all but one edge to the retained block and all unswitched
2491 // blocks. This is to avoid having duplicate entries in the cloned Phis,
2492 // when we know we only keep a single edge for each case.
2493 MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, RetainedSuccBB);
2494 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2495 MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, SuccBB);
2496
2497 for (auto &VMap : VMaps)
2498 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2499 /*IgnoreIncomingWithNoClones=*/true);
2500 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2501
2502 // Remove all edges to unswitched blocks.
2503 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2504 MSSAU->removeEdge(ParentBB, SuccBB);
2505 }
2506
2507 // Now unhook the successor relationship as we'll be replacing
2508 // the terminator with a direct branch. This is much simpler for branches
2509 // than switches so we handle those first.
2510 if (BI) {
2511 // Remove the parent as a predecessor of the unswitched successor.
2512 assert(UnswitchedSuccBBs.size() == 1 &&
2513 "Only one possible unswitched block for a branch!");
2514 BasicBlock *UnswitchedSuccBB = *UnswitchedSuccBBs.begin();
2515 UnswitchedSuccBB->removePredecessor(ParentBB,
2516 /*KeepOneInputPHIs*/ true);
2517 DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
2518 } else {
2519 // Note that we actually want to remove the parent block as a predecessor
2520 // of *every* case successor. The case successor is either unswitched,
2521 // completely eliminating an edge from the parent to that successor, or it
2522 // is a duplicate edge to the retained successor as the retained successor
2523 // is always the default successor and as we'll replace this with a direct
2524 // branch we no longer need the duplicate entries in the PHI nodes.
2525 SwitchInst *NewSI = cast<SwitchInst>(NewTI);
2526 assert(NewSI->getDefaultDest() == RetainedSuccBB &&
2527 "Not retaining default successor!");
2528 for (const auto &Case : NewSI->cases())
2529 Case.getCaseSuccessor()->removePredecessor(
2530 ParentBB,
2531 /*KeepOneInputPHIs*/ true);
2532
2533 // We need to use the set to populate domtree updates as even when there
2534 // are multiple cases pointing at the same successor we only want to
2535 // remove and insert one edge in the domtree.
2536 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2537 DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB});
2538 }
2539
2540 // Create a new unconditional branch to the continuing block (as opposed to
2541 // the one cloned).
2542 Instruction *NewBI = UncondBrInst::Create(RetainedSuccBB, ParentBB);
2543 NewBI->setDebugLoc(NewTI->getDebugLoc());
2544
2545 // After MSSAU update, remove the cloned terminator instruction NewTI.
2546 NewTI->eraseFromParent();
2547 } else {
2548 assert(BI && "Only branches have partial unswitching.");
2549 assert(UnswitchedSuccBBs.size() == 1 &&
2550 "Only one possible unswitched block for a branch!");
2551 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2552 // When doing a partial unswitch, we have to do a bit more work to build up
2553 // the branch in the split block.
2554 if (PartiallyInvariant)
2556 *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU, *BI);
2557 else {
2559 *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,
2560 FreezeLoopUnswitchCond, BI, &AC, DT, *BI);
2561 }
2562 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2563
2564 if (MSSAU) {
2565 DT.applyUpdates(DTUpdates);
2566 DTUpdates.clear();
2567
2568 // Perform MSSA cloning updates.
2569 for (auto &VMap : VMaps)
2570 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2571 /*IgnoreIncomingWithNoClones=*/true);
2572 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2573 }
2574 }
2575
2576 // Apply the updates accumulated above to get an up-to-date dominator tree.
2577 DT.applyUpdates(DTUpdates);
2578
2579 // Now that we have an accurate dominator tree, first delete the dead cloned
2580 // blocks so that we can accurately build any cloned loops. It is important to
2581 // not delete the blocks from the original loop yet because we still want to
2582 // reference the original loop to understand the cloned loop's structure.
2583 deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT, MSSAU);
2584
2585 // Build the cloned loop structure itself. This may be substantially
2586 // different from the original structure due to the simplified CFG. This also
2587 // handles inserting all the cloned blocks into the correct loops.
2588 SmallVector<Loop *, 4> NonChildClonedLoops;
2589 for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps)
2590 buildClonedLoops(L, ExitBlocks, *VMap, LI, NonChildClonedLoops);
2591
2592 // Now that our cloned loops have been built, we can update the original loop.
2593 // First we delete the dead blocks from it and then we rebuild the loop
2594 // structure taking these deletions into account.
2595 deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU, SE, LoopUpdater);
2596
2597 if (MSSAU && VerifyMemorySSA)
2598 MSSAU->getMemorySSA()->verifyMemorySSA();
2599
2600 SmallVector<Loop *, 4> HoistedLoops;
2601 bool IsStillLoop =
2602 rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops, SE);
2603
2604 if (MSSAU && VerifyMemorySSA)
2605 MSSAU->getMemorySSA()->verifyMemorySSA();
2606
2607#ifdef EXPENSIVE_CHECKS
2608 // This transformation has a high risk of corrupting the dominator tree, and
2609 // the below steps to rebuild loop structures will result in hard to debug
2610 // errors in that case so verify that the dominator tree is sane first.
2611 // FIXME: Remove this when the bugs stop showing up and rely on existing
2612 // verification steps.
2613 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
2614#endif
2615
2616 if (BI && !PartiallyInvariant) {
2617 // If we unswitched a branch which collapses the condition to a known
2618 // constant we want to replace all the uses of the invariants within both
2619 // the original and cloned blocks. We do this here so that we can use the
2620 // now updated dominator tree to identify which side the users are on.
2621 assert(UnswitchedSuccBBs.size() == 1 &&
2622 "Only one possible unswitched block for a branch!");
2623 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2624
2625 // When considering multiple partially-unswitched invariants
2626 // we cant just go replace them with constants in both branches.
2627 //
2628 // For 'AND' we infer that true branch ("continue") means true
2629 // for each invariant operand.
2630 // For 'OR' we can infer that false branch ("continue") means false
2631 // for each invariant operand.
2632 // So it happens that for multiple-partial case we dont replace
2633 // in the unswitched branch.
2634 bool ReplaceUnswitched =
2635 FullUnswitch || (Invariants.size() == 1) || PartiallyInvariant;
2636
2637 ConstantInt *UnswitchedReplacement =
2638 Direction ? ConstantInt::getTrue(BI->getContext())
2639 : ConstantInt::getFalse(BI->getContext());
2640 ConstantInt *ContinueReplacement =
2641 Direction ? ConstantInt::getFalse(BI->getContext())
2642 : ConstantInt::getTrue(BI->getContext());
2643 for (Value *Invariant : Invariants) {
2644 assert(!isa<Constant>(Invariant) &&
2645 "Should not be replacing constant values!");
2646 // Use make_early_inc_range here as set invalidates the iterator.
2647 for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
2648 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
2649 if (!UserI)
2650 continue;
2651
2652 // Replace it with the 'continue' side if in the main loop body, and the
2653 // unswitched if in the cloned blocks.
2654 if (DT.dominates(LoopPH, UserI->getParent()))
2655 U.set(ContinueReplacement);
2656 else if (ReplaceUnswitched &&
2657 DT.dominates(ClonedPH, UserI->getParent()))
2658 U.set(UnswitchedReplacement);
2659 }
2660 }
2661 }
2662
2663 // We can change which blocks are exit blocks of all the cloned sibling
2664 // loops, the current loop, and any parent loops which shared exit blocks
2665 // with the current loop. As a consequence, we need to re-form LCSSA for
2666 // them. But we shouldn't need to re-form LCSSA for any child loops.
2667 // FIXME: This could be made more efficient by tracking which exit blocks are
2668 // new, and focusing on them, but that isn't likely to be necessary.
2669 //
2670 // In order to reasonably rebuild LCSSA we need to walk inside-out across the
2671 // loop nest and update every loop that could have had its exits changed. We
2672 // also need to cover any intervening loops. We add all of these loops to
2673 // a list and sort them by loop depth to achieve this without updating
2674 // unnecessary loops.
2675 auto UpdateLoop = [&](Loop &UpdateL) {
2676#ifndef NDEBUG
2677 UpdateL.verifyLoop();
2678 for (Loop *ChildL : UpdateL) {
2679 ChildL->verifyLoop();
2680 assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&
2681 "Perturbed a child loop's LCSSA form!");
2682 }
2683#endif
2684 // First build LCSSA for this loop so that we can preserve it when
2685 // forming dedicated exits. We don't want to perturb some other loop's
2686 // LCSSA while doing that CFG edit.
2687 formLCSSA(UpdateL, DT, &LI, SE);
2688
2689 // For loops reached by this loop's original exit blocks we may
2690 // introduced new, non-dedicated exits. At least try to re-form dedicated
2691 // exits for these loops. This may fail if they couldn't have dedicated
2692 // exits to start with.
2693 formDedicatedExitBlocks(&UpdateL, &DT, &LI, MSSAU, /*PreserveLCSSA*/ true);
2694 };
2695
2696 // For non-child cloned loops and hoisted loops, we just need to update LCSSA
2697 // and we can do it in any order as they don't nest relative to each other.
2698 //
2699 // Also check if any of the loops we have updated have become top-level loops
2700 // as that will necessitate widening the outer loop scope.
2701 for (Loop *UpdatedL :
2702 llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
2703 UpdateLoop(*UpdatedL);
2704 if (UpdatedL->isOutermost())
2705 OuterExitL = nullptr;
2706 }
2707 if (IsStillLoop) {
2708 UpdateLoop(L);
2709 if (L.isOutermost())
2710 OuterExitL = nullptr;
2711 }
2712
2713 // If the original loop had exit blocks, walk up through the outer most loop
2714 // of those exit blocks to update LCSSA and form updated dedicated exits.
2715 if (OuterExitL != &L)
2716 for (Loop *OuterL = ParentL; OuterL != OuterExitL;
2717 OuterL = OuterL->getParentLoop())
2718 UpdateLoop(*OuterL);
2719
2720#ifdef EXPENSIVE_CHECKS
2721 // Verify the entire loop structure to catch any incorrect updates before we
2722 // progress in the pass pipeline.
2723 LI.verify(DT);
2724#endif
2725
2726 // Now that we've unswitched something, make callbacks to report the changes.
2727 // For that we need to merge together the updated loops and the cloned loops
2728 // and check whether the original loop survived.
2729 SmallVector<Loop *, 4> SibLoops;
2730 for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
2731 if (UpdatedL->getParentLoop() == ParentL)
2732 SibLoops.push_back(UpdatedL);
2733 postUnswitch(L, LoopUpdater, LoopName, IsStillLoop, PartiallyInvariant,
2734 InjectedCondition, SibLoops);
2735
2736 if (MSSAU && VerifyMemorySSA)
2737 MSSAU->getMemorySSA()->verifyMemorySSA();
2738
2739 if (BI)
2740 ++NumBranches;
2741 else
2742 ++NumSwitches;
2743}
2744
2745/// Recursively compute the cost of a dominator subtree based on the per-block
2746/// cost map provided.
2747///
2748/// The recursive computation is memozied into the provided DT-indexed cost map
2749/// to allow querying it for most nodes in the domtree without it becoming
2750/// quadratic.
2752 DomTreeNode &N,
2755 // Don't accumulate cost (or recurse through) blocks not in our block cost
2756 // map and thus not part of the duplication cost being considered.
2757 auto BBCostIt = BBCostMap.find(N.getBlock());
2758 if (BBCostIt == BBCostMap.end())
2759 return 0;
2760
2761 // Lookup this node to see if we already computed its cost.
2762 auto DTCostIt = DTCostMap.find(&N);
2763 if (DTCostIt != DTCostMap.end())
2764 return DTCostIt->second;
2765
2766 // If not, we have to compute it. We can't use insert above and update
2767 // because computing the cost may insert more things into the map.
2768 InstructionCost Cost = std::accumulate(
2769 N.begin(), N.end(), BBCostIt->second,
2770 [&](InstructionCost Sum, DomTreeNode *ChildN) -> InstructionCost {
2771 return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
2772 });
2773 bool Inserted = DTCostMap.insert({&N, Cost}).second;
2774 (void)Inserted;
2775 assert(Inserted && "Should not insert a node while visiting children!");
2776 return Cost;
2777}
2778
2779/// Turns a select instruction into implicit control flow branch,
2780/// making the following replacement:
2781///
2782/// head:
2783/// --code before select--
2784/// select %cond, %trueval, %falseval
2785/// --code after select--
2786///
2787/// into
2788///
2789/// head:
2790/// --code before select--
2791/// br i1 %cond, label %then, label %tail
2792///
2793/// then:
2794/// br %tail
2795///
2796/// tail:
2797/// phi [ %trueval, %then ], [ %falseval, %head]
2798/// unreachable
2799///
2800/// It also makes all relevant DT and LI updates, so that all structures are in
2801/// valid state after this transform.
2803 LoopInfo &LI, MemorySSAUpdater *MSSAU,
2804 AssumptionCache *AC) {
2805 LLVM_DEBUG(dbgs() << "Turning " << *SI << " into a branch.\n");
2806 BasicBlock *HeadBB = SI->getParent();
2807
2808 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
2809 SplitBlockAndInsertIfThen(SI->getCondition(), SI, false,
2810 SI->getMetadata(LLVMContext::MD_prof), &DTU, &LI);
2811 auto *CondBr = cast<CondBrInst>(HeadBB->getTerminator());
2812 BasicBlock *ThenBB = CondBr->getSuccessor(0),
2813 *TailBB = CondBr->getSuccessor(1);
2814 if (MSSAU)
2815 MSSAU->moveAllAfterSpliceBlocks(HeadBB, TailBB, SI);
2816
2817 PHINode *Phi =
2818 PHINode::Create(SI->getType(), 2, "unswitched.select", SI->getIterator());
2819 Phi->addIncoming(SI->getTrueValue(), ThenBB);
2820 Phi->addIncoming(SI->getFalseValue(), HeadBB);
2821 Phi->setDebugLoc(SI->getDebugLoc());
2822 SI->replaceAllUsesWith(Phi);
2823 SI->eraseFromParent();
2824
2825 if (MSSAU && VerifyMemorySSA)
2826 MSSAU->getMemorySSA()->verifyMemorySSA();
2827
2828 ++NumSelects;
2829 return CondBr;
2830}
2831
2832/// Turns a llvm.experimental.guard intrinsic into implicit control flow branch,
2833/// making the following replacement:
2834///
2835/// --code before guard--
2836/// call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
2837/// --code after guard--
2838///
2839/// into
2840///
2841/// --code before guard--
2842/// br i1 %cond, label %guarded, label %deopt
2843///
2844/// guarded:
2845/// --code after guard--
2846///
2847/// deopt:
2848/// call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
2849/// unreachable
2850///
2851/// It also makes all relevant DT and LI updates, so that all structures are in
2852/// valid state after this transform.
2854 DominatorTree &DT, LoopInfo &LI,
2855 MemorySSAUpdater *MSSAU) {
2856 LLVM_DEBUG(dbgs() << "Turning " << *GI << " into a branch.\n");
2857 BasicBlock *CheckBB = GI->getParent();
2858
2859 if (MSSAU && VerifyMemorySSA)
2860 MSSAU->getMemorySSA()->verifyMemorySSA();
2861
2862 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
2863 // llvm.experimental.guard doesn't have branch weights. We can assume,
2864 // however, that the deopt path is unlikely.
2865 Instruction *DeoptBlockTerm = SplitBlockAndInsertIfThen(
2866 GI->getArgOperand(0), GI, true,
2869 : nullptr,
2870 &DTU, &LI);
2871 CondBrInst *CheckBI = cast<CondBrInst>(CheckBB->getTerminator());
2872 // SplitBlockAndInsertIfThen inserts control flow that branches to
2873 // DeoptBlockTerm if the condition is true. We want the opposite.
2874 CheckBI->swapSuccessors();
2875
2876 BasicBlock *GuardedBlock = CheckBI->getSuccessor(0);
2877 GuardedBlock->setName("guarded");
2878 CheckBI->getSuccessor(1)->setName("deopt");
2879 BasicBlock *DeoptBlock = CheckBI->getSuccessor(1);
2880
2881 if (MSSAU)
2882 MSSAU->moveAllAfterSpliceBlocks(CheckBB, GuardedBlock, GI);
2883
2884 GI->moveBefore(DeoptBlockTerm->getIterator());
2886
2887 if (MSSAU) {
2889 MSSAU->moveToPlace(MD, DeoptBlock, MemorySSA::BeforeTerminator);
2890 if (VerifyMemorySSA)
2891 MSSAU->getMemorySSA()->verifyMemorySSA();
2892 }
2893
2894 if (VerifyLoopInfo)
2895 LI.verify(DT);
2896 ++NumGuards;
2897 return CheckBI;
2898}
2899
2900/// Cost multiplier is a way to limit potentially exponential behavior
2901/// of loop-unswitch. Cost is multiplied in proportion of 2^number of unswitch
2902/// candidates available. Also consider the number of "sibling" loops with
2903/// the idea of accounting for previous unswitches that already happened on this
2904/// cluster of loops. There was an attempt to keep this formula simple,
2905/// just enough to limit the worst case behavior. Even if it is not that simple
2906/// now it is still not an attempt to provide a detailed heuristic size
2907/// prediction.
2908///
2909/// TODO: Make a proper accounting of "explosion" effect for all kinds of
2910/// unswitch candidates, making adequate predictions instead of wild guesses.
2911/// That requires knowing not just the number of "remaining" candidates but
2912/// also costs of unswitching for each of these candidates.
2914 const Instruction &TI, const Loop &L, const LoopInfo &LI,
2915 const DominatorTree &DT,
2916 ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates) {
2917
2918 // Guards and other exiting conditions do not contribute to exponential
2919 // explosion as soon as they dominate the latch (otherwise there might be
2920 // another path to the latch remaining that does not allow to eliminate the
2921 // loop copy on unswitch).
2922 const BasicBlock *Latch = L.getLoopLatch();
2923 const BasicBlock *CondBlock = TI.getParent();
2924 if (DT.dominates(CondBlock, Latch) &&
2925 (isGuard(&TI) ||
2926 (TI.isTerminator() &&
2927 llvm::count_if(successors(&TI), [&L](const BasicBlock *SuccBB) {
2928 return L.contains(SuccBB);
2929 }) <= 1))) {
2930 NumCostMultiplierSkipped++;
2931 return 1;
2932 }
2933
2934 // Each invariant non-trivial condition, after being unswitched, is supposed
2935 // to have its own specialized sibling loop (the invariant condition has been
2936 // hoisted out of the child loop into a newly-cloned loop). When unswitching
2937 // conditions in nested loops, the basic block size of the outer loop should
2938 // not be altered. If such a size significantly increases across unswitching
2939 // invocations, something may be wrong; so adjust the final cost taking this
2940 // into account.
2941 auto *ParentL = L.getParentLoop();
2942 int ParentLoopSizeMultiplier = 1;
2943 if (ParentL)
2944 ParentLoopSizeMultiplier =
2945 std::max<int>(ParentL->getNumBlocks() / UnswitchParentBlocksDiv, 1);
2946
2947 int SiblingsCount =
2948 (ParentL ? ParentL->getSubLoopsVector().size() : llvm::size(LI));
2949 // Count amount of clones that all the candidates might cause during
2950 // unswitching. Branch/guard/select counts as 1, switch counts as log2 of its
2951 // cases.
2952 int UnswitchedClones = 0;
2953 for (const auto &Candidate : UnswitchCandidates) {
2954 const Instruction *CI = Candidate.TI;
2955 const BasicBlock *CondBlock = CI->getParent();
2956 bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch);
2957 if (isa<SelectInst>(CI)) {
2958 UnswitchedClones++;
2959 continue;
2960 }
2961 if (isGuard(CI)) {
2962 if (!SkipExitingSuccessors)
2963 UnswitchedClones++;
2964 continue;
2965 }
2966 int NonExitingSuccessors =
2967 llvm::count_if(successors(CondBlock),
2968 [SkipExitingSuccessors, &L](const BasicBlock *SuccBB) {
2969 return !SkipExitingSuccessors || L.contains(SuccBB);
2970 });
2971 UnswitchedClones += Log2_32(NonExitingSuccessors);
2972 }
2973
2974 // Ignore up to the "unscaled candidates" number of unswitch candidates
2975 // when calculating the power-of-two scaling of the cost. The main idea
2976 // with this control is to allow a small number of unswitches to happen
2977 // and rely more on siblings multiplier (see below) when the number
2978 // of candidates is small.
2979 unsigned ClonesPower =
2980 std::max(UnswitchedClones - (int)UnswitchNumInitialUnscaledCandidates, 0);
2981
2982 // Allowing top-level loops to spread a bit more than nested ones.
2983 int SiblingsMultiplier =
2984 std::max((ParentL ? SiblingsCount
2985 : SiblingsCount / (int)UnswitchSiblingsToplevelDiv),
2986 1);
2987 // Compute the cost multiplier in a way that won't overflow by saturating
2988 // at an upper bound.
2989 int CostMultiplier;
2990 if (ClonesPower > Log2_32(UnswitchThreshold) ||
2991 SiblingsMultiplier > UnswitchThreshold ||
2992 ParentLoopSizeMultiplier > UnswitchThreshold)
2993 CostMultiplier = UnswitchThreshold;
2994 else
2995 CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),
2996 (int)UnswitchThreshold);
2997
2998 LLVM_DEBUG(dbgs() << " Computed multiplier " << CostMultiplier
2999 << " (siblings " << SiblingsMultiplier << " * parent size "
3000 << ParentLoopSizeMultiplier << " * clones "
3001 << (1 << ClonesPower) << ")"
3002 << " for unswitch candidate: " << TI << "\n");
3003 return CostMultiplier;
3004}
3005
3008 IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch,
3009 const Loop &L, const LoopInfo &LI, AAResults &AA,
3010 const MemorySSAUpdater *MSSAU) {
3011 assert(UnswitchCandidates.empty() && "Should be!");
3012
3013 auto AddUnswitchCandidatesForInst = [&](Instruction *I, Value *Cond) {
3015 if (isa<Constant>(Cond))
3016 return;
3017 if (L.isLoopInvariant(Cond)) {
3018 UnswitchCandidates.push_back({I, {Cond}});
3019 return;
3020 }
3022 TinyPtrVector<Value *> Invariants =
3024 L, *static_cast<Instruction *>(Cond), LI);
3025 if (!Invariants.empty())
3026 UnswitchCandidates.push_back({I, std::move(Invariants)});
3027 }
3028 };
3029
3030 // Whether or not we should also collect guards in the loop.
3031 bool CollectGuards = false;
3032 if (UnswitchGuards) {
3033 auto *GuardDecl = Intrinsic::getDeclarationIfExists(
3034 L.getHeader()->getParent()->getParent(), Intrinsic::experimental_guard);
3035 if (GuardDecl && !GuardDecl->use_empty())
3036 CollectGuards = true;
3037 }
3038
3039 for (auto *BB : L.blocks()) {
3040 if (LI.getLoopFor(BB) != &L)
3041 continue;
3042
3043 for (auto &I : *BB) {
3044 if (auto *SI = dyn_cast<SelectInst>(&I)) {
3045 auto *Cond = SI->getCondition();
3046 // Do not unswitch vector selects and logical and/or selects
3047 if (Cond->getType()->isIntegerTy(1) && !SI->getType()->isIntegerTy(1))
3048 AddUnswitchCandidatesForInst(SI, Cond);
3049 } else if (CollectGuards && isGuard(&I)) {
3050 auto *Cond =
3051 skipTrivialSelect(cast<IntrinsicInst>(&I)->getArgOperand(0));
3052 // TODO: Support AND, OR conditions and partial unswitching.
3053 if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
3054 UnswitchCandidates.push_back({&I, {Cond}});
3055 }
3056 }
3057
3058 if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
3059 // We can only consider fully loop-invariant switch conditions as we need
3060 // to completely eliminate the switch after unswitching.
3061 if (!isa<Constant>(SI->getCondition()) &&
3062 L.isLoopInvariant(SI->getCondition()) && !BB->getUniqueSuccessor())
3063 UnswitchCandidates.push_back({SI, {SI->getCondition()}});
3064 continue;
3065 }
3066
3067 auto *BI = dyn_cast<CondBrInst>(BB->getTerminator());
3068 if (!BI || BI->getSuccessor(0) == BI->getSuccessor(1))
3069 continue;
3070
3071 AddUnswitchCandidatesForInst(BI, BI->getCondition());
3072 }
3073
3074 if (MSSAU && !findOptionMDForLoop(&L, "llvm.loop.unswitch.partial.disable") &&
3075 !any_of(UnswitchCandidates, [&L](auto &TerminatorAndInvariants) {
3076 return TerminatorAndInvariants.TI == L.getHeader()->getTerminator();
3077 })) {
3078 MemorySSA *MSSA = MSSAU->getMemorySSA();
3079 if (auto Info = hasPartialIVCondition(L, MSSAThreshold, *MSSA, AA)) {
3080 LLVM_DEBUG(
3081 dbgs() << "simple-loop-unswitch: Found partially invariant condition "
3082 << *Info->InstToDuplicate[0] << "\n");
3083 PartialIVInfo = *Info;
3084 PartialIVCondBranch = L.getHeader()->getTerminator();
3085 TinyPtrVector<Value *> ValsToDuplicate;
3086 llvm::append_range(ValsToDuplicate, Info->InstToDuplicate);
3087 UnswitchCandidates.push_back(
3088 {L.getHeader()->getTerminator(), std::move(ValsToDuplicate)});
3089 }
3090 }
3091 return !UnswitchCandidates.empty();
3092}
3093
3094/// Tries to canonicalize condition described by:
3095///
3096/// br (LHS pred RHS), label IfTrue, label IfFalse
3097///
3098/// into its equivalent where `Pred` is something that we support for injected
3099/// invariants (so far it is limited to ult), LHS in canonicalized form is
3100/// non-invariant and RHS is an invariant.
3102 Value *&LHS, Value *&RHS,
3103 BasicBlock *&IfTrue,
3104 BasicBlock *&IfFalse,
3105 const Loop &L) {
3106 if (!L.contains(IfTrue)) {
3107 Pred = ICmpInst::getInversePredicate(Pred);
3108 std::swap(IfTrue, IfFalse);
3109 }
3110
3111 // Move loop-invariant argument to RHS position.
3112 if (L.isLoopInvariant(LHS)) {
3113 Pred = ICmpInst::getSwappedPredicate(Pred);
3114 std::swap(LHS, RHS);
3115 }
3116
3117 if (Pred == ICmpInst::ICMP_SGE && match(RHS, m_Zero())) {
3118 // Turn "x >=s 0" into "x <u UMIN_INT"
3119 Pred = ICmpInst::ICMP_ULT;
3120 RHS = ConstantInt::get(
3121 RHS->getContext(),
3122 APInt::getSignedMinValue(RHS->getType()->getIntegerBitWidth()));
3123 }
3124}
3125
3126/// Returns true, if predicate described by ( \p Pred, \p LHS, \p RHS )
3127/// succeeding into blocks ( \p IfTrue, \p IfFalse) can be optimized by
3128/// injecting a loop-invariant condition.
3130 const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS,
3131 const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L) {
3132 if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS))
3133 return false;
3134 // TODO: Support other predicates.
3135 if (Pred != ICmpInst::ICMP_ULT)
3136 return false;
3137 // TODO: Support non-loop-exiting branches?
3138 if (!L.contains(IfTrue) || L.contains(IfFalse))
3139 return false;
3140 // FIXME: For some reason this causes problems with MSSA updates, need to
3141 // investigate why. So far, just don't unswitch latch.
3142 if (L.getHeader() == IfTrue)
3143 return false;
3144 return true;
3145}
3146
3147/// Returns true, if metadata on \p BI allows us to optimize branching into \p
3148/// TakenSucc via injection of invariant conditions. The branch should be not
3149/// enough and not previously unswitched, the information about this comes from
3150/// the metadata.
3152 const BasicBlock *TakenSucc) {
3153 SmallVector<uint32_t> Weights;
3154 if (!extractBranchWeights(*BI, Weights))
3155 return false;
3157 BranchProbability LikelyTaken(T - 1, T);
3158
3159 assert(Weights.size() == 2 && "Unexpected profile data!");
3160 size_t Idx = BI->getSuccessor(0) == TakenSucc ? 0 : 1;
3161 auto Num = Weights[Idx];
3162 auto Denom = Weights[0] + Weights[1];
3163 // Degenerate or overflowed metadata.
3164 if (Denom == 0 || Num > Denom)
3165 return false;
3166 BranchProbability ActualTaken(Num, Denom);
3167 if (LikelyTaken > ActualTaken)
3168 return false;
3169 return true;
3170}
3171
3172/// Materialize pending invariant condition of the given candidate into IR. The
3173/// injected loop-invariant condition implies the original loop-variant branch
3174/// condition, so the materialization turns
3175///
3176/// loop_block:
3177/// ...
3178/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc
3179///
3180/// into
3181///
3182/// preheader:
3183/// %invariant_cond = LHS pred RHS
3184/// ...
3185/// loop_block:
3186/// br i1 %invariant_cond, label InLoopSucc, label OriginalCheck
3187/// OriginalCheck:
3188/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc
3189/// ...
3190static NonTrivialUnswitchCandidate
3191injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L,
3192 DominatorTree &DT, LoopInfo &LI,
3193 AssumptionCache &AC, MemorySSAUpdater *MSSAU) {
3194 assert(Candidate.hasPendingInjection() && "Nothing to inject!");
3195 BasicBlock *Preheader = L.getLoopPreheader();
3196 assert(Preheader && "Loop is not in simplified form?");
3197 assert(LI.getLoopFor(Candidate.TI->getParent()) == &L &&
3198 "Unswitching branch of inner loop!");
3199
3200 auto Pred = Candidate.PendingInjection->Pred;
3201 auto *LHS = Candidate.PendingInjection->LHS;
3202 auto *RHS = Candidate.PendingInjection->RHS;
3203 auto *InLoopSucc = Candidate.PendingInjection->InLoopSucc;
3204 auto *TI = cast<CondBrInst>(Candidate.TI);
3205 auto *BB = Candidate.TI->getParent();
3206 auto *OutOfLoopSucc = InLoopSucc == TI->getSuccessor(0) ? TI->getSuccessor(1)
3207 : TI->getSuccessor(0);
3208 // FIXME: Remove this once limitation on successors is lifted.
3209 assert(L.contains(InLoopSucc) && "Not supported yet!");
3210 assert(!L.contains(OutOfLoopSucc) && "Not supported yet!");
3211 auto &Ctx = BB->getContext();
3212
3213 IRBuilder<> Builder(Preheader->getTerminator());
3214 assert(ICmpInst::isUnsigned(Pred) && "Not supported yet!");
3215 if (LHS->getType() != RHS->getType()) {
3216 if (LHS->getType()->getIntegerBitWidth() <
3217 RHS->getType()->getIntegerBitWidth())
3218 LHS = Builder.CreateZExt(LHS, RHS->getType(), LHS->getName() + ".wide");
3219 else
3220 RHS = Builder.CreateZExt(RHS, LHS->getType(), RHS->getName() + ".wide");
3221 }
3222 // Do not use builder here: CreateICmp may simplify this into a constant and
3223 // unswitching will break. Better optimize it away later.
3224 auto *InjectedCond =
3225 ICmpInst::Create(Instruction::ICmp, Pred, LHS, RHS, "injected.cond",
3226 Preheader->getTerminator()->getIterator());
3227
3228 BasicBlock *CheckBlock = BasicBlock::Create(Ctx, BB->getName() + ".check",
3229 BB->getParent(), InLoopSucc);
3230 Builder.SetInsertPoint(TI);
3231 auto *InvariantBr =
3232 Builder.CreateCondBr(InjectedCond, InLoopSucc, CheckBlock);
3233 // We don't know anything about the relation between the limits.
3235
3236 Builder.SetInsertPoint(CheckBlock);
3237 Builder.CreateCondBr(
3238 TI->getCondition(), TI->getSuccessor(0), TI->getSuccessor(1),
3239 !ProfcheckDisableMetadataFixes ? TI->getMetadata(LLVMContext::MD_prof)
3240 : nullptr);
3241 TI->eraseFromParent();
3242
3243 // Fixup phis.
3244 for (auto &I : *InLoopSucc) {
3245 auto *PN = dyn_cast<PHINode>(&I);
3246 if (!PN)
3247 break;
3248 auto *Inc = PN->getIncomingValueForBlock(BB);
3249 PN->addIncoming(Inc, CheckBlock);
3250 }
3251 OutOfLoopSucc->replacePhiUsesWith(BB, CheckBlock);
3252
3254 { DominatorTree::Insert, BB, CheckBlock },
3255 { DominatorTree::Insert, CheckBlock, InLoopSucc },
3256 { DominatorTree::Insert, CheckBlock, OutOfLoopSucc },
3257 { DominatorTree::Delete, BB, OutOfLoopSucc }
3258 };
3259
3260 DT.applyUpdates(DTUpdates);
3261 if (MSSAU)
3262 MSSAU->applyUpdates(DTUpdates, DT);
3263 L.addBasicBlockToLoop(CheckBlock, LI);
3264
3265#ifndef NDEBUG
3266 DT.verify();
3267 LI.verify(DT);
3268 if (MSSAU && VerifyMemorySSA)
3269 MSSAU->getMemorySSA()->verifyMemorySSA();
3270#endif
3271
3272 // TODO: In fact, cost of unswitching a new invariant candidate is *slightly*
3273 // higher because we have just inserted a new block. Need to think how to
3274 // adjust the cost of injected candidates when it was first computed.
3275 LLVM_DEBUG(dbgs() << "Injected a new loop-invariant branch " << *InvariantBr
3276 << " and considering it for unswitching.");
3277 ++NumInvariantConditionsInjected;
3278 return NonTrivialUnswitchCandidate(InvariantBr, { InjectedCond },
3279 Candidate.Cost);
3280}
3281
3282/// Given chain of loop branch conditions looking like:
3283/// br (Variant < Invariant1)
3284/// br (Variant < Invariant2)
3285/// br (Variant < Invariant3)
3286/// ...
3287/// collect set of invariant conditions on which we want to unswitch, which
3288/// look like:
3289/// Invariant1 <= Invariant2
3290/// Invariant2 <= Invariant3
3291/// ...
3292/// Though they might not immediately exist in the IR, we can still inject them.
3294 SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates, Loop &L,
3296 const DominatorTree &DT) {
3297
3300 if (Compares.size() < 2)
3301 return false;
3303 for (auto Prev = Compares.begin(), Next = Compares.begin() + 1;
3304 Next != Compares.end(); ++Prev, ++Next) {
3305 Value *LHS = Next->Invariant;
3306 Value *RHS = Prev->Invariant;
3307 BasicBlock *InLoopSucc = Prev->InLoopSucc;
3308 InjectedInvariant ToInject(NonStrictPred, LHS, RHS, InLoopSucc);
3309 NonTrivialUnswitchCandidate Candidate(Prev->Term, { LHS, RHS },
3310 std::nullopt, std::move(ToInject));
3311 UnswitchCandidates.push_back(std::move(Candidate));
3312 }
3313 return true;
3314}
3315
3316/// Collect unswitch candidates by invariant conditions that are not immediately
3317/// present in the loop. However, they can be injected into the code if we
3318/// decide it's profitable.
3319/// An example of such conditions is following:
3320///
3321/// for (...) {
3322/// x = load ...
3323/// if (! x <u C1) break;
3324/// if (! x <u C2) break;
3325/// <do something>
3326/// }
3327///
3328/// We can unswitch by condition "C1 <=u C2". If that is true, then "x <u C1 <=
3329/// C2" automatically implies "x <u C2", so we can get rid of one of
3330/// loop-variant checks in unswitched loop version.
3333 IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L,
3334 const DominatorTree &DT, const LoopInfo &LI, AAResults &AA,
3335 const MemorySSAUpdater *MSSAU) {
3337 return false;
3338
3339 if (!DT.isReachableFromEntry(L.getHeader()))
3340 return false;
3341 auto *Latch = L.getLoopLatch();
3342 // Need to have a single latch and a preheader.
3343 if (!Latch)
3344 return false;
3345 assert(L.getLoopPreheader() && "Must have a preheader!");
3346
3348 // Traverse the conditions that dominate latch (and therefore dominate each
3349 // other).
3350 for (auto *DTN = DT.getNode(Latch); L.contains(DTN->getBlock());
3351 DTN = DTN->getIDom()) {
3352 CmpPredicate Pred;
3353 Value *LHS = nullptr, *RHS = nullptr;
3354 BasicBlock *IfTrue = nullptr, *IfFalse = nullptr;
3355 auto *BB = DTN->getBlock();
3356 // Ignore inner loops.
3357 if (LI.getLoopFor(BB) != &L)
3358 continue;
3359 auto *Term = BB->getTerminator();
3360 if (!match(Term, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
3361 m_BasicBlock(IfTrue), m_BasicBlock(IfFalse))))
3362 continue;
3363 if (!LHS->getType()->isIntegerTy())
3364 continue;
3365 canonicalizeForInvariantConditionInjection(Pred, LHS, RHS, IfTrue, IfFalse,
3366 L);
3367 if (!shouldTryInjectInvariantCondition(Pred, LHS, RHS, IfTrue, IfFalse, L))
3368 continue;
3370 continue;
3371 // Strip ZEXT for unsigned predicate.
3372 // TODO: once signed predicates are supported, also strip SEXT.
3373 CompareDesc Desc(cast<CondBrInst>(Term), RHS, IfTrue);
3374 while (auto *Zext = dyn_cast<ZExtInst>(LHS))
3375 LHS = Zext->getOperand(0);
3376 CandidatesULT[LHS].push_back(Desc);
3377 }
3378
3379 bool Found = false;
3380 for (auto &It : CandidatesULT)
3382 UnswitchCandidates, L, ICmpInst::ICMP_ULT, It.second, DT);
3383 return Found;
3384}
3385
3387 if (!L.isSafeToClone())
3388 return false;
3389 for (auto *BB : L.blocks())
3390 for (auto &I : *BB) {
3391 if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
3392 return false;
3393 if (auto *CB = dyn_cast<CallBase>(&I)) {
3394 assert(!CB->cannotDuplicate() && "Checked by L.isSafeToClone().");
3395 if (CB->isConvergent())
3396 return false;
3397 }
3398 }
3399
3400 // Check if there are irreducible CFG cycles in this loop. If so, we cannot
3401 // easily unswitch non-trivial edges out of the loop. Doing so might turn the
3402 // irreducible control flow into reducible control flow and introduce new
3403 // loops "out of thin air". If we ever discover important use cases for doing
3404 // this, we can add support to loop unswitch, but it is a lot of complexity
3405 // for what seems little or no real world benefit.
3406 LoopBlocksRPO RPOT(&L);
3407 RPOT.perform(&LI);
3409 return false;
3410
3412 L.getUniqueExitBlocks(ExitBlocks);
3413 // We cannot unswitch if exit blocks contain a cleanuppad/catchswitch
3414 // instruction as we don't know how to split those exit blocks.
3415 // FIXME: We should teach SplitBlock to handle this and remove this
3416 // restriction.
3417 for (auto *ExitBB : ExitBlocks) {
3418 auto It = ExitBB->getFirstNonPHIIt();
3420 LLVM_DEBUG(dbgs() << "Cannot unswitch because of cleanuppad/catchswitch "
3421 "in exit block\n");
3422 return false;
3423 }
3424 }
3425
3426 return true;
3427}
3428
3429static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(
3430 ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates, const Loop &L,
3431 const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC,
3432 const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo) {
3433 // Given that unswitching these terminators will require duplicating parts of
3434 // the loop, so we need to be able to model that cost. Compute the ephemeral
3435 // values and set up a data structure to hold per-BB costs. We cache each
3436 // block's cost so that we don't recompute this when considering different
3437 // subsets of the loop for duplication during unswitching.
3439 CodeMetrics::collectEphemeralValues(&L, &AC, EphValues);
3441
3442 // Compute the cost of each block, as well as the total loop cost. Also, bail
3443 // out if we see instructions which are incompatible with loop unswitching
3444 // (convergent, noduplicate, or cross-basic-block tokens).
3445 // FIXME: We might be able to safely handle some of these in non-duplicated
3446 // regions.
3448 L.getHeader()->getParent()->hasMinSize()
3451 InstructionCost LoopCost = 0;
3452 for (auto *BB : L.blocks()) {
3453 InstructionCost Cost = 0;
3454 for (auto &I : *BB) {
3455 if (EphValues.count(&I))
3456 continue;
3457 Cost += TTI.getInstructionCost(&I, CostKind);
3458 }
3459 assert(Cost >= 0 && "Must not have negative costs!");
3460 LoopCost += Cost;
3461 assert(LoopCost >= 0 && "Must not have negative loop costs!");
3462 BBCostMap[BB] = Cost;
3463 }
3464 LLVM_DEBUG(dbgs() << " Total loop cost: " << LoopCost << "\n");
3465
3466 // Now we find the best candidate by searching for the one with the following
3467 // properties in order:
3468 //
3469 // 1) An unswitching cost below the threshold
3470 // 2) The smallest number of duplicated unswitch candidates (to avoid
3471 // creating redundant subsequent unswitching)
3472 // 3) The smallest cost after unswitching.
3473 //
3474 // We prioritize reducing fanout of unswitch candidates provided the cost
3475 // remains below the threshold because this has a multiplicative effect.
3476 //
3477 // This requires memoizing each dominator subtree to avoid redundant work.
3478 //
3479 // FIXME: Need to actually do the number of candidates part above.
3481 // Given a terminator which might be unswitched, computes the non-duplicated
3482 // cost for that terminator.
3483 auto ComputeUnswitchedCost = [&](Instruction &TI,
3484 bool FullUnswitch) -> InstructionCost {
3485 // Unswitching selects unswitches the entire loop.
3486 if (isa<SelectInst>(TI))
3487 return LoopCost;
3488
3489 BasicBlock &BB = *TI.getParent();
3491
3492 InstructionCost Cost = 0;
3493 for (BasicBlock *SuccBB : successors(&BB)) {
3494 // Don't count successors more than once.
3495 if (!Visited.insert(SuccBB).second)
3496 continue;
3497
3498 // If this is a partial unswitch candidate, then it must be a conditional
3499 // branch with a condition of either `or`, `and`, their corresponding
3500 // select forms or partially invariant instructions. In that case, one of
3501 // the successors is necessarily duplicated, so don't even try to remove
3502 // its cost.
3503 if (!FullUnswitch) {
3504 auto &BI = cast<CondBrInst>(TI);
3505 Value *Cond = skipTrivialSelect(BI.getCondition());
3506 if (match(Cond, m_LogicalAnd())) {
3507 if (SuccBB == BI.getSuccessor(1))
3508 continue;
3509 } else if (match(Cond, m_LogicalOr())) {
3510 if (SuccBB == BI.getSuccessor(0))
3511 continue;
3512 } else if ((PartialIVInfo.KnownValue->isOneValue() &&
3513 SuccBB == BI.getSuccessor(0)) ||
3514 (!PartialIVInfo.KnownValue->isOneValue() &&
3515 SuccBB == BI.getSuccessor(1)))
3516 continue;
3517 }
3518
3519 // This successor's domtree will not need to be duplicated after
3520 // unswitching if the edge to the successor dominates it (and thus the
3521 // entire tree). This essentially means there is no other path into this
3522 // subtree and so it will end up live in only one clone of the loop.
3523 if (SuccBB->getUniquePredecessor() ||
3524 llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
3525 return PredBB == &BB || DT.dominates(SuccBB, PredBB);
3526 })) {
3527 Cost += computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);
3528 assert(Cost <= LoopCost &&
3529 "Non-duplicated cost should never exceed total loop cost!");
3530 }
3531 }
3532
3533 // Now scale the cost by the number of unique successors minus one. We
3534 // subtract one because there is already at least one copy of the entire
3535 // loop. This is computing the new cost of unswitching a condition.
3536 // Note that guards always have 2 unique successors that are implicit and
3537 // will be materialized if we decide to unswitch it.
3538 int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size();
3539 assert(SuccessorsCount > 1 &&
3540 "Cannot unswitch a condition without multiple distinct successors!");
3541 return (LoopCost - Cost) * (SuccessorsCount - 1);
3542 };
3543
3544 std::optional<NonTrivialUnswitchCandidate> Best;
3545 for (auto &Candidate : UnswitchCandidates) {
3546 Instruction &TI = *Candidate.TI;
3547 ArrayRef<Value *> Invariants = Candidate.Invariants;
3549 bool FullUnswitch =
3550 !BI || Candidate.hasPendingInjection() ||
3551 (Invariants.size() == 1 &&
3552 Invariants[0] == skipTrivialSelect(BI->getCondition()));
3553 InstructionCost CandidateCost = ComputeUnswitchedCost(TI, FullUnswitch);
3554 // Calculate cost multiplier which is a tool to limit potentially
3555 // exponential behavior of loop-unswitch.
3557 int CostMultiplier =
3558 CalculateUnswitchCostMultiplier(TI, L, LI, DT, UnswitchCandidates);
3559 assert(
3560 (CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) &&
3561 "cost multiplier needs to be in the range of 1..UnswitchThreshold");
3562 CandidateCost *= CostMultiplier;
3563 LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
3564 << " (multiplier: " << CostMultiplier << ")"
3565 << " for unswitch candidate: " << TI << "\n");
3566 } else {
3567 LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
3568 << " for unswitch candidate: " << TI << "\n");
3569 }
3570
3571 if (!Best || CandidateCost < Best->Cost) {
3572 Best = Candidate;
3573 Best->Cost = CandidateCost;
3574 }
3575 }
3576 assert(Best && "Must be!");
3577 return *Best;
3578}
3579
3580// Insert a freeze on an unswitched branch if all is true:
3581// 1. freeze-loop-unswitch-cond option is true
3582// 2. The branch may not execute in the loop pre-transformation. If a branch may
3583// not execute and could cause UB, it would always cause UB if it is hoisted outside
3584// of the loop. Insert a freeze to prevent this case.
3585// 3. The branch condition may be poison or undef
3587 AssumptionCache &AC) {
3590 return false;
3591
3592 ICFLoopSafetyInfo SafetyInfo;
3593 SafetyInfo.computeLoopSafetyInfo(&L);
3594 if (SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
3595 return false;
3596
3597 Value *Cond;
3598 if (CondBrInst *BI = dyn_cast<CondBrInst>(&TI))
3599 Cond = skipTrivialSelect(BI->getCondition());
3600 else
3603 Cond, &AC, L.getLoopPreheader()->getTerminator(), &DT);
3604}
3605
3609 MemorySSAUpdater *MSSAU,
3610 LPMUpdater &LoopUpdater) {
3611 // Collect all invariant conditions within this loop (as opposed to an inner
3612 // loop which would be handled when visiting that inner loop).
3614 IVConditionInfo PartialIVInfo;
3615 Instruction *PartialIVCondBranch = nullptr;
3616 collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo,
3617 PartialIVCondBranch, L, LI, AA, MSSAU);
3618 if (!findOptionMDForLoop(&L, "llvm.loop.unswitch.injection.disable"))
3619 collectUnswitchCandidatesWithInjections(UnswitchCandidates, PartialIVInfo,
3620 PartialIVCondBranch, L, DT, LI, AA,
3621 MSSAU);
3622 // If we didn't find any candidates, we're done.
3623 if (UnswitchCandidates.empty())
3624 return false;
3625
3626 LLVM_DEBUG(
3627 dbgs() << "Considering " << UnswitchCandidates.size()
3628 << " non-trivial loop invariant conditions for unswitching.\n");
3629
3630 NonTrivialUnswitchCandidate Best = findBestNonTrivialUnswitchCandidate(
3631 UnswitchCandidates, L, DT, LI, AC, TTI, PartialIVInfo);
3632
3633 assert(Best.TI && "Failed to find loop unswitch candidate");
3634 assert(Best.Cost && "Failed to compute cost");
3635
3636 if (*Best.Cost >= UnswitchThreshold) {
3637 LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: " << *Best.Cost
3638 << "\n");
3639 return false;
3640 }
3641
3642 bool InjectedCondition = false;
3643 if (Best.hasPendingInjection()) {
3644 Best = injectPendingInvariantConditions(Best, L, DT, LI, AC, MSSAU);
3645 InjectedCondition = true;
3646 }
3647 assert(!Best.hasPendingInjection() &&
3648 "All injections should have been done by now!");
3649
3650 if (Best.TI != PartialIVCondBranch)
3651 PartialIVInfo.InstToDuplicate.clear();
3652
3653 bool InsertFreeze;
3654 if (auto *SI = dyn_cast<SelectInst>(Best.TI)) {
3655 // If the best candidate is a select, turn it into a branch. Select
3656 // instructions with a poison conditional do not propagate poison, but
3657 // branching on poison causes UB. Insert a freeze on the select
3658 // conditional to prevent UB after turning the select into a branch.
3659 InsertFreeze = !isGuaranteedNotToBeUndefOrPoison(
3660 SI->getCondition(), &AC, L.getLoopPreheader()->getTerminator(), &DT);
3661 Best.TI = turnSelectIntoBranch(SI, DT, LI, MSSAU, &AC);
3662 } else {
3663 // If the best candidate is a guard, turn it into a branch.
3664 if (isGuard(Best.TI))
3665 Best.TI =
3666 turnGuardIntoBranch(cast<IntrinsicInst>(Best.TI), L, DT, LI, MSSAU);
3667 InsertFreeze = shouldInsertFreeze(L, *Best.TI, DT, AC);
3668 }
3669
3670 LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = " << Best.Cost
3671 << ") terminator: " << *Best.TI << "\n");
3672 unswitchNontrivialInvariants(L, *Best.TI, Best.Invariants, PartialIVInfo, DT,
3673 LI, AC, SE, MSSAU, LoopUpdater, InsertFreeze,
3674 InjectedCondition);
3675 return true;
3676}
3677
3678/// Unswitch control flow predicated on loop invariant conditions.
3679///
3680/// This first hoists all branches or switches which are trivial (IE, do not
3681/// require duplicating any part of the loop) out of the loop body. It then
3682/// looks at other loop invariant control flows and tries to unswitch those as
3683/// well by cloning the loop if the result is small enough.
3684///
3685/// The `DT`, `LI`, `AC`, `AA`, `TTI` parameters are required analyses that are
3686/// also updated based on the unswitch. The `MSSA` analysis is also updated if
3687/// valid (i.e. its use is enabled).
3688///
3689/// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is
3690/// true, we will attempt to do non-trivial unswitching as well as trivial
3691/// unswitching.
3692///
3693/// The `postUnswitch` function will be run after unswitching is complete
3694/// with information on whether or not the provided loop remains a loop and
3695/// a list of new sibling loops created.
3696///
3697/// If `SE` is non-null, we will update that analysis based on the unswitching
3698/// done.
3699static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
3701 TargetTransformInfo &TTI, bool Trivial,
3702 bool NonTrivial, ScalarEvolution *SE,
3703 MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater) {
3704 assert(L.isRecursivelyLCSSAForm(DT, LI) &&
3705 "Loops must be in LCSSA form before unswitching.");
3706
3707 // Must be in loop simplified form: we need a preheader and dedicated exits.
3708 if (!L.isLoopSimplifyForm())
3709 return false;
3710
3711 // Try trivial unswitch first before loop over other basic blocks in the loop.
3712 if (Trivial && unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) {
3713 // If we unswitched successfully we will want to clean up the loop before
3714 // processing it further so just mark it as unswitched and return.
3715 postUnswitch(L, LoopUpdater, L.getName(),
3716 /*CurrentLoopValid*/ true, /*PartiallyInvariant*/ false,
3717 /*InjectedCondition*/ false, {});
3718 return true;
3719 }
3720
3721 const Function *F = L.getHeader()->getParent();
3722
3723 // Check whether we should continue with non-trivial conditions.
3724 // EnableNonTrivialUnswitch: Global variable that forces non-trivial
3725 // unswitching for testing and debugging.
3726 // NonTrivial: Parameter that enables non-trivial unswitching for this
3727 // invocation of the transform. But this should be allowed only
3728 // for targets without branch divergence.
3729 //
3730 // FIXME: If divergence analysis becomes available to a loop
3731 // transform, we should allow unswitching for non-trivial uniform
3732 // branches even on targets that have divergence.
3733 // https://bugs.llvm.org/show_bug.cgi?id=48819
3734 bool ContinueWithNonTrivial =
3735 EnableNonTrivialUnswitch || (NonTrivial && !TTI.hasBranchDivergence(F));
3736 if (!ContinueWithNonTrivial)
3737 return false;
3738
3739 // Skip non-trivial unswitching for optsize functions.
3740 if (F->hasOptSize())
3741 return false;
3742
3743 // Perform legality checks.
3745 return false;
3746
3747 // For non-trivial unswitching, because it often creates new loops, we rely on
3748 // the pass manager to iterate on the loops rather than trying to immediately
3749 // reach a fixed point. There is no substantial advantage to iterating
3750 // internally, and if any of the new loops are simplified enough to contain
3751 // trivial unswitching we want to prefer those.
3752
3753 // Try to unswitch the best invariant condition. We prefer this full unswitch to
3754 // a partial unswitch when possible below the threshold.
3755 if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, SE, MSSAU, LoopUpdater))
3756 return true;
3757
3758 // No other opportunities to unswitch.
3759 return false;
3760}
3761
3764 LPMUpdater &U) {
3765 Function &F = *L.getHeader()->getParent();
3766 (void)F;
3767 LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L
3768 << "\n");
3769
3770 std::optional<MemorySSAUpdater> MSSAU;
3771 if (AR.MSSA) {
3772 MSSAU = MemorySSAUpdater(AR.MSSA);
3773 if (VerifyMemorySSA)
3774 AR.MSSA->verifyMemorySSA();
3775 }
3776 if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
3777 &AR.SE, MSSAU ? &*MSSAU : nullptr, U))
3778 return PreservedAnalyses::all();
3779
3780 if (AR.MSSA && VerifyMemorySSA)
3781 AR.MSSA->verifyMemorySSA();
3782
3783#ifdef EXPENSIVE_CHECKS
3784 // Historically this pass has had issues with the dominator tree so verify it
3785 // in asserts builds.
3786 assert(AR.DT.verify(DominatorTree::VerificationLevel::Fast));
3787#endif
3788
3789 auto PA = getLoopPassPreservedAnalyses();
3790 if (AR.MSSA)
3791 PA.preserve<MemorySSAAnalysis>();
3792 return PA;
3793}
3794
3796 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
3798 OS, MapClassName2PassName);
3799
3800 OS << '<';
3801 OS << (NonTrivial ? "" : "no-") << "nontrivial;";
3802 OS << (Trivial ? "" : "no-") << "trivial";
3803 OS << '>';
3804}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines the DenseMap class.
#define DEBUG_TYPE
This file defines a set of templates that efficiently compute a dominator tree over a generic graph.
static Value * getCondition(Instruction *I)
Module.h This file contains the declarations for the Module class.
This defines the Use class.
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
This header provides classes for managing per-loop analyses.
Loop::LoopBounds::Direction Direction
Definition LoopInfo.cpp:253
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
#define T
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
uint64_t IntrinsicInst * II
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
Provides some synthesis utilities to produce sequences of values.
This file implements a set that has insertion order iteration characteristics.
static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH)
Rewrite the PHI nodes in an unswitched loop exit basic block.
static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
This routine scans the loop to find a branch or switch which occurs before any side effects occur.
static SmallPtrSet< const BasicBlock *, 16 > recomputeLoopBlockSet(Loop &L, LoopInfo &LI)
Recompute the set of blocks in a loop after unswitching.
static int CalculateUnswitchCostMultiplier(const Instruction &TI, const Loop &L, const LoopInfo &LI, const DominatorTree &DT, ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates)
Cost multiplier is a way to limit potentially exponential behavior of loop-unswitch.
static TinyPtrVector< Value * > collectHomogenousInstGraphLoopInvariants(const Loop &L, Instruction &Root, const LoopInfo &LI)
Collect all of the loop invariant input values transitively used by the homogeneous instruction graph...
static void deleteDeadClonedBlocks(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, ArrayRef< std::unique_ptr< ValueToValueMapTy > > VMaps, DominatorTree &DT, MemorySSAUpdater *MSSAU)
void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable)
Helper to visit a dominator subtree, invoking a callable on each node.
static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI)
void postUnswitch(Loop &L, LPMUpdater &U, StringRef LoopName, bool CurrentLoopValid, bool PartiallyInvariant, bool InjectedCondition, ArrayRef< Loop * > NewLoops)
static bool shouldTryInjectInvariantCondition(const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS, const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L)
Returns true, if predicate described by ( Pred, LHS, RHS ) succeeding into blocks ( IfTrue,...
static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates, const Loop &L, const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC, const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo)
static void buildPartialInvariantUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > ToDuplicate, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L, MemorySSAUpdater *MSSAU, const CondBrInst &OriginalBranch)
Copy a set of loop invariant values, and conditionally branch on them.
static Value * skipTrivialSelect(Value *Cond)
static Loop * getTopMostExitingLoop(const BasicBlock *ExitBB, const LoopInfo &LI)
static bool collectUnswitchCandidatesWithInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L, const DominatorTree &DT, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)
Collect unswitch candidates by invariant conditions that are not immediately present in the loop.
static void replaceLoopInvariantUses(const Loop &L, Value *Invariant, Constant &Replacement)
static CondBrInst * turnGuardIntoBranch(IntrinsicInst *GI, Loop &L, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU)
Turns a llvm.experimental.guard intrinsic into implicit control flow branch, making the following rep...
static bool collectUnswitchCandidates(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, const Loop &L, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)
static InstructionCost computeDomSubtreeCost(DomTreeNode &N, const SmallDenseMap< BasicBlock *, InstructionCost, 4 > &BBCostMap, SmallDenseMap< DomTreeNode *, InstructionCost, 4 > &DTCostMap)
Recursively compute the cost of a dominator subtree based on the per-block cost map provided.
static bool shouldInsertFreeze(Loop &L, Instruction &TI, DominatorTree &DT, AssumptionCache &AC)
bool shouldTryInjectBasingOnMetadata(const CondBrInst *BI, const BasicBlock *TakenSucc)
Returns true, if metadata on BI allows us to optimize branching into TakenSucc via injection of invar...
static void canonicalizeForInvariantConditionInjection(CmpPredicate &Pred, Value *&LHS, Value *&RHS, BasicBlock *&IfTrue, BasicBlock *&IfFalse, const Loop &L)
Tries to canonicalize condition described by:
static bool areLoopExitPHIsLoopInvariant(const Loop &L, const BasicBlock &ExitingBB, const BasicBlock &ExitBB)
Check that all the LCSSA PHI nodes in the loop exit block have trivial incoming values along this edg...
static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH, bool FullUnswitch)
Rewrite the PHI nodes in the loop exit basic block and the split off unswitched block.
static bool insertCandidatesWithPendingInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, Loop &L, ICmpInst::Predicate Pred, ArrayRef< CompareDesc > Compares, const DominatorTree &DT)
Given chain of loop branch conditions looking like: br (Variant < Invariant1) br (Variant < Invariant...
static NonTrivialUnswitchCandidate injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, MemorySSAUpdater *MSSAU)
Materialize pending invariant condition of the given candidate into IR.
static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial switch if the condition is loop invariant.
static void unswitchNontrivialInvariants(Loop &L, Instruction &TI, ArrayRef< Value * > Invariants, IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater, bool InsertFreeze, bool InjectedCondition)
static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, LoopInfo &LI, SmallVectorImpl< Loop * > &HoistedLoops, ScalarEvolution *SE)
Rebuild a loop after unswitching removes some subset of blocks and edges.
static CondBrInst * turnSelectIntoBranch(SelectInst *SI, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, AssumptionCache *AC)
Turns a select instruction into implicit control flow branch, making the following replacement:
static bool unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater)
static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, bool Trivial, bool NonTrivial, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater)
Unswitch control flow predicated on loop invariant conditions.
static bool unswitchTrivialBranch(Loop &L, CondBrInst &BI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial branch if the condition is loop invariant.
static BasicBlock * buildClonedLoopBlocks(Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB, ArrayRef< BasicBlock * > ExitBlocks, BasicBlock *ParentBB, BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB, const SmallDenseMap< BasicBlock *, BasicBlock *, 16 > &DominatingSucc, ValueToValueMapTy &VMap, SmallVectorImpl< DominatorTree::UpdateType > &DTUpdates, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Build the cloned blocks for an unswitched copy of the given loop.
static void deleteDeadBlocksFromLoop(Loop &L, SmallVectorImpl< BasicBlock * > &ExitBlocks, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE, LPMUpdater &LoopUpdater)
static void buildPartialUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > Invariants, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze, const Instruction *I, AssumptionCache *AC, const DominatorTree &DT, const CondBrInst &ComputeProfFrom)
Copy a set of loop invariant values Invariants and insert them at the end of BB and conditionally bra...
static Loop * cloneLoopNest(Loop &OrigRootL, Loop *RootParentL, const ValueToValueMapTy &VMap, LoopInfo &LI)
Recursively clone the specified loop and all of its children.
static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Hoist the current loop up to the innermost loop containing a remaining exit.
static void buildClonedLoops(Loop &OrigL, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, LoopInfo &LI, SmallVectorImpl< Loop * > &NonChildClonedLoops)
Build the cloned loops of an original loop from unswitching.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:119
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
iterator end() const
Definition ArrayRef.h:130
size_t size() const
Get the array size.
Definition ArrayRef.h:141
iterator begin() const
Definition ArrayRef.h:129
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
A cache of @llvm.assume calls within a function.
LLVM_ABI void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:474
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:461
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:530
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
const Instruction * getTerminatorOrNull() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:248
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
size_t size() const
Definition BasicBlock.h:482
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
Definition BasicBlock.h:388
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:768
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:890
static LLVM_ABI CmpInst * Create(OtherOps Op, Predicate Pred, Value *S1, Value *S2, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Construct a compare instruction, given the opcode, the predicate and the two operands.
Predicate getNonStrictPredicate() const
For example, SGT -> SGE, SLT -> SLE, ULT -> ULE, UGT -> UGE.
Definition InstrTypes.h:934
static LLVM_ABI bool isStrictPredicate(Predicate predicate)
This is a static version that you can use without an instruction available.
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:852
bool isUnsigned() const
Definition InstrTypes.h:999
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Conditional Branch instruction.
LLVM_ABI void swapSuccessors()
Swap the successors of this branch instruction.
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
void setCondition(Value *V)
Value * getCondition() const
BasicBlock * getSuccessor(unsigned i) const
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
This is an important base class in LLVM.
Definition Constant.h:43
LLVM_ABI bool isOneValue() const
Returns true if the value is one.
Definition Constants.cpp:89
A debug info location.
Definition DebugLoc.h:126
static DebugLoc getCompilerGenerated()
Definition DebugLoc.h:154
static DebugLoc getDropped()
Definition DebugLoc.h:155
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
Definition DenseMap.h:252
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:225
iterator begin()
Definition DenseMap.h:139
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:221
iterator end()
Definition DenseMap.h:143
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:286
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
void insertEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge insertion and update the tree.
static constexpr UpdateKind Delete
static constexpr UpdateKind Insert
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:151
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This class represents a freeze function that returns random concrete value if an operand is either a ...
This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to give precise answers on "may...
bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const override
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
bool isRelational() const
Return true if the predicate is relational (not EQ or NE).
CondBrInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition IRBuilder.h:1216
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition IRBuilder.h:2683
void SetCurrentDebugLocation(const DebugLoc &L)
Set location information used by debugging information.
Definition IRBuilder.h:221
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:1570
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1592
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2848
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void dropLocation()
Drop the instruction's debug location.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
bool isTerminator() const
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
A wrapper class for inspecting calls to intrinsic functions.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
BlockT * getHeader() const
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
void reserveBlocks(unsigned size)
interface to do reserve() for Blocks
iterator_range< block_iterator > blocks() const
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
void perform(const LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
LoopT * AllocateLoop(ArgsTy &&...Args)
LoopT * removeLoop(iterator I)
This removes the specified top-level loop from this loop info object.
unsigned getLoopDepth(const BlockT *BB) const
Return the loop nesting level of the specified block.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
void destroy(LoopT *L)
Destroy a loop that has been removed from the LoopInfo nest.
void changeLoopFor(const BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
StringRef getName() const
Definition LoopInfo.h:407
LLVM_ABI MDNode * createUnlikelyBranchWeights()
Return metadata containing two branch weights, with significant bias towards false destination.
Definition MDBuilder.cpp:48
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Definition MemorySSA.h:371
An analysis that produces MemorySSA for a function.
Definition MemorySSA.h:922
MemorySSA * getMemorySSA() const
Get handle on MemorySSA.
LLVM_ABI void removeEdge(BasicBlock *From, BasicBlock *To)
Update the MemoryPhi in To following an edge deletion between From and To.
LLVM_ABI void updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VM, bool IgnoreIncomingWithNoClones=false)
Update MemorySSA after a loop was cloned, given the blocks in RPO order, the exit blocks and a 1:1 ma...
LLVM_ABI void removeDuplicatePhiEdgesBetween(const BasicBlock *From, const BasicBlock *To)
Update the MemoryPhi in To to have a single incoming edge from From, following a CFG change that repl...
LLVM_ABI void removeBlocks(const SmallSetVector< BasicBlock *, 8 > &DeadBlocks)
Remove all MemoryAcceses in a set of BasicBlocks about to be deleted.
LLVM_ABI void moveAllAfterSpliceBlocks(BasicBlock *From, BasicBlock *To, Instruction *Start)
From block was spliced into From and To.
LLVM_ABI MemoryAccess * createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point, bool CreationMustSucceed=true)
Create a MemoryAccess in MemorySSA at a specified point in a block.
LLVM_ABI void applyInsertUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT)
Apply CFG insert updates, analogous with the DT edge updates.
LLVM_ABI void applyUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT, bool UpdateDTFirst=false)
Apply CFG updates, analogous with the DT edge updates.
LLVM_ABI void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where)
LLVM_ABI void updateExitBlocksForClonedLoop(ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, DominatorTree &DT)
Update phi nodes in exit block successors following cloning.
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition MemorySSA.h:702
DefsList * getBlockDefs(const BasicBlock *BB) const
Return the list of MemoryDef's and MemoryPhi's for a given basic block.
Definition MemorySSA.h:765
LLVM_ABI void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition MemorySSA.h:720
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
This class represents an analyzed expression in the program.
The main scalar evolution driver.
const SCEV * getConstantMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEVConstant that is greater than or equal to (i.e.
LLVM_ABI void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
LLVM_ABI void forgetTopmostLoop(const Loop *L)
LLVM_ABI void forgetBlockAndLoopDispositions(Value *V=nullptr)
Called when the client has changed the disposition of values in a loop or block.
LLVM_ABI void forgetLcssaPhiWithNewPredecessor(Loop *L, PHINode *V)
Forget LCSSA phi node V of loop L to which a new predecessor was added, such that it may no longer be...
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
size_type count(const_arg_type key) const
Count the number of elements of a given key in the SetVector.
Definition SetVector.h:262
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition SetVector.h:106
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
LLVM_ABI void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
LLVM_ABI PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
size_type size() const
Definition SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
LLVM_ABI void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
LLVM_ABI Instruction::InstListType::iterator eraseFromParent()
Delegate the call to the underlying SwitchInst::eraseFromParent() and mark this object to not touch t...
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
LLVM_ABI CaseWeightOpt getSuccessorWeight(unsigned idx)
std::optional< uint32_t > CaseWeightOpt
LLVM_ABI SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
unsigned getSuccessorIndex() const
Returns successor index for current case successor.
BasicBlockT * getCaseSuccessor() const
Resolves successor for current case.
ConstantIntT * getCaseValue() const
Resolves case value for current case.
Multiway switch.
BasicBlock * getDefaultDest() const
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, InsertPosition InsertBefore=nullptr)
void setDefaultDest(BasicBlock *DefaultCase)
iterator_range< CaseIt > cases()
Iteration adapter for range-for loops.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
void push_back(EltTy NewVal)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static UncondBrInst * Create(BasicBlock *Target, InsertPosition InsertBefore=nullptr)
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition ValueMap.h:167
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition ValueMap.h:156
LLVM Value Representation.
Definition Value.h:75
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:394
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< use_iterator > uses()
Definition Value.h:380
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getDeclarationIfExists(const Module *M, ID id)
Look up the Function declaration of the intrinsic id in the Module M and return it if it exists.
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ?
bool match(Val *V, const Pattern &P)
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
auto m_BasicBlock()
Match an arbitrary basic block value and ignore it.
auto m_Value()
Match an arbitrary value and ignore it.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
brc_match< Cond_t, match_bind< BasicBlock >, match_bind< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
LogicalOp_match< LHS, RHS, Instruction::Or > m_LogicalOr(const LHS &L, const RHS &R)
Matches L || R either in the form of L | R or L ?
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
initializer< Ty > init(const Ty &Val)
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
void stable_sort(R &&Range)
Definition STLExtras.h:2116
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1765
LLVM_ABI cl::opt< bool > ProfcheckDisableMetadataFixes
Definition LoopInfo.cpp:60
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1669
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:535
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
static cl::opt< int > UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden, cl::desc("The cost threshold for unswitching a loop."))
auto successors(const MachineBasicBlock *BB)
static cl::opt< bool > EnableNonTrivialUnswitch("enable-nontrivial-unswitch", cl::init(false), cl::Hidden, cl::desc("Forcibly enables non-trivial loop unswitching rather than " "following the configuration passed into the pass."))
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2208
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
auto cast_or_null(const Y &Val)
Definition Casting.h:714
LLVM_ABI MDNode * findOptionMDForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for a loop.
Op::Description Desc
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
Definition STLExtras.h:1151
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Value
Definition InstrProf.h:143
DomTreeNodeBase< BasicBlock > DomTreeNode
Definition Dominators.h:94
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
static cl::opt< bool > EnableUnswitchCostMultiplier("enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden, cl::desc("Enable unswitch cost multiplier that prohibits exponential " "explosion in nontrivial unswitch."))
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in a form of call of llvm.experimental....
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
static cl::opt< bool > DropNonTrivialImplicitNullChecks("simple-loop-unswitch-drop-non-trivial-implicit-null-checks", cl::init(false), cl::Hidden, cl::desc("If enabled, drop make.implicit metadata in unswitched implicit " "null checks to save time analyzing if we can keep it."))
bool containsIrreducibleCFG(RPOTraversalT &RPOTraversal, const LoopInfoT &LI)
Return true if the control flow in RPOTraversal is irreducible.
Definition CFG.h:154
static cl::opt< unsigned > InjectInvariantConditionHotnesThreshold("simple-loop-unswitch-inject-invariant-condition-hotness-threshold", cl::Hidden, cl::desc("Only try to inject loop invariant conditions and " "unswitch on them to eliminate branches that are " "not-taken 1/<this option> times or less."), cl::init(16))
static cl::opt< int > UnswitchSiblingsToplevelDiv("unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden, cl::desc("Toplevel siblings divisor for cost multiplier."))
detail::zippy< detail::zip_first, T, U, Args... > zip_first(T &&t, U &&u, Args &&...args)
zip iterator that, for the sake of efficiency, assumes the first iteratee to be the shortest.
Definition STLExtras.h:853
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1636
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition ValueMapper.h:98
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition ValueMapper.h:80
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
static cl::opt< bool > InjectInvariantConditions("simple-loop-unswitch-inject-invariant-conditions", cl::Hidden, cl::desc("Whether we should inject new invariants and unswitch them to " "eliminate some existing (non-invariant) conditions."), cl::init(true))
LLVM_ABI bool VerifyLoopInfo
Enable verification of loop info.
Definition LoopInfo.cpp:53
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
TargetTransformInfo TTI
LLVM_ABI bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition MemorySSA.cpp:84
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
LLVM_ABI bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
Definition LoopUtils.cpp:61
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ArrayRef(const T &OneElt) -> ArrayRef< T >
auto sum_of(R &&Range, E Init=E{0})
Returns the sum of all values in Range with Init initial value.
Definition STLExtras.h:1717
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
static cl::opt< int > UnswitchNumInitialUnscaledCandidates("unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden, cl::desc("Number of unswitch candidates that are ignored when calculating " "cost multiplier."))
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2019
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
static cl::opt< bool > EstimateProfile("simple-loop-unswitch-estimate-profile", cl::Hidden, cl::init(true))
static cl::opt< unsigned > MSSAThreshold("simple-loop-unswitch-memoryssa-threshold", cl::desc("Max number of memory uses to explore during " "partial unswitching analysis"), cl::init(100), cl::Hidden)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2192
auto predecessors(const MachineBasicBlock *BB)
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Next
Definition InstrProf.h:147
bool pred_empty(const BasicBlock *BB)
Definition CFG.h:107
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
LLVM_ABI BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
static cl::opt< bool > FreezeLoopUnswitchCond("freeze-loop-unswitch-cond", cl::init(true), cl::Hidden, cl::desc("If enabled, the freeze instruction will be added to condition " "of loop unswitch to prevent miscompilation."))
LLVM_ABI std::optional< IVConditionInfo > hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold, const MemorySSA &MSSA, AAResults &AA)
Check if the loop header has a conditional branch that is not loop-invariant, because it involves loa...
LLVM_ABI bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put loop into LCSSA form.
Definition LCSSA.cpp:427
static cl::opt< bool > UnswitchGuards("simple-loop-unswitch-guards", cl::init(true), cl::Hidden, cl::desc("If enabled, simple loop unswitching will also consider " "llvm.experimental.guard intrinsics as unswitch candidates."))
LLVM_ABI void mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap)
Mark a cloned instruction as a new instance so that its source loc can be updated when remapped.
static cl::opt< int > UnswitchParentBlocksDiv("unswitch-parent-blocks-div", cl::init(8), cl::Hidden, cl::desc("Outer loop size divisor for cost multiplier."))
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
#define N
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition Analysis.h:29
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Struct to hold information about a partially invariant condition.
Definition LoopUtils.h:668
SmallVector< Instruction * > InstToDuplicate
Instructions that need to be duplicated and checked for the unswitching condition.
Definition LoopUtils.h:671
Constant * KnownValue
Constant to indicate for which value the condition is invariant.
Definition LoopUtils.h:674
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition PassManager.h:89