1//===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "llvm/ADT/DenseMap.h"
11#include "llvm/ADT/STLExtras.h"
12#include "llvm/ADT/Sequence.h"
13#include "llvm/ADT/SetVector.h"
16#include "llvm/ADT/Statistic.h"
17#include "llvm/ADT/Twine.h"
20#include "llvm/Analysis/CFG.h"
33#include "llvm/IR/BasicBlock.h"
34#include "llvm/IR/Constant.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/Dominators.h"
37#include "llvm/IR/Function.h"
38#include "llvm/IR/IRBuilder.h"
39#include "llvm/IR/InstrTypes.h"
40#include "llvm/IR/Instruction.h"
43#include "llvm/IR/MDBuilder.h"
44#include "llvm/IR/Module.h"
47#include "llvm/IR/Use.h"
48#include "llvm/IR/Value.h"
51#include "llvm/Support/Debug.h"
62#include <algorithm>
63#include <cassert>
64#include <iterator>
65#include <numeric>
66#include <optional>
67#include <utility>
68
69#define DEBUG_TYPE "simple-loop-unswitch"
70
71using namespace llvm;
72using namespace llvm::PatternMatch;
73
74STATISTIC(NumBranches, "Number of branches unswitched");
75STATISTIC(NumSwitches, "Number of switches unswitched");
76STATISTIC(NumSelects, "Number of selects turned into branches for unswitching");
77STATISTIC(NumGuards, "Number of guards turned into branches for unswitching");
78STATISTIC(NumTrivial, "Number of unswitches that are trivial");
79STATISTIC(
80 NumCostMultiplierSkipped,
81 "Number of unswitch candidates that had their cost multiplier skipped");
82STATISTIC(NumInvariantConditionsInjected,
83 "Number of invariant conditions injected and unswitched");
84
85namespace llvm {
86static cl::opt<bool> EnableNonTrivialUnswitch(
87 "enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
88 cl::desc("Forcibly enables non-trivial loop unswitching rather than "
89 "following the configuration passed into the pass."));
90
91static cl::opt<int>
92 UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
93 cl::desc("The cost threshold for unswitching a loop."));
94
95static cl::opt<bool> EnableUnswitchCostMultiplier(
96 "enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden,
97 cl::desc("Enable unswitch cost multiplier that prohibits exponential "
98 "explosion in nontrivial unswitch."));
99static cl::opt<int> UnswitchSiblingsToplevelDiv(
100 "unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden,
101 cl::desc("Toplevel siblings divisor for cost multiplier."));
102static cl::opt<int> UnswitchParentBlocksDiv(
103 "unswitch-parent-blocks-div", cl::init(8), cl::Hidden,
104 cl::desc("Outer loop size divisor for cost multiplier."));
105static cl::opt<int> UnswitchNumInitialUnscaledCandidates(
106 "unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden,
107 cl::desc("Number of unswitch candidates that are ignored when calculating "
108 "cost multiplier."));
109static cl::opt<bool> UnswitchGuards(
110 "simple-loop-unswitch-guards", cl::init(true), cl::Hidden,
111 cl::desc("If enabled, simple loop unswitching will also consider "
112 "llvm.experimental.guard intrinsics as unswitch candidates."));
113static cl::opt<bool> DropNonTrivialImplicitNullChecks(
114 "simple-loop-unswitch-drop-non-trivial-implicit-null-checks",
115 cl::init(false), cl::Hidden,
116 cl::desc("If enabled, drop make.implicit metadata in unswitched implicit "
117 "null checks to save time analyzing if we can keep it."));
118static cl::opt<unsigned>
119 MSSAThreshold("simple-loop-unswitch-memoryssa-threshold",
120 cl::desc("Max number of memory uses to explore during "
121 "partial unswitching analysis"),
122 cl::init(100), cl::Hidden);
123static cl::opt<bool> FreezeLoopUnswitchCond(
124 "freeze-loop-unswitch-cond", cl::init(true), cl::Hidden,
125 cl::desc("If enabled, the freeze instruction will be added to condition "
126 "of loop unswitch to prevent miscompilation."));
127
128static cl::opt<bool> InjectInvariantConditions(
129 "simple-loop-unswitch-inject-invariant-conditions", cl::Hidden,
130 cl::desc("Whether we should inject new invariants and unswitch them to "
131 "eliminate some existing (non-invariant) conditions."),
132 cl::init(true));
133
134static cl::opt<unsigned> InjectInvariantConditionHotnessThreshold(
135 "simple-loop-unswitch-inject-invariant-condition-hotness-threshold",
136 cl::Hidden,
137 cl::desc("Only try to inject loop invariant conditions and "
138 "unswitch on them to eliminate branches that are "
139 "not-taken 1/<this option> times or less."),
140 cl::init(16));
141
142static cl::opt<bool> EstimateProfile("simple-loop-unswitch-estimate-profile",
143 cl::Hidden, cl::init(true));
145} // namespace llvm
146
148namespace {
149struct CompareDesc {
150 BranchInst *Term;
151 Value *Invariant;
152 BasicBlock *InLoopSucc;
153
154 CompareDesc(BranchInst *Term, Value *Invariant, BasicBlock *InLoopSucc)
155 : Term(Term), Invariant(Invariant), InLoopSucc(InLoopSucc) {}
156};
157
158struct InjectedInvariant {
159 ICmpInst::Predicate Pred;
160 Value *LHS;
161 Value *RHS;
162 BasicBlock *InLoopSucc;
163
164 InjectedInvariant(ICmpInst::Predicate Pred, Value *LHS, Value *RHS,
165 BasicBlock *InLoopSucc)
166 : Pred(Pred), LHS(LHS), RHS(RHS), InLoopSucc(InLoopSucc) {}
167};
168
169struct NonTrivialUnswitchCandidate {
170 Instruction *TI = nullptr;
171 TinyPtrVector<Value *> Invariants;
172 std::optional<InstructionCost> Cost;
173 std::optional<InjectedInvariant> PendingInjection;
174 NonTrivialUnswitchCandidate(
175 Instruction *TI, ArrayRef<Value *> Invariants,
176 std::optional<InstructionCost> Cost = std::nullopt,
177 std::optional<InjectedInvariant> PendingInjection = std::nullopt)
178 : TI(TI), Invariants(Invariants), Cost(Cost),
179 PendingInjection(PendingInjection) {};
180
181 bool hasPendingInjection() const { return PendingInjection.has_value(); }
182};
183} // end anonymous namespace.
184
185// Helper to skip (select x, true, false), which matches both a logical AND and
186// OR and can confuse code that tries to determine if \p Cond is either a
187// logical AND or OR but not both.
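// For example (illustrative IR, not taken from an actual test): given
//   %sel = select i1 %x, i1 true, i1 false
// this helper returns %x, since such a select is just an identity wrapper
// around the underlying i1 condition.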
188static Value *skipTrivialSelect(Value *Cond) {
189 Value *CondNext;
190 while (match(Cond, m_Select(m_Value(CondNext), m_One(), m_Zero())))
191 Cond = CondNext;
192 return Cond;
193}
194
195/// Collect all of the loop invariant input values transitively used by the
196/// homogeneous instruction graph from a given root.
197///
198/// This essentially walks from a root recursively through loop variant operands
199/// that perform the same logical operation (AND or OR) and finds all
200/// inputs which are loop invariant. For some operations these can be
201/// re-associated and unswitched out of the loop entirely.
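/// For example (illustrative IR only): with loop-invariant %a and %b and a
/// loop-variant %v,
///   %t = or i1 %a, %v
///   %cond = or i1 %t, %b
/// walking from the root %cond collects the invariant inputs %b and %a, while
/// the variant input %v is left in place.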
202static TinyPtrVector<Value *>
203collectHomogenousInstGraphLoopInvariants(const Loop &L, Instruction &Root,
204 const LoopInfo &LI) {
205 assert(!L.isLoopInvariant(&Root) &&
206 "Only need to walk the graph if root itself is not invariant.");
207 TinyPtrVector<Value *> Invariants;
208
209 bool IsRootAnd = match(&Root, m_LogicalAnd());
210 bool IsRootOr = match(&Root, m_LogicalOr());
211
212 // Build a worklist and recurse through operators collecting invariants.
213 SmallVector<Instruction *, 8> Worklist;
214 SmallPtrSet<Instruction *, 8> Visited;
215 Worklist.push_back(&Root);
216 Visited.insert(&Root);
217 do {
218 Instruction &I = *Worklist.pop_back_val();
219 for (Value *OpV : I.operand_values()) {
220 // Skip constants as unswitching isn't interesting for them.
221 if (isa<Constant>(OpV))
222 continue;
223
224 // Add it to our result if loop invariant.
225 if (L.isLoopInvariant(OpV)) {
226 Invariants.push_back(OpV);
227 continue;
228 }
229
230 // If not an instruction with the same opcode, nothing we can do.
231 Instruction *OpI = dyn_cast<Instruction>(OpV);
232
233 if (OpI && ((IsRootAnd && match(OpI, m_LogicalAnd())) ||
234 (IsRootOr && match(OpI, m_LogicalOr())))) {
235 // Visit this operand.
236 if (Visited.insert(OpI).second)
237 Worklist.push_back(OpI);
238 }
239 }
240 } while (!Worklist.empty());
241
242 return Invariants;
243}
244
245static void replaceLoopInvariantUses(const Loop &L, Value *Invariant,
246 Constant &Replacement) {
247 assert(!isa<Constant>(Invariant) && "Why are we unswitching on a constant?");
248
249 // Replace uses of LIC in the loop with the given constant.
250 // We use make_early_inc_range because calling U.set() invalidates the use iterator.
251 for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
252 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
253
254 // Replace this use within the loop body.
255 if (UserI && L.contains(UserI))
256 U.set(&Replacement);
257 }
258}
259
260/// Check that all the LCSSA PHI nodes in the loop exit block have trivial
261/// incoming values along this edge.
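/// For example (illustrative): an LCSSA phi in the exit block such as
///   %lcssa = phi i32 [ %inv, %exiting ]
/// is trivial when %inv is loop invariant; an incoming value computed inside
/// the loop makes this edge non-trivial and the check returns false.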
262static bool areLoopExitPHIsLoopInvariant(const Loop &L,
263 const BasicBlock &ExitingBB,
264 const BasicBlock &ExitBB) {
265 for (const Instruction &I : ExitBB) {
266 auto *PN = dyn_cast<PHINode>(&I);
267 if (!PN)
268 // No more PHIs to check.
269 return true;
270
271 // If the incoming value for this edge isn't loop invariant the unswitch
272 // won't be trivial.
273 if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
274 return false;
275 }
276 llvm_unreachable("Basic blocks should never be empty!");
277}
278
279/// Copy a set of loop invariant values \p Invariants and insert them at the
280/// end of \p BB and conditionally branch on the copied condition. We only
281/// branch on a single value.
282/// We attempt to estimate the profile of the resulting conditional branch from
283/// \p ComputeProfFrom, which is the original conditional branch we're
284/// unswitching.
285/// When \p Direction is true, the \p Invariants form a disjunction, and the
286/// branch conditioned on it exits the loop on the "true" case. When \p
287/// Direction is false, the \p Invariants form a conjunction and the branch
288/// exits on the "false" case.
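/// A rough sketch of the emitted IR (names are illustrative): for invariants
/// {%a, %b} with \p Direction == true, the end of \p BB gains something like
///   %a.fr = freeze i1 %a            ; only when freezing is requested/needed
///   %cond = or i1 %a.fr, %b
///   br i1 %cond, label %UnswitchedSucc, label %NormalSucc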
289static void buildPartialUnswitchConditionalBranch(
290 BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction,
291 BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze,
292 const Instruction *I, AssumptionCache *AC, const DominatorTree &DT,
293 const BranchInst &ComputeProfFrom) {
294
295 SmallVector<uint32_t> BranchWeights;
296 bool HasBranchWeights = EstimateProfile && !ProfcheckDisableMetadataFixes &&
297 extractBranchWeights(ComputeProfFrom, BranchWeights);
298 // If Direction is true, that means we had a disjunction and that the "true"
299 // case exits. The probability of the disjunction of the subset of terms is at
300 // most as high as the original one. So, if the probability is higher than the
301 // one we'd assign in absence of a profile (i.e. 0.5), we will use 0.5,
302 // but if it's lower, we will use the original probability.
303 // Conversely, if Direction is false, that means we had a conjunction, and the
304 // probability of exiting is captured in the second branch weight. That
305 // probability is a disjunction (of the negation of the original terms). The
306 // same reasoning applies as above.
307 // Issue #165649: should we expect BFI to conserve, and use that to calculate
308 // the branch weights?
309 if (HasBranchWeights &&
310 static_cast<double>(BranchWeights[Direction ? 0 : 1]) /
311 static_cast<double>(sum_of(BranchWeights)) >
312 0.5)
313 HasBranchWeights = false;
314
315 IRBuilder<> IRB(&BB);
317
318 SmallVector<Value *> FrozenInvariants;
319 for (Value *Inv : Invariants) {
320 if (InsertFreeze && !isGuaranteedNotToBeUndefOrPoison(Inv, AC, I, &DT))
321 Inv = IRB.CreateFreeze(Inv, Inv->getName() + ".fr");
322 FrozenInvariants.push_back(Inv);
323 }
324
325 Value *Cond = Direction ? IRB.CreateOr(FrozenInvariants)
326 : IRB.CreateAnd(FrozenInvariants);
327 auto *BR = IRB.CreateCondBr(
328 Cond, Direction ? &UnswitchedSucc : &NormalSucc,
329 Direction ? &NormalSucc : &UnswitchedSucc,
330 HasBranchWeights ? ComputeProfFrom.getMetadata(LLVMContext::MD_prof)
331 : nullptr);
332 if (!HasBranchWeights)
333 setExplicitlyUnknownBranchWeightsIfProfiled(*BR, *BR->getFunction(), DEBUG_TYPE);
334}
335
336/// Copy a set of loop invariant values, and conditionally branch on them.
337static void buildPartialInvariantUnswitchConditionalBranch(
338 BasicBlock &BB, ArrayRef<Value *> ToDuplicate, bool Direction,
339 BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L,
340 MemorySSAUpdater *MSSAU, const BranchInst &OriginalBranch) {
341 ValueToValueMapTy VMap;
342 for (auto *Val : reverse(ToDuplicate)) {
343 Instruction *Inst = cast<Instruction>(Val);
344 Instruction *NewInst = Inst->clone();
345
346 if (const DebugLoc &DL = Inst->getDebugLoc())
347 mapAtomInstance(DL, VMap);
348
349 NewInst->insertInto(&BB, BB.end());
350 RemapInstruction(NewInst, VMap,
351 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
352 VMap[Val] = NewInst;
353
354 if (!MSSAU)
355 continue;
356
357 MemorySSA *MSSA = MSSAU->getMemorySSA();
358 if (auto *MemUse =
359 dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(Inst))) {
360 auto *DefiningAccess = MemUse->getDefiningAccess();
361 // Get the first defining access before the loop.
362 while (L.contains(DefiningAccess->getBlock())) {
363 // If the defining access is a MemoryPhi, get the incoming
364 // value for the pre-header as defining access.
365 if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess))
366 DefiningAccess =
367 MemPhi->getIncomingValueForBlock(L.getLoopPreheader());
368 else
369 DefiningAccess = cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
370 }
371 MSSAU->createMemoryAccessInBB(NewInst, DefiningAccess,
372 NewInst->getParent(),
373 MemorySSA::BeforeTerminator);
374 }
375 }
376
377 IRBuilder<> IRB(&BB);
379 Value *Cond = VMap[ToDuplicate[0]];
380 // The expectation is that ToDuplicate[0] is the condition used by the
381 // OriginalBranch, in which case we can clone the profile metadata from there.
382 auto *ProfData =
383 !ProfcheckDisableMetadataFixes &&
384 ToDuplicate[0] == skipTrivialSelect(OriginalBranch.getCondition())
385 ? OriginalBranch.getMetadata(LLVMContext::MD_prof)
386 : nullptr;
387 auto *BR =
388 IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
389 Direction ? &NormalSucc : &UnswitchedSucc, ProfData);
390 if (!ProfData)
391 setExplicitlyUnknownBranchWeightsIfProfiled(*BR, *BR->getFunction(), DEBUG_TYPE);
392}
393
394/// Rewrite the PHI nodes in an unswitched loop exit basic block.
395///
396/// Requires that the loop exit and unswitched basic block are the same, and
397/// that the exiting block was a unique predecessor of that block. Rewrites the
398/// PHI nodes in that block such that what were LCSSA PHI nodes become trivial
399/// PHI nodes from the old preheader that now contains the unswitched
400/// terminator.
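/// For example (illustrative): a phi in the unswitched exit block such as
///   %p = phi i32 [ %v, %old.exiting ]
/// simply becomes
///   %p = phi i32 [ %v, %old.preheader ]
/// because the old preheader now holds the unswitched terminator.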
401static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB,
402 BasicBlock &OldExitingBB,
403 BasicBlock &OldPH) {
404 for (PHINode &PN : UnswitchedBB.phis()) {
405 // When the loop exit is directly unswitched we just need to update the
406 // incoming basic block. We loop to handle weird cases with repeated
407 // incoming blocks, but expect to typically only have one operand here.
408 for (auto i : seq<int>(0, PN.getNumOperands())) {
409 assert(PN.getIncomingBlock(i) == &OldExitingBB &&
410 "Found incoming block different from unique predecessor!");
411 PN.setIncomingBlock(i, &OldPH);
412 }
413 }
414}
415
416/// Rewrite the PHI nodes in the loop exit basic block and the split off
417/// unswitched block.
418///
419/// Because the exit block remains an exit from the loop, this rewrites the
420/// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI
421/// nodes into the unswitched basic block to select between the value in the
422/// old preheader and the loop exit.
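/// For example (illustrative): for an LCSSA phi
///   %p = phi i32 [ %v, %exiting ]
/// in the exit block, a new phi in the unswitched block merges %v arriving
/// from the old preheader with %p arriving from the exit block, and on a full
/// unswitch %p additionally loses its edge from the exiting block.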
423static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB,
424 BasicBlock &UnswitchedBB,
425 BasicBlock &OldExitingBB,
426 BasicBlock &OldPH,
427 bool FullUnswitch) {
428 assert(&ExitBB != &UnswitchedBB &&
429 "Must have different loop exit and unswitched blocks!");
430 BasicBlock::iterator InsertPt = UnswitchedBB.begin();
431 for (PHINode &PN : ExitBB.phis()) {
432 auto *NewPN = PHINode::Create(PN.getType(), /*NumReservedValues*/ 2,
433 PN.getName() + ".split");
434 NewPN->insertBefore(InsertPt);
435
436 // Walk backwards over the old PHI node's inputs to minimize the cost of
437 // removing each one. We have to do this weird loop manually so that we
438 // create the same number of new incoming edges in the new PHI as we expect
439 // each case-based edge to be included in the unswitched switch in some
440 // cases.
441 // FIXME: This is really, really gross. It would be much cleaner if LLVM
442 // allowed us to create a single entry for a predecessor block without
443 // having separate entries for each "edge" even though these edges are
444 // required to produce identical results.
445 for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
446 if (PN.getIncomingBlock(i) != &OldExitingBB)
447 continue;
448
449 Value *Incoming = PN.getIncomingValue(i);
450 if (FullUnswitch)
451 // No more edge from the old exiting block to the exit block.
452 PN.removeIncomingValue(i);
453
454 NewPN->addIncoming(Incoming, &OldPH);
455 }
456
457 // Now replace the old PHI with the new one and wire the old one in as an
458 // input to the new one.
459 PN.replaceAllUsesWith(NewPN);
460 NewPN->addIncoming(&PN, &ExitBB);
461 }
462}
463
464/// Hoist the current loop up to the innermost loop containing a remaining exit.
465///
466/// Because we've removed an exit from the loop, we may have changed the set of
467/// loops reachable and need to move the current loop up the loop nest or even
468/// to an entirely separate nest.
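/// For example (illustrative): if unswitching removed an inner loop's only
/// exit into its immediate parent, and the inner loop's remaining exits all
/// land in the grandparent loop, then the inner loop (and its preheader) must
/// be re-parented to the grandparent and LCSSA re-formed for the loops it
/// left behind.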
469static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
470 DominatorTree &DT, LoopInfo &LI,
471 MemorySSAUpdater *MSSAU, ScalarEvolution *SE) {
472 // If the loop is already at the top level, we can't hoist it anywhere.
473 Loop *OldParentL = L.getParentLoop();
474 if (!OldParentL)
475 return;
476
477 SmallVector<BasicBlock *, 4> Exits;
478 L.getExitBlocks(Exits);
479 Loop *NewParentL = nullptr;
480 for (auto *ExitBB : Exits)
481 if (Loop *ExitL = LI.getLoopFor(ExitBB))
482 if (!NewParentL || NewParentL->contains(ExitL))
483 NewParentL = ExitL;
484
485 if (NewParentL == OldParentL)
486 return;
487
488 // The new parent loop (if different) should always contain the old one.
489 if (NewParentL)
490 assert(NewParentL->contains(OldParentL) &&
491 "Can only hoist this loop up the nest!");
492
493 // The preheader will need to move with the body of this loop. However,
494 // because it isn't in this loop we also need to update the primary loop map.
495 assert(OldParentL == LI.getLoopFor(&Preheader) &&
496 "Parent loop of this loop should contain this loop's preheader!");
497 LI.changeLoopFor(&Preheader, NewParentL);
498
499 // Remove this loop from its old parent.
500 OldParentL->removeChildLoop(&L);
501
502 // Add the loop either to the new parent or as a top-level loop.
503 if (NewParentL)
504 NewParentL->addChildLoop(&L);
505 else
506 LI.addTopLevelLoop(&L);
507
508 // Remove this loops blocks from the old parent and every other loop up the
509 // nest until reaching the new parent. Also update all of these
510 // no-longer-containing loops to reflect the nesting change.
511 for (Loop *OldContainingL = OldParentL; OldContainingL != NewParentL;
512 OldContainingL = OldContainingL->getParentLoop()) {
513 llvm::erase_if(OldContainingL->getBlocksVector(),
514 [&](const BasicBlock *BB) {
515 return BB == &Preheader || L.contains(BB);
516 });
517
518 OldContainingL->getBlocksSet().erase(&Preheader);
519 for (BasicBlock *BB : L.blocks())
520 OldContainingL->getBlocksSet().erase(BB);
521
522 // Because we just hoisted a loop out of this one, we have essentially
523 // created new exit paths from it. That means we need to form LCSSA PHI
524 // nodes for values used in the no-longer-nested loop.
525 formLCSSA(*OldContainingL, DT, &LI, SE);
526
527 // We shouldn't need to form dedicated exits because the exit introduced
528 // here is the (just split by unswitching) preheader. However, after trivial
529 // unswitching it is possible to get new non-dedicated exits out of the parent
530 // loop, so let's conservatively form dedicated exit blocks and figure out
531 // if we can optimize later.
532 formDedicatedExitBlocks(OldContainingL, &DT, &LI, MSSAU,
533 /*PreserveLCSSA*/ true);
534 }
535}
536
537// Return the top-most loop containing ExitBB and having ExitBB as exiting block
538// or the loop containing ExitBB, if there is no parent loop containing ExitBB
539// as exiting block.
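// For example (illustrative): given a nest Outer { Inner } with ExitBB inside
// both, an edge from ExitBB that leaves Outer makes this return Outer; if its
// edges only leave Inner, Inner is returned; and if ExitBB exits no containing
// loop, the result is simply LI.getLoopFor(ExitBB).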
540static Loop *getTopMostExitingLoop(const BasicBlock *ExitBB,
541 const LoopInfo &LI) {
542 Loop *TopMost = LI.getLoopFor(ExitBB);
543 Loop *Current = TopMost;
544 while (Current) {
545 if (Current->isLoopExiting(ExitBB))
546 TopMost = Current;
547 Current = Current->getParentLoop();
548 }
549 return TopMost;
550}
551
552/// Unswitch a trivial branch if the condition is loop invariant.
553///
554/// This routine should only be called when loop code leading to the branch has
555/// been validated as trivial (no side effects). This routine checks if the
556/// condition is invariant and one of the successors is a loop exit. This
557/// allows us to unswitch without duplicating the loop, making it trivial.
558///
559/// If this routine fails to unswitch the branch it returns false.
560///
561/// If the branch can be unswitched, this routine splits the preheader and
562/// hoists the branch above that split. Preserves loop simplified form
563/// (splitting the exit block as necessary). It simplifies the branch within
564/// the loop to an unconditional branch but doesn't remove it entirely. Further
565/// cleanup can be done with some simplifycfg like pass.
566///
567/// If `SE` is not null, it will be updated based on the potential loop SCEVs
568/// invalidated by this.
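/// A rough before/after sketch (illustrative IR only), unswitching `br i1 %inv`:
///   Before:  header: ... br i1 %inv, label %exit, label %body
///   After:   old preheader: br i1 %inv, label %exit(.split), label %new.ph
///            header: ... br label %body
/// The branch on the invariant now executes once, outside the loop.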
569static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
570 LoopInfo &LI, ScalarEvolution *SE,
571 MemorySSAUpdater *MSSAU) {
572 assert(BI.isConditional() && "Can only unswitch a conditional branch!");
573 LLVM_DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n");
574
575 // The loop invariant values that we want to unswitch.
576 TinyPtrVector<Value *> Invariants;
577
578 // When true, we're fully unswitching the branch rather than just unswitching
579 // some input conditions to the branch.
580 bool FullUnswitch = false;
581
582 Value *Cond = skipTrivialSelect(BI.getCondition());
583 if (L.isLoopInvariant(Cond)) {
584 Invariants.push_back(Cond);
585 FullUnswitch = true;
586 } else {
587 if (auto *CondInst = dyn_cast<Instruction>(Cond))
588 Invariants = collectHomogenousInstGraphLoopInvariants(L, *CondInst, LI);
589 if (Invariants.empty()) {
590 LLVM_DEBUG(dbgs() << " Couldn't find invariant inputs!\n");
591 return false;
592 }
593 }
594
595 // Check that one of the branch's successors exits, and which one.
596 bool ExitDirection = true;
597 int LoopExitSuccIdx = 0;
598 auto *LoopExitBB = BI.getSuccessor(0);
599 if (L.contains(LoopExitBB)) {
600 ExitDirection = false;
601 LoopExitSuccIdx = 1;
602 LoopExitBB = BI.getSuccessor(1);
603 if (L.contains(LoopExitBB)) {
604 LLVM_DEBUG(dbgs() << " Branch doesn't exit the loop!\n");
605 return false;
606 }
607 }
608 auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);
609 auto *ParentBB = BI.getParent();
610 if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB)) {
611 LLVM_DEBUG(dbgs() << " Loop exit PHI's aren't loop-invariant!\n");
612 return false;
613 }
614
615 // When unswitching only part of the branch's condition, we need the exit
616 // block to be reached directly from the partially unswitched input. This can
617 // be done when the exit block is along the true edge and the branch condition
618 // is a graph of `or` operations, or the exit block is along the false edge
619 // and the condition is a graph of `and` operations.
620 if (!FullUnswitch) {
621 if (ExitDirection ? !match(Cond, m_LogicalOr())
622 : !match(Cond, m_LogicalAnd())) {
623 LLVM_DEBUG(dbgs() << " Branch condition is in improper form for "
624 "non-full unswitch!\n");
625 return false;
626 }
627 }
628
629 LLVM_DEBUG({
630 dbgs() << " unswitching trivial invariant conditions for: " << BI
631 << "\n";
632 for (Value *Invariant : Invariants) {
633 dbgs() << " " << *Invariant << " == true";
634 if (Invariant != Invariants.back())
635 dbgs() << " ||";
636 dbgs() << "\n";
637 }
638 });
639
640 // If we have scalar evolutions, we need to invalidate them including this
641 // loop, the loop containing the exit block and the topmost parent loop
642 // exiting via LoopExitBB.
643 if (SE) {
644 if (const Loop *ExitL = getTopMostExitingLoop(LoopExitBB, LI))
645 SE->forgetLoop(ExitL);
646 else
647 // Forget the entire nest as this exits the entire nest.
648 SE->forgetTopmostLoop(&L);
649 SE->forgetBlockAndLoopDispositions();
650 }
651
652 if (MSSAU && VerifyMemorySSA)
653 MSSAU->getMemorySSA()->verifyMemorySSA();
654
655 // Split the preheader, so that we know that there is a safe place to insert
656 // the conditional branch. We will change the preheader to have a conditional
657 // branch on LoopCond.
658 BasicBlock *OldPH = L.getLoopPreheader();
659 BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
660
661 // Now that we have a place to insert the conditional branch, create a place
662 // to branch to: this is the exit block out of the loop that we are
663 // unswitching. We need to split this if there are other loop predecessors.
664 // Because the loop is in simplified form, *any* other predecessor is enough.
665 BasicBlock *UnswitchedBB;
666 if (FullUnswitch && LoopExitBB->getUniquePredecessor()) {
667 assert(LoopExitBB->getUniquePredecessor() == BI.getParent() &&
668 "A branch's parent isn't a predecessor!");
669 UnswitchedBB = LoopExitBB;
670 } else {
671 UnswitchedBB =
672 SplitBlock(LoopExitBB, LoopExitBB->begin(), &DT, &LI, MSSAU, "", false);
673 }
674
675 if (MSSAU && VerifyMemorySSA)
676 MSSAU->getMemorySSA()->verifyMemorySSA();
677
678 // Actually move the invariant uses into the unswitched position. If possible,
679 // we do this by moving the instructions, but when doing partial unswitching
680 // we do it by building a new merge of the values in the unswitched position.
681 OldPH->getTerminator()->eraseFromParent();
682 if (FullUnswitch) {
683 // If fully unswitching, we can use the existing branch instruction.
684 // Splice it into the old PH to gate reaching the new preheader and re-point
685 // its successors.
686 BI.moveBefore(*OldPH, OldPH->end());
687 BI.setCondition(Cond);
688 if (MSSAU) {
689 // Temporarily clone the terminator, to make MSSA update cheaper by
690 // separating "insert edge" updates from "remove edge" ones.
691 BI.clone()->insertInto(ParentBB, ParentBB->end());
692 } else {
693 // Create a new unconditional branch that will continue the loop as a new
694 // terminator.
695 Instruction *NewBI = BranchInst::Create(ContinueBB, ParentBB);
696 NewBI->setDebugLoc(BI.getDebugLoc());
697 }
698 BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
699 BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
700 } else {
701 // Only unswitching a subset of inputs to the condition, so we will need to
702 // build a new branch that merges the invariant inputs.
703 if (ExitDirection)
704 assert(match(Cond, m_LogicalOr()) &&
705 "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "
706 "condition!");
707 else
708 assert(match(Cond, m_LogicalAnd()) &&
709 "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
710 " condition!");
711 buildPartialUnswitchConditionalBranch(
712 *OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH,
713 FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT, BI);
714 }
715
716 // Update the dominator tree with the added edge.
717 DT.insertEdge(OldPH, UnswitchedBB);
718
719 // After the dominator tree was updated with the added edge, update MemorySSA
720 // if available.
721 if (MSSAU) {
722 SmallVector<CFGUpdate, 1> Updates;
723 Updates.push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB});
724 MSSAU->applyInsertUpdates(Updates, DT);
725 }
726
727 // Finish updating dominator tree and memory ssa for full unswitch.
728 if (FullUnswitch) {
729 if (MSSAU) {
730 Instruction *Term = ParentBB->getTerminator();
731 // Remove the cloned branch instruction and create unconditional branch
732 // now.
733 Instruction *NewBI = BranchInst::Create(ContinueBB, ParentBB);
734 NewBI->setDebugLoc(Term->getDebugLoc());
735 Term->eraseFromParent();
736 MSSAU->removeEdge(ParentBB, LoopExitBB);
737 }
738 DT.deleteEdge(ParentBB, LoopExitBB);
739 }
740
741 if (MSSAU && VerifyMemorySSA)
742 MSSAU->getMemorySSA()->verifyMemorySSA();
743
744 // Rewrite the relevant PHI nodes.
745 if (UnswitchedBB == LoopExitBB)
746 rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);
747 else
748 rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,
749 *ParentBB, *OldPH, FullUnswitch);
750
751 // The constant we can replace all of our invariants with inside the loop
752 // body. If any of the invariants have a value other than this the loop won't
753 // be entered.
754 ConstantInt *Replacement = ExitDirection
755 ? ConstantInt::getFalse(BI.getContext())
756 : ConstantInt::getTrue(BI.getContext());
757
758 // Since this is an i1 condition we can also trivially replace uses of it
759 // within the loop with a constant.
760 for (Value *Invariant : Invariants)
761 replaceLoopInvariantUses(L, Invariant, *Replacement);
762
763 // If this was full unswitching, we may have changed the nesting relationship
764 // for this loop so hoist it to its correct parent if needed.
765 if (FullUnswitch)
766 hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
767
768 if (MSSAU && VerifyMemorySSA)
769 MSSAU->getMemorySSA()->verifyMemorySSA();
770
771 LLVM_DEBUG(dbgs() << " done: unswitching trivial branch...\n");
772 ++NumTrivial;
773 ++NumBranches;
774 return true;
775}
776
777/// Unswitch a trivial switch if the condition is loop invariant.
778///
779/// This routine should only be called when loop code leading to the switch has
780/// been validated as trivial (no side effects). This routine checks if the
781/// condition is invariant and that at least one of the successors is a loop
782/// exit. This allows us to unswitch without duplicating the loop, making it
783/// trivial.
784///
785/// If this routine fails to unswitch the switch it returns false.
786///
787/// If the switch can be unswitched, this routine splits the preheader and
788/// copies the switch above that split. If the default case is one of the
789/// exiting cases, it copies the non-exiting cases and points them at the new
790/// preheader. If the default case is not exiting, it copies the exiting cases
791/// and points the default at the preheader. It preserves loop simplified form
792/// (splitting the exit blocks as necessary). It simplifies the switch within
793/// the loop by removing now-dead cases. If the default case is one of those
794/// unswitched, it replaces its destination with a new basic block containing
795/// only unreachable. Such basic blocks, while technically loop exits, are not
796/// considered for unswitching so this is a stable transform and the same
797/// switch will not be revisited. If after unswitching there is only a single
798/// in-loop successor, the switch is further simplified to an unconditional
799/// branch. Still more cleanup can be done with some simplifycfg like pass.
800///
801/// If `SE` is not null, it will be updated based on the potential loop SCEVs
802/// invalidated by this.
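/// A rough before/after sketch (illustrative IR only), unswitching `switch i32 %inv`:
///   Before:  header: switch i32 %inv, label %body [ i32 0, label %exit ]
///   After:   old preheader: switch i32 %inv, label %new.ph [ i32 0, label %exit(.split) ]
///            header: br label %body
/// The exiting case is now decided once, before the loop is entered.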
803static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
804 LoopInfo &LI, ScalarEvolution *SE,
805 MemorySSAUpdater *MSSAU) {
806 LLVM_DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n");
807 Value *LoopCond = SI.getCondition();
808
809 // If this isn't switching on an invariant condition, we can't unswitch it.
810 if (!L.isLoopInvariant(LoopCond))
811 return false;
812
813 auto *ParentBB = SI.getParent();
814
815 // The same check must be used both for the default and the exit cases. We
816 // should never leave edges from the switch instruction to a basic block that
817 // we are unswitching, hence the condition used to determine the default case
818 // needs to also be used to populate ExitCaseIndices, which is then used to
819 // remove cases from the switch.
820 auto IsTriviallyUnswitchableExitBlock = [&](BasicBlock &BBToCheck) {
821 // BBToCheck is not an exit block if it is inside loop L.
822 if (L.contains(&BBToCheck))
823 return false;
824 // BBToCheck is not trivial to unswitch if its phis aren't loop invariant.
825 if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, BBToCheck))
826 return false;
827 // We do not unswitch a block that only has an unreachable statement, as
828 // it's possible this is a previously unswitched block. Only unswitch if
829 // either the terminator is not unreachable, or, if it is, it's not the only
830 // instruction in the block.
831 auto *TI = BBToCheck.getTerminator();
832 bool isUnreachable = isa<UnreachableInst>(TI);
833 return !isUnreachable || &*BBToCheck.getFirstNonPHIOrDbg() != TI;
834 };
835
836 SmallVector<int, 4> ExitCaseIndices;
837 for (auto Case : SI.cases())
838 if (IsTriviallyUnswitchableExitBlock(*Case.getCaseSuccessor()))
839 ExitCaseIndices.push_back(Case.getCaseIndex());
840 BasicBlock *DefaultExitBB = nullptr;
841 SwitchInstProfUpdateWrapper::CaseWeightOpt DefaultCaseWeight =
842 SwitchInstProfUpdateWrapper::getSuccessorWeight(SI, 0);
843 if (IsTriviallyUnswitchableExitBlock(*SI.getDefaultDest())) {
844 DefaultExitBB = SI.getDefaultDest();
845 } else if (ExitCaseIndices.empty())
846 return false;
847
848 LLVM_DEBUG(dbgs() << " unswitching trivial switch...\n");
849
850 if (MSSAU && VerifyMemorySSA)
851 MSSAU->getMemorySSA()->verifyMemorySSA();
852
853 // We may need to invalidate SCEVs for the outermost loop reached by any of
854 // the exits.
855 Loop *OuterL = &L;
856
857 if (DefaultExitBB) {
858 // Check the loop containing this exit.
859 Loop *ExitL = getTopMostExitingLoop(DefaultExitBB, LI);
860 if (!ExitL || ExitL->contains(OuterL))
861 OuterL = ExitL;
862 }
863 for (unsigned Index : ExitCaseIndices) {
864 auto CaseI = SI.case_begin() + Index;
865 // Compute the outer loop from this exit.
866 Loop *ExitL = getTopMostExitingLoop(CaseI->getCaseSuccessor(), LI);
867 if (!ExitL || ExitL->contains(OuterL))
868 OuterL = ExitL;
869 }
870
871 if (SE) {
872 if (OuterL)
873 SE->forgetLoop(OuterL);
874 else
875 SE->forgetTopmostLoop(&L);
876 }
877
878 if (DefaultExitBB) {
879 // Clear out the default destination temporarily to allow accurate
880 // predecessor lists to be examined below.
881 SI.setDefaultDest(nullptr);
882 }
883
884 // Store the exit cases into a separate data structure and remove them from
885 // the switch.
886 SmallVector<std::tuple<ConstantInt *, BasicBlock *,
887 SwitchInstProfUpdateWrapper::CaseWeightOpt>,
888 4> ExitCases;
889 ExitCases.reserve(ExitCaseIndices.size());
890 SwitchInstProfUpdateWrapper SIW(SI);
891 // We walk the case indices backwards so that we remove the last case first
892 // and don't disrupt the earlier indices.
893 for (unsigned Index : reverse(ExitCaseIndices)) {
894 auto CaseI = SI.case_begin() + Index;
895 // Save the value of this case.
896 auto W = SIW.getSuccessorWeight(CaseI->getSuccessorIndex());
897 ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);
898 // Delete the unswitched cases.
899 SIW.removeCase(CaseI);
900 }
901
902 // Check if after this all of the remaining cases point at the same
903 // successor.
904 BasicBlock *CommonSuccBB = nullptr;
905 if (SI.getNumCases() > 0 &&
906 all_of(drop_begin(SI.cases()), [&SI](const SwitchInst::CaseHandle &Case) {
907 return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor();
908 }))
909 CommonSuccBB = SI.case_begin()->getCaseSuccessor();
910 if (!DefaultExitBB) {
911 // If we're not unswitching the default, we need it to match any cases to
912 // have a common successor or if we have no cases it is the common
913 // successor.
914 if (SI.getNumCases() == 0)
915 CommonSuccBB = SI.getDefaultDest();
916 else if (SI.getDefaultDest() != CommonSuccBB)
917 CommonSuccBB = nullptr;
918 }
919
920 // Split the preheader, so that we know that there is a safe place to insert
921 // the switch.
922 BasicBlock *OldPH = L.getLoopPreheader();
923 BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
924 OldPH->getTerminator()->eraseFromParent();
925
926 // Now add the unswitched switch. This new switch instruction inherits the
927 // debug location of the old switch, because it semantically replaces the old
928 // one.
929 auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
930 NewSI->setDebugLoc(SIW->getDebugLoc());
931 SwitchInstProfUpdateWrapper NewSIW(*NewSI);
932
933 // Rewrite the IR for the unswitched basic blocks. This requires two steps.
934 // First, we split any exit blocks with remaining in-loop predecessors. Then
935 // we update the PHIs in one of two ways depending on if there was a split.
936 // We walk in reverse so that we split in the same order as the cases
937 // appeared. This is purely for convenience of reading the resulting IR, but
938 // it doesn't cost anything really.
939 SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs;
940 SmallDenseMap<BasicBlock *, BasicBlock *, 2> SplitExitBBMap;
941 // Handle the default exit if necessary.
942 // FIXME: It'd be great if we could merge this with the loop below but LLVM's
943 // ranges aren't quite powerful enough yet.
944 if (DefaultExitBB) {
945 if (pred_empty(DefaultExitBB)) {
946 UnswitchedExitBBs.insert(DefaultExitBB);
947 rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
948 } else {
949 auto *SplitBB =
950 SplitBlock(DefaultExitBB, DefaultExitBB->begin(), &DT, &LI, MSSAU);
951 rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
952 *ParentBB, *OldPH,
953 /*FullUnswitch*/ true);
954 DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
955 }
956 }
957 // Note that we must use a reference in the for loop so that we update the
958 // container.
959 for (auto &ExitCase : reverse(ExitCases)) {
960 // Grab a reference to the exit block in the pair so that we can update it.
961 BasicBlock *ExitBB = std::get<1>(ExitCase);
962
963 // If this case is the last edge into the exit block, we can simply reuse it
964 // as it will no longer be a loop exit. No mapping necessary.
965 if (pred_empty(ExitBB)) {
966 // Only rewrite once.
967 if (UnswitchedExitBBs.insert(ExitBB).second)
968 rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH);
969 continue;
970 }
971
972 // Otherwise we need to split the exit block so that we retain an exit
973 // block from the loop and a target for the unswitched condition.
974 BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
975 if (!SplitExitBB) {
976 // If this is the first time we see this, do the split and remember it.
977 SplitExitBB = SplitBlock(ExitBB, ExitBB->begin(), &DT, &LI, MSSAU);
978 rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
979 *ParentBB, *OldPH,
980 /*FullUnswitch*/ true);
981 }
982 // Update the case pair to point to the split block.
983 std::get<1>(ExitCase) = SplitExitBB;
984 }
985
986 // Now add the unswitched cases. We do this in reverse order as we built them
987 // in reverse order.
988 for (auto &ExitCase : reverse(ExitCases)) {
989 ConstantInt *CaseVal = std::get<0>(ExitCase);
990 BasicBlock *UnswitchedBB = std::get<1>(ExitCase);
991
992 NewSIW.addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));
993 }
994
995 // If the default was unswitched, re-point it and add explicit cases for
996 // entering the loop.
997 if (DefaultExitBB) {
998 NewSIW->setDefaultDest(DefaultExitBB);
999 NewSIW.setSuccessorWeight(0, DefaultCaseWeight);
1000
1001 // We removed all the exit cases, so we just copy the cases to the
1002 // unswitched switch.
1003 for (const auto &Case : SI.cases())
1004 NewSIW.addCase(Case.getCaseValue(), NewPH,
1005 SIW.getSuccessorWeight(Case.getSuccessorIndex()));
1006 } else if (DefaultCaseWeight) {
1007 // We have to set branch weight of the default case.
1008 uint64_t SW = *DefaultCaseWeight;
1009 for (const auto &Case : SI.cases()) {
1010 auto W = SIW.getSuccessorWeight(Case.getSuccessorIndex());
1011 assert(W &&
1012 "case weight must be defined as default case weight is defined");
1013 SW += *W;
1014 }
1015 NewSIW.setSuccessorWeight(0, SW);
1016 }
1017
1018 // If we ended up with a common successor for every path through the switch
1019 // after unswitching, rewrite it to an unconditional branch to make it easy
1020 // to recognize. Otherwise we potentially have to recognize the default case
1021 // pointing at unreachable and other complexity.
1022 if (CommonSuccBB) {
1023 BasicBlock *BB = SI.getParent();
1024 // We may have had multiple edges to this common successor block, so remove
1025 // them as predecessors. We skip the first one, either the default or the
1026 // actual first case.
1027 bool SkippedFirst = DefaultExitBB == nullptr;
1028 for (auto Case : SI.cases()) {
1029 assert(Case.getCaseSuccessor() == CommonSuccBB &&
1030 "Non-common successor!");
1031 (void)Case;
1032 if (!SkippedFirst) {
1033 SkippedFirst = true;
1034 continue;
1035 }
1036 CommonSuccBB->removePredecessor(BB,
1037 /*KeepOneInputPHIs*/ true);
1038 }
1039 // Now nuke the switch and replace it with a direct branch.
1040 Instruction *NewBI = BranchInst::Create(CommonSuccBB, BB);
1041 NewBI->setDebugLoc(SIW->getDebugLoc());
1042 SIW.eraseFromParent();
1043 } else if (DefaultExitBB) {
1044 assert(SI.getNumCases() > 0 &&
1045 "If we had no cases we'd have a common successor!");
1046 // Move the last case to the default successor. This is valid as if the
1047 // default got unswitched it cannot be reached. This has the advantage of
1048 // being simple and keeping the number of edges from this switch to
1049 // successors the same, and avoiding any PHI update complexity.
1050 auto LastCaseI = std::prev(SI.case_end());
1051
1052 SI.setDefaultDest(LastCaseI->getCaseSuccessor());
1053 SIW.setSuccessorWeight(
1054 0, SIW.getSuccessorWeight(LastCaseI->getSuccessorIndex()));
1055 SIW.removeCase(LastCaseI);
1056 }
1057
1058 // Walk the unswitched exit blocks and the unswitched split blocks and update
1059 // the dominator tree based on the CFG edits. While we are walking unordered
1060 // containers here, the API for applyUpdates takes an unordered list of
1061 // updates and requires them to not contain duplicates.
1062 SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
1063 for (auto *UnswitchedExitBB : UnswitchedExitBBs) {
1064 DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedExitBB});
1065 DTUpdates.push_back({DT.Insert, OldPH, UnswitchedExitBB});
1066 }
1067 for (auto SplitUnswitchedPair : SplitExitBBMap) {
1068 DTUpdates.push_back({DT.Delete, ParentBB, SplitUnswitchedPair.first});
1069 DTUpdates.push_back({DT.Insert, OldPH, SplitUnswitchedPair.second});
1070 }
1071
1072 if (MSSAU) {
1073 MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true);
1074 if (VerifyMemorySSA)
1075 MSSAU->getMemorySSA()->verifyMemorySSA();
1076 } else {
1077 DT.applyUpdates(DTUpdates);
1078 }
1079
1080 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
1081
1082 // We may have changed the nesting relationship for this loop so hoist it to
1083 // its correct parent if needed.
1084 hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
1085
1086 if (MSSAU && VerifyMemorySSA)
1087 MSSAU->getMemorySSA()->verifyMemorySSA();
1088
1089 ++NumTrivial;
1090 ++NumSwitches;
1091 LLVM_DEBUG(dbgs() << " done: unswitching trivial switch...\n");
1092 return true;
1093}
1094
1095/// This routine scans the loop to find a branch or switch which occurs before
1096/// any side effects occur. These can potentially be unswitched without
1097/// duplicating the loop. If a branch or switch is successfully unswitched the
1098/// scanning continues to see if subsequent branches or switches have become
1099/// trivial. Once all trivial candidates have been unswitched, this routine
1100/// returns.
1101///
1102/// The return value indicates whether anything was unswitched (and therefore
1103/// changed).
1104///
1105/// If `SE` is not null, it will be updated based on the potential loop SCEVs
1106/// invalidated by this.
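/// For example (illustrative): once the header's invariant branch is
/// unswitched, its in-loop terminator collapses to an unconditional branch;
/// the scan then follows that branch into the successor and keeps looking for
/// further trivial branch or switch candidates until it hits a side effect, a
/// non-trivial condition, or leaves the loop.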
1107static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT,
1108 LoopInfo &LI, ScalarEvolution *SE,
1109 MemorySSAUpdater *MSSAU) {
1110 bool Changed = false;
1111
1112 // If the loop header has only one reachable successor, we should keep looking
1113 // trivial condition candidates in the successor as well. An alternative is
1114 // to constant fold conditions and merge successors into loop header (then we
1115 // only need to check header's terminator). The reason for not doing this in
1116 // LoopUnswitch pass is that it could potentially break LoopPassManager's
1117 // invariants. Folding dead branches could either eliminate the current loop
1118 // or make other loops unreachable. LCSSA form might also not be preserved
1119 // after deleting branches. The following code keeps traversing loop header's
1120 // successors until it finds the trivial condition candidate (condition that
1121 // is not a constant). Since unswitching generates branches with constant
1122 // conditions, this scenario could be very common in practice.
1123 BasicBlock *CurrentBB = L.getHeader();
1124 SmallPtrSet<BasicBlock *, 8> Visited;
1125 Visited.insert(CurrentBB);
1126 do {
1127 // Check if there are any side-effecting instructions (e.g. stores, calls,
1128 // volatile loads) in the part of the loop that the code *would* execute
1129 // without unswitching.
1130 if (MSSAU) // Possible early exit with MSSA
1131 if (auto *Defs = MSSAU->getMemorySSA()->getBlockDefs(CurrentBB))
1132 if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end()))
1133 return Changed;
1134 if (llvm::any_of(*CurrentBB,
1135 [](Instruction &I) { return I.mayHaveSideEffects(); }))
1136 return Changed;
1137
1138 Instruction *CurrentTerm = CurrentBB->getTerminator();
1139
1140 if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
1141 // Don't bother trying to unswitch past a switch with a constant
1142 // condition. This should be removed prior to running this pass by
1143 // simplifycfg.
1144 if (isa<Constant>(SI->getCondition()))
1145 return Changed;
1146
1147 if (!unswitchTrivialSwitch(L, *SI, DT, LI, SE, MSSAU))
1148 // Couldn't unswitch this one so we're done.
1149 return Changed;
1150
1151 // Mark that we managed to unswitch something.
1152 Changed = true;
1153
1154 // If unswitching turned the terminator into an unconditional branch then
1155 // we can continue. The unswitching logic specifically works to fold any
1156 // cases it can into an unconditional branch to make it easier to
1157 // recognize here.
1158 auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator());
1159 if (!BI || BI->isConditional())
1160 return Changed;
1161
1162 CurrentBB = BI->getSuccessor(0);
1163 continue;
1164 }
1165
1166 auto *BI = dyn_cast<BranchInst>(CurrentTerm);
1167 if (!BI)
1168 // We do not understand other terminator instructions.
1169 return Changed;
1170
1171 // Don't bother trying to unswitch past an unconditional branch or a branch
1172 // with a constant value. These should be removed by simplifycfg prior to
1173 // running this pass.
1174 if (!BI->isConditional() ||
1175 isa<Constant>(skipTrivialSelect(BI->getCondition())))
1176 return Changed;
1177
1178 // Found a trivial condition candidate: non-foldable conditional branch. If
1179 // we fail to unswitch this, we can't do anything else that is trivial.
1180 if (!unswitchTrivialBranch(L, *BI, DT, LI, SE, MSSAU))
1181 return Changed;
1182
1183 // Mark that we managed to unswitch something.
1184 Changed = true;
1185
1186 // If we only unswitched some of the conditions feeding the branch, we won't
1187 // have collapsed it to a single successor.
1188 BI = cast<BranchInst>(CurrentBB->getTerminator());
1189 if (BI->isConditional())
1190 return Changed;
1191
1192 // Follow the newly unconditional branch into its successor.
1193 CurrentBB = BI->getSuccessor(0);
1194
1195 // When continuing, if we exit the loop or reach a previous visited block,
1196 // then we can not reach any trivial condition candidates (unfoldable
1197 // branch instructions or switch instructions) and no unswitch can happen.
1198 } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);
1199
1200 return Changed;
1201}
1202
1203/// Build the cloned blocks for an unswitched copy of the given loop.
1204///
1205/// The cloned blocks are inserted before the loop preheader (`LoopPH`) and
1206/// after the split block (`SplitBB`) that will be used to select between the
1207/// cloned and original loop.
1208///
1209/// This routine handles cloning all of the necessary loop blocks and exit
1210/// blocks including rewriting their instructions and the relevant PHI nodes.
1211/// Any loop blocks or exit blocks which are dominated by a different successor
1212/// than the one for this clone of the loop blocks can be trivially skipped. We
1213/// use the `DominatingSucc` map to determine whether a block satisfies that
1214/// property with a simple map lookup.
1215///
1216/// It also correctly creates the unconditional branch in the cloned
1217/// unswitched parent block to only point at the unswitched successor.
1218///
1219/// This does not handle most of the necessary updates to `LoopInfo`. Only exit
1220/// block splitting is correctly reflected in `LoopInfo`, essentially all of
1221/// the cloned blocks (and their loops) are left without full `LoopInfo`
1222/// updates. This also doesn't fully update `DominatorTree`. It adds the cloned
1223/// blocks to them but doesn't create the cloned `DominatorTree` structure and
1224/// instead the caller must recompute an accurate DT. It *does* correctly
1225/// update the `AssumptionCache` provided in `AC`.
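/// For example (illustrative): when unswitching `br i1 %inv, label %A, label %B`
/// with %A as the unswitched successor, blocks dominated by %B are simply not
/// cloned (they are unreachable in the clone), and the cloned parent block
/// ends in an unconditional branch straight to the clone of %A.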
1226static BasicBlock *buildClonedLoopBlocks(
1227 Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB,
1228 ArrayRef<BasicBlock *> ExitBlocks, BasicBlock *ParentBB,
1229 BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB,
1230 const SmallDenseMap<BasicBlock *, BasicBlock *, 16> &DominatingSucc,
1231 ValueToValueMapTy &VMap,
1232 SmallVectorImpl<DominatorTree::UpdateType> &DTUpdates, AssumptionCache &AC,
1233 DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU,
1234 ScalarEvolution *SE) {
1235 SmallVector<BasicBlock *, 4> NewBlocks;
1236 NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());
1237
1238 // We will need to clone a bunch of blocks, wrap up the clone operation in
1239 // a helper.
1240 auto CloneBlock = [&](BasicBlock *OldBB) {
1241 // Clone the basic block and insert it before the new preheader.
1242 BasicBlock *NewBB = CloneBasicBlock(OldBB, VMap, ".us", OldBB->getParent());
1243 NewBB->moveBefore(LoopPH);
1244
1245 // Record this block and the mapping.
1246 NewBlocks.push_back(NewBB);
1247 VMap[OldBB] = NewBB;
1248
1249 return NewBB;
1250 };
1251
1252 // We skip cloning blocks when they have a dominating succ that is not the
1253 // succ we are cloning for.
1254 auto SkipBlock = [&](BasicBlock *BB) {
1255 auto It = DominatingSucc.find(BB);
1256 return It != DominatingSucc.end() && It->second != UnswitchedSuccBB;
1257 };
1258
1259 // First, clone the preheader.
1260 auto *ClonedPH = CloneBlock(LoopPH);
1261
1262 // Then clone all the loop blocks, skipping the ones that aren't necessary.
1263 for (auto *LoopBB : L.blocks())
1264 if (!SkipBlock(LoopBB))
1265 CloneBlock(LoopBB);
1266
1267 // Split all the loop exit edges so that when we clone the exit blocks, if
1268 // any of the exit blocks are *also* a preheader for some other loop, we
1269 // don't create multiple predecessors entering the loop header.
1270 for (auto *ExitBB : ExitBlocks) {
1271 if (SkipBlock(ExitBB))
1272 continue;
1273
1274 // When we are going to clone an exit, we don't need to clone all the
1275 // instructions in the exit block and we want to ensure we have an easy
1276 // place to merge the CFG, so split the exit first. This is always safe to
1277 // do because there cannot be any non-loop predecessors of a loop exit in
1278 // loop simplified form.
1279 auto *MergeBB = SplitBlock(ExitBB, ExitBB->begin(), &DT, &LI, MSSAU);
1280
1281 // Rearrange the names to make it easier to write test cases by having the
1282 // exit block carry the suffix rather than the merge block carrying the
1283 // suffix.
1284 MergeBB->takeName(ExitBB);
1285 ExitBB->setName(Twine(MergeBB->getName()) + ".split");
1286
1287 // Now clone the original exit block.
1288 auto *ClonedExitBB = CloneBlock(ExitBB);
1289 assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&
1290 "Exit block should have been split to have one successor!");
1291 assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&
1292 "Cloned exit block has the wrong successor!");
1293
1294 // Remap any cloned instructions and create a merge phi node for them.
1295 for (auto ZippedInsts : llvm::zip_first(
1296 llvm::make_range(ExitBB->begin(), std::prev(ExitBB->end())),
1297 llvm::make_range(ClonedExitBB->begin(),
1298 std::prev(ClonedExitBB->end())))) {
1299 Instruction &I = std::get<0>(ZippedInsts);
1300 Instruction &ClonedI = std::get<1>(ZippedInsts);
1301
1302 // The only instructions in the exit block should be PHI nodes and
1303 // potentially a landing pad.
1304 assert(
1305 (isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchSwitchInst>(I)) &&
1306 "Bad instruction in exit block!");
1307 // We should have a value map between the instruction and its clone.
1308 assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!");
1309
1310 // Forget SCEVs based on exit phis in case SCEV looked through the phi.
1311 if (SE)
1312 if (auto *PN = dyn_cast<PHINode>(&I))
1313 SE->forgetValue(PN);
1314
1315 BasicBlock::iterator InsertPt = MergeBB->getFirstInsertionPt();
1316
1317 auto *MergePN =
1318 PHINode::Create(I.getType(), /*NumReservedValues*/ 2, ".us-phi");
1319 MergePN->insertBefore(InsertPt);
1320 MergePN->setDebugLoc(InsertPt->getDebugLoc());
1321 I.replaceAllUsesWith(MergePN);
1322 MergePN->addIncoming(&I, ExitBB);
1323 MergePN->addIncoming(&ClonedI, ClonedExitBB);
1324 }
1325 }
1326
1327 // Rewrite the instructions in the cloned blocks to refer to the instructions
1328 // in the cloned blocks. We have to do this as a second pass so that we have
1329 // everything available. Also, we have inserted new instructions which may
1330 // include assume intrinsics, so we update the assumption cache while
1331 // processing this.
1332 Module *M = ClonedPH->getParent()->getParent();
1333 for (auto *ClonedBB : NewBlocks)
1334 for (Instruction &I : *ClonedBB) {
1335 RemapDbgRecordRange(M, I.getDbgRecordRange(), VMap,
1336 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1337 RemapInstruction(&I, VMap,
1338 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1339 if (auto *II = dyn_cast<AssumeInst>(&I))
1340 AC.registerAssumption(II);
1341 }
1342
1343 // Update any PHI nodes in the cloned successors of the skipped blocks to not
1344 // have spurious incoming values.
1345 for (auto *LoopBB : L.blocks())
1346 if (SkipBlock(LoopBB))
1347 for (auto *SuccBB : successors(LoopBB))
1348 if (auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB)))
1349 for (PHINode &PN : ClonedSuccBB->phis())
1350 PN.removeIncomingValue(LoopBB, /*DeletePHIIfEmpty*/ false);
1351
1352 // Remove the cloned parent as a predecessor of any successor we ended up
1353 // cloning other than the unswitched one.
1354 auto *ClonedParentBB = cast<BasicBlock>(VMap.lookup(ParentBB));
1355 for (auto *SuccBB : successors(ParentBB)) {
1356 if (SuccBB == UnswitchedSuccBB)
1357 continue;
1358
1359 auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB));
1360 if (!ClonedSuccBB)
1361 continue;
1362
1363 ClonedSuccBB->removePredecessor(ClonedParentBB,
1364 /*KeepOneInputPHIs*/ true);
1365 }
1366
1367 // Replace the cloned branch with an unconditional branch to the cloned
1368 // unswitched successor.
1369 auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB));
1370 Instruction *ClonedTerminator = ClonedParentBB->getTerminator();
1371 // Trivial Simplification. If Terminator is a conditional branch and
1372 // condition becomes dead - erase it.
1373 Value *ClonedConditionToErase = nullptr;
1374 if (auto *BI = dyn_cast<BranchInst>(ClonedTerminator))
1375 ClonedConditionToErase = BI->getCondition();
1376 else if (auto *SI = dyn_cast<SwitchInst>(ClonedTerminator))
1377 ClonedConditionToErase = SI->getCondition();
1378
1379 Instruction *BI = BranchInst::Create(ClonedSuccBB, ClonedParentBB);
1380 BI->setDebugLoc(ClonedTerminator->getDebugLoc());
1381 ClonedTerminator->eraseFromParent();
1382
1383 if (ClonedConditionToErase)
1384 RecursivelyDeleteTriviallyDeadInstructions(ClonedConditionToErase, nullptr,
1385 MSSAU);
1386
1387 // If there are duplicate entries in the PHI nodes because of multiple edges
1388 // to the unswitched successor, we need to nuke all but one as we replaced it
1389 // with a direct branch.
1390 for (PHINode &PN : ClonedSuccBB->phis()) {
1391 bool Found = false;
1392 // Loop over the incoming operands backwards so we can easily delete as we
1393 // go without invalidating the index.
1394 for (int i = PN.getNumOperands() - 1; i >= 0; --i) {
1395 if (PN.getIncomingBlock(i) != ClonedParentBB)
1396 continue;
1397 if (!Found) {
1398 Found = true;
1399 continue;
1400 }
1401 PN.removeIncomingValue(i, /*DeletePHIIfEmpty*/ false);
1402 }
1403 }
1404
1405 // Record the domtree updates for the new blocks.
1406 SmallPtrSet<BasicBlock *, 4> SuccSet;
1407 for (auto *ClonedBB : NewBlocks) {
1408 for (auto *SuccBB : successors(ClonedBB))
1409 if (SuccSet.insert(SuccBB).second)
1410 DTUpdates.push_back({DominatorTree::Insert, ClonedBB, SuccBB});
1411 SuccSet.clear();
1412 }
1413
1414 return ClonedPH;
1415}
1416
1417/// Recursively clone the specified loop and all of its children.
1418///
1419/// The target parent loop for the clone should be provided, or can be null if
1420/// the clone is a top-level loop. While cloning, all the blocks are mapped
1421/// with the provided value map. The entire original loop must be present in
1422/// the value map. The cloned loop is returned.
1423static Loop *cloneLoopNest(Loop &OrigRootL, Loop *RootParentL,
1424 const ValueToValueMapTy &VMap, LoopInfo &LI) {
1425 auto AddClonedBlocksToLoop = [&](Loop &OrigL, Loop &ClonedL) {
1426 assert(ClonedL.getBlocks().empty() && "Must start with an empty loop!");
1427 ClonedL.reserveBlocks(OrigL.getNumBlocks());
1428 for (auto *BB : OrigL.blocks()) {
1429 auto *ClonedBB = cast<BasicBlock>(VMap.lookup(BB));
1430 ClonedL.addBlockEntry(ClonedBB);
1431 if (LI.getLoopFor(BB) == &OrigL)
1432 LI.changeLoopFor(ClonedBB, &ClonedL);
1433 }
1434 };
1435
1436 // We specially handle the first loop because it may get cloned into
1437 // a different parent and because we most commonly are cloning leaf loops.
1438 Loop *ClonedRootL = LI.AllocateLoop();
1439 if (RootParentL)
1440 RootParentL->addChildLoop(ClonedRootL);
1441 else
1442 LI.addTopLevelLoop(ClonedRootL);
1443 AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
1444
1445 if (OrigRootL.isInnermost())
1446 return ClonedRootL;
1447
1448 // If we have a nest, we can quickly clone the entire loop nest using an
1449 // iterative approach because it is a tree. We keep the cloned parent in the
1450 // data structure to avoid repeatedly querying through a map to find it.
1451 SmallVector<std::pair<Loop *, Loop *>, 16> LoopsToClone;
1452 // Build up the loops to clone in reverse order as we'll clone them from the
1453 // back.
1454 for (Loop *ChildL : llvm::reverse(OrigRootL))
1455 LoopsToClone.push_back({ClonedRootL, ChildL});
1456 do {
1457 Loop *ClonedParentL, *L;
1458 std::tie(ClonedParentL, L) = LoopsToClone.pop_back_val();
1459 Loop *ClonedL = LI.AllocateLoop();
1460 ClonedParentL->addChildLoop(ClonedL);
1461 AddClonedBlocksToLoop(*L, *ClonedL);
1462 for (Loop *ChildL : llvm::reverse(*L))
1463 LoopsToClone.push_back({ClonedL, ChildL});
1464 } while (!LoopsToClone.empty());
1465
1466 return ClonedRootL;
1467}
1468
1469/// Build the cloned loops of an original loop from unswitching.
1470///
1471/// Because unswitching simplifies the CFG of the loop, this isn't a trivial
1472/// operation. We need to re-verify that there even is a loop (as the backedge
1473/// may not have been cloned), and even if there are remaining backedges the
1474/// backedge set may be different. However, we know that each child loop is
1475/// undisturbed, we only need to find where to place each child loop within
1476/// either any parent loop or within a cloned version of the original loop.
1477///
1478/// Because child loops may end up cloned outside of any cloned version of the
1479/// original loop, multiple cloned sibling loops may be created. All of them
1480/// are returned so that the newly introduced loop nest roots can be
1481/// identified.
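/// Illustrative example (editor's note): if unswitching removed the cloned
/// copy of the only backedge, the cloned header no longer forms a loop; any
/// child loops whose cloned headers survive are then rebuilt via cloneLoopNest
/// and reported through NonChildClonedLoops as new sibling loops.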
1482static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks,
1483 const ValueToValueMapTy &VMap, LoopInfo &LI,
1484 SmallVectorImpl<Loop *> &NonChildClonedLoops) {
1485 Loop *ClonedL = nullptr;
1486
1487 auto *OrigPH = OrigL.getLoopPreheader();
1488 auto *OrigHeader = OrigL.getHeader();
1489
1490 auto *ClonedPH = cast<BasicBlock>(VMap.lookup(OrigPH));
1491 auto *ClonedHeader = cast<BasicBlock>(VMap.lookup(OrigHeader));
1492
1493 // We need to know the loops of the cloned exit blocks to even compute the
1494 // accurate parent loop. If we only clone exits to some parent of the
1495 // original parent, we want to clone into that outer loop. We also keep track
1496 // of the loops that our cloned exit blocks participate in.
1497 Loop *ParentL = nullptr;
1498 SmallVector<BasicBlock *, 4> ClonedExitsInLoops;
1499 SmallDenseMap<BasicBlock *, Loop *, 16> ExitLoopMap;
1500 ClonedExitsInLoops.reserve(ExitBlocks.size());
1501 for (auto *ExitBB : ExitBlocks)
1502 if (auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.lookup(ExitBB)))
1503 if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1504 ExitLoopMap[ClonedExitBB] = ExitL;
1505 ClonedExitsInLoops.push_back(ClonedExitBB);
1506 if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1507 ParentL = ExitL;
1508 }
1509 assert((!ParentL || ParentL == OrigL.getParentLoop() ||
1510 ParentL->contains(OrigL.getParentLoop())) &&
1511 "The computed parent loop should always contain (or be) the parent of "
1512 "the original loop.");
1513
1514 // We build the set of blocks dominated by the cloned header from the set of
1515 // cloned blocks out of the original loop. While not all of these will
1516 // necessarily be in the cloned loop, it is enough to establish that they
1517 // aren't in unreachable cycles, etc.
1518 SmallSetVector<BasicBlock *, 16> ClonedLoopBlocks;
1519 for (auto *BB : OrigL.blocks())
1520 if (auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB)))
1521 ClonedLoopBlocks.insert(ClonedBB);
1522
1523 // Rebuild the set of blocks that will end up in the cloned loop. We may have
1524 // skipped cloning some region of this loop which can in turn skip some of
1525 // the backedges so we have to rebuild the blocks in the loop based on the
1526 // backedges that remain after cloning.
1527 SmallVector<BasicBlock *, 16> Worklist;
1528 SmallPtrSet<BasicBlock *, 16> BlocksInClonedLoop;
1529 for (auto *Pred : predecessors(ClonedHeader)) {
1530 // The only possible non-loop header predecessor is the preheader because
1531 // we know we cloned the loop in simplified form.
1532 if (Pred == ClonedPH)
1533 continue;
1534
1535 // Because the loop was in simplified form, the only non-loop predecessor
1536 // should be the preheader.
1537 assert(ClonedLoopBlocks.count(Pred) && "Found a predecessor of the loop "
1538 "header other than the preheader "
1539 "that is not part of the loop!");
1540
1541 // Insert this block into the loop set and, on the first visit (if it
1542 // isn't the header we're currently walking), put it into the worklist to
1543 // recurse through.
1544 if (BlocksInClonedLoop.insert(Pred).second && Pred != ClonedHeader)
1545 Worklist.push_back(Pred);
1546 }
1547
1548 // If we had any backedges then there *is* a cloned loop. Put the header into
1549 // the loop set and then walk the worklist backwards to find all the blocks
1550 // that remain within the loop after cloning.
1551 if (!BlocksInClonedLoop.empty()) {
1552 BlocksInClonedLoop.insert(ClonedHeader);
1553
1554 while (!Worklist.empty()) {
1555 BasicBlock *BB = Worklist.pop_back_val();
1556 assert(BlocksInClonedLoop.count(BB) &&
1557 "Didn't put block into the loop set!");
1558
1559 // Insert any predecessors that are in the possible set into the cloned
1560 // set, and if the insert is successful, add them to the worklist. Note
1561 // that we filter on the blocks that are definitely reachable via the
1562 // backedge to the loop header so we may prune out dead code within the
1563 // cloned loop.
1564 for (auto *Pred : predecessors(BB))
1565 if (ClonedLoopBlocks.count(Pred) &&
1566 BlocksInClonedLoop.insert(Pred).second)
1567 Worklist.push_back(Pred);
1568 }
1569
1570 ClonedL = LI.AllocateLoop();
1571 if (ParentL) {
1572 ParentL->addBasicBlockToLoop(ClonedPH, LI);
1573 ParentL->addChildLoop(ClonedL);
1574 } else {
1575 LI.addTopLevelLoop(ClonedL);
1576 }
1577 NonChildClonedLoops.push_back(ClonedL);
1578
1579 ClonedL->reserveBlocks(BlocksInClonedLoop.size());
1580 // We don't want to just add the cloned loop blocks based on how we
1581 // discovered them. The original order of blocks was carefully built in
1582 // a way that doesn't rely on predecessor ordering. Rather than re-invent
1583 // that logic, we just re-walk the original blocks (and those of the child
1584 // loops) and filter them as we add them into the cloned loop.
1585 for (auto *BB : OrigL.blocks()) {
1586 auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB));
1587 if (!ClonedBB || !BlocksInClonedLoop.count(ClonedBB))
1588 continue;
1589
1590 // Directly add the blocks that are only in this loop.
1591 if (LI.getLoopFor(BB) == &OrigL) {
1592 ClonedL->addBasicBlockToLoop(ClonedBB, LI);
1593 continue;
1594 }
1595
1596 // We want to manually add it to this loop and parents.
1597 // Registering it with LoopInfo will happen when we clone the top
1598 // loop for this block.
1599 for (Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
1600 PL->addBlockEntry(ClonedBB);
1601 }
1602
1603 // Now add each child loop whose header remains within the cloned loop. All
1604 // of the blocks within the loop must satisfy the same constraints as the
1605 // header so once we pass the header checks we can just clone the entire
1606 // child loop nest.
1607 for (Loop *ChildL : OrigL) {
1608 auto *ClonedChildHeader =
1609 cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1610 if (!ClonedChildHeader || !BlocksInClonedLoop.count(ClonedChildHeader))
1611 continue;
1612
1613#ifndef NDEBUG
1614 // We should never have a cloned child loop header but fail to have
1615 // all of the blocks for that child loop.
1616 for (auto *ChildLoopBB : ChildL->blocks())
1617 assert(BlocksInClonedLoop.count(
1618 cast<BasicBlock>(VMap.lookup(ChildLoopBB))) &&
1619 "Child cloned loop has a header within the cloned outer "
1620 "loop but not all of its blocks!");
1621#endif
1622
1623 cloneLoopNest(*ChildL, ClonedL, VMap, LI);
1624 }
1625 }
1626
1627 // Now that we've handled all the components of the original loop that were
1628 // cloned into a new loop, we still need to handle anything from the original
1629 // loop that wasn't in a cloned loop.
1630
1631 // Figure out what blocks are left to place within any loop nest containing
1632 // the unswitched loop. If we never formed a loop, the cloned PH is one of
1633 // them.
1634 SmallPtrSet<BasicBlock *, 16> UnloopedBlockSet;
1635 if (BlocksInClonedLoop.empty())
1636 UnloopedBlockSet.insert(ClonedPH);
1637 for (auto *ClonedBB : ClonedLoopBlocks)
1638 if (!BlocksInClonedLoop.count(ClonedBB))
1639 UnloopedBlockSet.insert(ClonedBB);
1640
1641 // Copy the cloned exits and sort them in ascending loop depth, we'll work
1642 // backwards across these to process them inside out. The order shouldn't
1643 // matter as we're just trying to build up the map from inside-out; we use
1644 // the map in a more stably ordered way below.
1645 auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
1646 llvm::sort(OrderedClonedExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1647 return ExitLoopMap.lookup(LHS)->getLoopDepth() <
1648 ExitLoopMap.lookup(RHS)->getLoopDepth();
1649 });
1650
1651 // Populate the existing ExitLoopMap with everything reachable from each
1652 // exit, starting from the inner most exit.
1653 while (!UnloopedBlockSet.empty() && !OrderedClonedExitsInLoops.empty()) {
1654 assert(Worklist.empty() && "Didn't clear worklist!");
1655
1656 BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
1657 Loop *ExitL = ExitLoopMap.lookup(ExitBB);
1658
1659 // Walk the CFG back until we hit the cloned PH adding everything reachable
1660 // and in the unlooped set to this exit block's loop.
1661 Worklist.push_back(ExitBB);
1662 do {
1663 BasicBlock *BB = Worklist.pop_back_val();
1664 // We can stop recursing at the cloned preheader (if we get there).
1665 if (BB == ClonedPH)
1666 continue;
1667
1668 for (BasicBlock *PredBB : predecessors(BB)) {
1669 // If this pred has already been moved to our set or is part of some
1670 // (inner) loop, no update needed.
1671 if (!UnloopedBlockSet.erase(PredBB)) {
1672 assert(
1673 (BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) &&
1674 "Predecessor not mapped to a loop!");
1675 continue;
1676 }
1677
1678 // We just insert into the loop set here. We'll add these blocks to the
1679 // exit loop after we build up the set in an order that doesn't rely on
1680 // predecessor order (which in turn relies on use list order).
1681 bool Inserted = ExitLoopMap.insert({PredBB, ExitL}).second;
1682 (void)Inserted;
1683 assert(Inserted && "Should only visit an unlooped block once!");
1684
1685 // And recurse through to its predecessors.
1686 Worklist.push_back(PredBB);
1687 }
1688 } while (!Worklist.empty());
1689 }
1690
1691 // Now that the ExitLoopMap gives us a mapping for all the non-looping cloned
1692 // blocks to their outer loops, walk the cloned blocks and the cloned exits
1693 // in their original order adding them to the correct loop.
1694
1695 // We need a stable insertion order. We use the original loop order and map
1696 // into the correct parent loop.
1697 for (auto *BB : llvm::concat<BasicBlock *const>(
1698 ArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
1699 if (Loop *OuterL = ExitLoopMap.lookup(BB))
1700 OuterL->addBasicBlockToLoop(BB, LI);
1701
1702#ifndef NDEBUG
1703 for (auto &BBAndL : ExitLoopMap) {
1704 auto *BB = BBAndL.first;
1705 auto *OuterL = BBAndL.second;
1706 assert(LI.getLoopFor(BB) == OuterL &&
1707 "Failed to put all blocks into outer loops!");
1708 }
1709#endif
1710
1711 // Now that all the blocks are placed into the correct containing loop in the
1712 // absence of child loops, find all the potentially cloned child loops and
1713 // clone them into whatever outer loop we placed their header into.
1714 for (Loop *ChildL : OrigL) {
1715 auto *ClonedChildHeader =
1716 cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1717 if (!ClonedChildHeader || BlocksInClonedLoop.count(ClonedChildHeader))
1718 continue;
1719
1720#ifndef NDEBUG
1721 for (auto *ChildLoopBB : ChildL->blocks())
1722 assert(VMap.count(ChildLoopBB) &&
1723 "Cloned a child loop header but not all of that loops blocks!");
1724#endif
1725
1726 NonChildClonedLoops.push_back(cloneLoopNest(
1727 *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI));
1728 }
1729}
1730
1731static void
1732 deleteDeadClonedBlocks(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
1733 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
1734 DominatorTree &DT, MemorySSAUpdater *MSSAU) {
1735 // Find all the dead clones, and remove them from their successors.
1736 SmallVector<BasicBlock *, 16> DeadBlocks;
1737 for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
1738 for (const auto &VMap : VMaps)
1739 if (BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB)))
1740 if (!DT.isReachableFromEntry(ClonedBB)) {
1741 for (BasicBlock *SuccBB : successors(ClonedBB))
1742 SuccBB->removePredecessor(ClonedBB);
1743 DeadBlocks.push_back(ClonedBB);
1744 }
1745
1746 // Remove all MemorySSA in the dead blocks
1747 if (MSSAU) {
1748 SmallSetVector<BasicBlock *, 8> DeadBlockSet(DeadBlocks.begin(),
1749 DeadBlocks.end());
1750 MSSAU->removeBlocks(DeadBlockSet);
1751 }
1752
1753 // Drop any remaining references to break cycles.
1754 for (BasicBlock *BB : DeadBlocks)
1755 BB->dropAllReferences();
1756 // Erase them from the IR.
1757 for (BasicBlock *BB : DeadBlocks)
1758 BB->eraseFromParent();
1759}
1760
1761 static void deleteDeadBlocksFromLoop(Loop &L,
1762 SmallVectorImpl<BasicBlock *> &ExitBlocks,
1763 DominatorTree &DT, LoopInfo &LI,
1764 MemorySSAUpdater *MSSAU,
1765 ScalarEvolution *SE,
1766 LPMUpdater &LoopUpdater) {
1767 // Find all the dead blocks tied to this loop, and remove them from their
1768 // successors.
1769 SmallSetVector<BasicBlock *, 8> DeadBlockSet;
1770
1771 // Start with loop/exit blocks and get a transitive closure of reachable dead
1772 // blocks.
1773 SmallVector<BasicBlock *, 16> DeathCandidates(ExitBlocks.begin(),
1774 ExitBlocks.end());
1775 DeathCandidates.append(L.blocks().begin(), L.blocks().end());
1776 while (!DeathCandidates.empty()) {
1777 auto *BB = DeathCandidates.pop_back_val();
1778 if (!DeadBlockSet.count(BB) && !DT.isReachableFromEntry(BB)) {
1779 for (BasicBlock *SuccBB : successors(BB)) {
1780 SuccBB->removePredecessor(BB);
1781 DeathCandidates.push_back(SuccBB);
1782 }
1783 DeadBlockSet.insert(BB);
1784 }
1785 }
1786
1787 // Remove all MemorySSA in the dead blocks
1788 if (MSSAU)
1789 MSSAU->removeBlocks(DeadBlockSet);
1790
1791 // Filter out the dead blocks from the exit blocks list so that it can be
1792 // used in the caller.
1793 llvm::erase_if(ExitBlocks,
1794 [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1795
1796 // Walk from this loop up through its parents removing all of the dead blocks.
1797 for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
1798 for (auto *BB : DeadBlockSet)
1799 ParentL->getBlocksSet().erase(BB);
1800 llvm::erase_if(ParentL->getBlocksVector(),
1801 [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1802 }
1803
1804 // Now delete the dead child loops. This raw delete will clear them
1805 // recursively.
1806 llvm::erase_if(L.getSubLoopsVector(), [&](Loop *ChildL) {
1807 if (!DeadBlockSet.count(ChildL->getHeader()))
1808 return false;
1809
1810 assert(llvm::all_of(ChildL->blocks(),
1811 [&](BasicBlock *ChildBB) {
1812 return DeadBlockSet.count(ChildBB);
1813 }) &&
1814 "If the child loop header is dead all blocks in the child loop must "
1815 "be dead as well!");
1816 LoopUpdater.markLoopAsDeleted(*ChildL, ChildL->getName());
1817 if (SE)
1818 SE->forgetBlockAndLoopDispositions();
1819 LI.destroy(ChildL);
1820 return true;
1821 });
1822
1823 // Remove the loop mappings for the dead blocks and drop all the references
1824 // from these blocks to others to handle cyclic references as we start
1825 // deleting the blocks themselves.
1826 for (auto *BB : DeadBlockSet) {
1827 // Check that the dominator tree has already been updated.
1828 assert(!DT.getNode(BB) && "Should already have cleared domtree!");
1829 LI.changeLoopFor(BB, nullptr);
1830 // Drop all uses of the instructions to make sure we won't have dangling
1831 // uses in other blocks.
1832 for (auto &I : *BB)
1833 if (!I.use_empty())
1834 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
1835 BB->dropAllReferences();
1836 }
1837
1838 // Actually delete the blocks now that they've been fully unhooked from the
1839 // IR.
1840 for (auto *BB : DeadBlockSet)
1841 BB->eraseFromParent();
1842}
1843
1844/// Recompute the set of blocks in a loop after unswitching.
1845///
1846 /// This walks from the original header's predecessors to rebuild the loop. We
1847/// take advantage of the fact that new blocks can't have been added, and so we
1848/// filter by the original loop's blocks. This also handles potentially
1849/// unreachable code that we don't want to explore but might be found examining
1850/// the predecessors of the header.
1851///
1852/// If the original loop is no longer a loop, this will return an empty set. If
1853/// it remains a loop, all the blocks within it will be added to the set
1854/// (including those blocks in inner loops).
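/// Sketch of the walk (editor's note): starting from the non-preheader
/// predecessors of the header (the surviving backedges), the code below walks
/// predecessors transitively, jumping across intact inner loops via their
/// preheaders, until the header is reached; the visited set becomes the new
/// loop block set.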
1855 static SmallPtrSet<const BasicBlock *, 16> recomputeLoopBlockSet(Loop &L,
1856 LoopInfo &LI) {
1857 SmallPtrSet<const BasicBlock *, 16> LoopBlockSet;
1858
1859 auto *PH = L.getLoopPreheader();
1860 auto *Header = L.getHeader();
1861
1862 // A worklist to use while walking backwards from the header.
1863 SmallVector<BasicBlock *, 16> Worklist;
1864
1865 // First walk the predecessors of the header to find the backedges. This will
1866 // form the basis of our walk.
1867 for (auto *Pred : predecessors(Header)) {
1868 // Skip the preheader.
1869 if (Pred == PH)
1870 continue;
1871
1872 // Because the loop was in simplified form, the only non-loop predecessor
1873 // is the preheader.
1874 assert(L.contains(Pred) && "Found a predecessor of the loop header other "
1875 "than the preheader that is not part of the "
1876 "loop!");
1877
1878 // Insert this block into the loop set and, on the first visit (if it
1879 // isn't the header we're currently walking), put it into the worklist to
1880 // recurse through.
1881 if (LoopBlockSet.insert(Pred).second && Pred != Header)
1882 Worklist.push_back(Pred);
1883 }
1884
1885 // If no backedges were found, we're done.
1886 if (LoopBlockSet.empty())
1887 return LoopBlockSet;
1888
1889 // We found backedges, recurse through them to identify the loop blocks.
1890 while (!Worklist.empty()) {
1891 BasicBlock *BB = Worklist.pop_back_val();
1892 assert(LoopBlockSet.count(BB) && "Didn't put block into the loop set!");
1893
1894 // No need to walk past the header.
1895 if (BB == Header)
1896 continue;
1897
1898 // Because we know the inner loop structure remains valid we can use the
1899 // loop structure to jump immediately across the entire nested loop.
1900 // Further, because it is in loop simplified form, we can directly jump
1901 // to its preheader afterward.
1902 if (Loop *InnerL = LI.getLoopFor(BB))
1903 if (InnerL != &L) {
1904 assert(L.contains(InnerL) &&
1905 "Should not reach a loop *outside* this loop!");
1906 // The preheader is the only possible predecessor of the loop so
1907 // insert it into the set and check whether it was already handled.
1908 auto *InnerPH = InnerL->getLoopPreheader();
1909 assert(L.contains(InnerPH) && "Cannot contain an inner loop block "
1910 "but not contain the inner loop "
1911 "preheader!");
1912 if (!LoopBlockSet.insert(InnerPH).second)
1913 // The only way to reach the preheader is through the loop body
1914 // itself so if it has been visited the loop is already handled.
1915 continue;
1916
1917 // Insert all of the blocks (other than those already present) into
1918 // the loop set. We expect at least the block that led us to find the
1919 // inner loop to be in the block set, but we may also have other loop
1920 // blocks if they were already enqueued as predecessors of some other
1921 // outer loop block.
1922 for (auto *InnerBB : InnerL->blocks()) {
1923 if (InnerBB == BB) {
1924 assert(LoopBlockSet.count(InnerBB) &&
1925 "Block should already be in the set!");
1926 continue;
1927 }
1928
1929 LoopBlockSet.insert(InnerBB);
1930 }
1931
1932 // Add the preheader to the worklist so we will continue past the
1933 // loop body.
1934 Worklist.push_back(InnerPH);
1935 continue;
1936 }
1937
1938 // Insert any predecessors that were in the original loop into the new
1939 // set, and if the insert is successful, add them to the worklist.
1940 for (auto *Pred : predecessors(BB))
1941 if (L.contains(Pred) && LoopBlockSet.insert(Pred).second)
1942 Worklist.push_back(Pred);
1943 }
1944
1945 assert(LoopBlockSet.count(Header) && "Cannot fail to add the header!");
1946
1947 // We've found all the blocks participating in the loop, return our completed
1948 // set.
1949 return LoopBlockSet;
1950}
1951
1952/// Rebuild a loop after unswitching removes some subset of blocks and edges.
1953///
1954/// The removal may have removed some child loops entirely but cannot have
1955/// disturbed any remaining child loops. However, they may need to be hoisted
1956/// to the parent loop (or to be top-level loops). The original loop may be
1957/// completely removed.
1958///
1959/// The sibling loops resulting from this update are returned. If the original
1960/// loop remains a valid loop, it will be the first entry in this list with all
1961 /// of the newly created sibling loops following it.
1962///
1963/// Returns true if the loop remains a loop after unswitching, and false if it
1964/// is no longer a loop after unswitching (and should not continue to be
1965/// referenced).
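/// Illustrative example (editor's note): if unswitching removed the last
/// remaining backedge, the recomputed block set is empty, every surviving
/// child loop is hoisted into HoistedLoops, the original loop is erased from
/// LoopInfo, and the function returns false.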
1966 static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
1967 LoopInfo &LI,
1968 SmallVectorImpl<Loop *> &HoistedLoops,
1969 ScalarEvolution *SE) {
1970 auto *PH = L.getLoopPreheader();
1971
1972 // Compute the actual parent loop from the exit blocks. Because we may have
1973 // pruned some exits the loop may be different from the original parent.
1974 Loop *ParentL = nullptr;
1975 SmallVector<Loop *, 4> ExitLoops;
1976 SmallVector<BasicBlock *, 4> ExitsInLoops;
1977 ExitsInLoops.reserve(ExitBlocks.size());
1978 for (auto *ExitBB : ExitBlocks)
1979 if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1980 ExitLoops.push_back(ExitL);
1981 ExitsInLoops.push_back(ExitBB);
1982 if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1983 ParentL = ExitL;
1984 }
1985
1986 // Recompute the blocks participating in this loop. This may be empty if it
1987 // is no longer a loop.
1988 auto LoopBlockSet = recomputeLoopBlockSet(L, LI);
1989
1990 // If we still have a loop, we need to re-set the loop's parent as the exit
1991 // block set changing may have moved it within the loop nest. Note that this
1992 // can only happen when this loop has a parent as it can only hoist the loop
1993 // *up* the nest.
1994 if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
1995 // Remove this loop's (original) blocks from all of the intervening loops.
1996 for (Loop *IL = L.getParentLoop(); IL != ParentL;
1997 IL = IL->getParentLoop()) {
1998 IL->getBlocksSet().erase(PH);
1999 for (auto *BB : L.blocks())
2000 IL->getBlocksSet().erase(BB);
2001 llvm::erase_if(IL->getBlocksVector(), [&](BasicBlock *BB) {
2002 return BB == PH || L.contains(BB);
2003 });
2004 }
2005
2006 LI.changeLoopFor(PH, ParentL);
2007 L.getParentLoop()->removeChildLoop(&L);
2008 if (ParentL)
2009 ParentL->addChildLoop(&L);
2010 else
2011 LI.addTopLevelLoop(&L);
2012 }
2013
2014 // Now we update all the blocks which are no longer within the loop.
2015 auto &Blocks = L.getBlocksVector();
2016 auto BlocksSplitI =
2017 LoopBlockSet.empty()
2018 ? Blocks.begin()
2019 : std::stable_partition(
2020 Blocks.begin(), Blocks.end(),
2021 [&](BasicBlock *BB) { return LoopBlockSet.count(BB); });
2022
2023 // Before we erase the list of unlooped blocks, build a set of them.
2024 SmallPtrSet<BasicBlock *, 16> UnloopedBlocks(BlocksSplitI, Blocks.end());
2025 if (LoopBlockSet.empty())
2026 UnloopedBlocks.insert(PH);
2027
2028 // Now erase these blocks from the loop.
2029 for (auto *BB : make_range(BlocksSplitI, Blocks.end()))
2030 L.getBlocksSet().erase(BB);
2031 Blocks.erase(BlocksSplitI, Blocks.end());
2032
2033 // Sort the exits in ascending loop depth, we'll work backwards across these
2034 // to process them inside out.
2035 llvm::stable_sort(ExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
2036 return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
2037 });
2038
2039 // We'll build up a set for each exit loop.
2040 SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks;
2041 Loop *PrevExitL = L.getParentLoop(); // The deepest possible exit loop.
2042
2043 auto RemoveUnloopedBlocksFromLoop =
2044 [](Loop &L, SmallPtrSetImpl<BasicBlock *> &UnloopedBlocks) {
2045 for (auto *BB : UnloopedBlocks)
2046 L.getBlocksSet().erase(BB);
2047 llvm::erase_if(L.getBlocksVector(), [&](BasicBlock *BB) {
2048 return UnloopedBlocks.count(BB);
2049 });
2050 };
2051
2053 while (!UnloopedBlocks.empty() && !ExitsInLoops.empty()) {
2054 assert(Worklist.empty() && "Didn't clear worklist!");
2055 assert(NewExitLoopBlocks.empty() && "Didn't clear loop set!");
2056
2057 // Grab the next exit block, in decreasing loop depth order.
2058 BasicBlock *ExitBB = ExitsInLoops.pop_back_val();
2059 Loop &ExitL = *LI.getLoopFor(ExitBB);
2060 assert(ExitL.contains(&L) && "Exit loop must contain the inner loop!");
2061
2062 // Erase all of the unlooped blocks from the loops between the previous
2063 // exit loop and this exit loop. This works because the ExitInLoops list is
2064 // sorted in increasing order of loop depth and thus we visit loops in
2065 // decreasing order of loop depth.
2066 for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->getParentLoop())
2067 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
2068
2069 // Walk the CFG back until we hit the cloned PH adding everything reachable
2070 // and in the unlooped set to this exit block's loop.
2071 Worklist.push_back(ExitBB);
2072 do {
2073 BasicBlock *BB = Worklist.pop_back_val();
2074 // We can stop recursing at the cloned preheader (if we get there).
2075 if (BB == PH)
2076 continue;
2077
2078 for (BasicBlock *PredBB : predecessors(BB)) {
2079 // If this pred has already been moved to our set or is part of some
2080 // (inner) loop, no update needed.
2081 if (!UnloopedBlocks.erase(PredBB)) {
2082 assert((NewExitLoopBlocks.count(PredBB) ||
2083 ExitL.contains(LI.getLoopFor(PredBB))) &&
2084 "Predecessor not in a nested loop (or already visited)!");
2085 continue;
2086 }
2087
2088 // We just insert into the loop set here. We'll add these blocks to the
2089 // exit loop after we build up the set in a deterministic order rather
2090 // than the predecessor-influenced visit order.
2091 bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
2092 (void)Inserted;
2093 assert(Inserted && "Should only visit an unlooped block once!");
2094
2095 // And recurse through to its predecessors.
2096 Worklist.push_back(PredBB);
2097 }
2098 } while (!Worklist.empty());
2099
2100 // If blocks in this exit loop were directly part of the original loop (as
2101 // opposed to a child loop) update the map to point to this exit loop. This
2102 // just updates a map and so the fact that the order is unstable is fine.
2103 for (auto *BB : NewExitLoopBlocks)
2104 if (Loop *BBL = LI.getLoopFor(BB))
2105 if (BBL == &L || !L.contains(BBL))
2106 LI.changeLoopFor(BB, &ExitL);
2107
2108 // We will remove the remaining unlooped blocks from this loop in the next
2109 // iteration or below.
2110 NewExitLoopBlocks.clear();
2111 }
2112
2113 // Any remaining unlooped blocks are no longer part of any loop unless they
2114 // are part of some child loop.
2115 for (; PrevExitL; PrevExitL = PrevExitL->getParentLoop())
2116 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
2117 for (auto *BB : UnloopedBlocks)
2118 if (Loop *BBL = LI.getLoopFor(BB))
2119 if (BBL == &L || !L.contains(BBL))
2120 LI.changeLoopFor(BB, nullptr);
2121
2122 // Sink all the child loops whose headers are no longer in the loop set to
2123 // the parent (or to be top level loops). We reach into the loop and directly
2124 // update its subloop vector to make this batch update efficient.
2125 auto &SubLoops = L.getSubLoopsVector();
2126 auto SubLoopsSplitI =
2127 LoopBlockSet.empty()
2128 ? SubLoops.begin()
2129 : std::stable_partition(
2130 SubLoops.begin(), SubLoops.end(), [&](Loop *SubL) {
2131 return LoopBlockSet.count(SubL->getHeader());
2132 });
2133 for (auto *HoistedL : make_range(SubLoopsSplitI, SubLoops.end())) {
2134 HoistedLoops.push_back(HoistedL);
2135 HoistedL->setParentLoop(nullptr);
2136
2137 // To compute the new parent of this hoisted loop we look at where we
2138 // placed the preheader above. We can't lookup the header itself because we
2139 // retained the mapping from the header to the hoisted loop. But the
2140 // preheader and header should have the exact same new parent computed
2141 // based on the set of exit blocks from the original loop as the preheader
2142 // is a predecessor of the header and so reached in the reverse walk. And
2143 // because the loops were all in simplified form the preheader of the
2144 // hoisted loop can't be part of some *other* loop.
2145 if (auto *NewParentL = LI.getLoopFor(HoistedL->getLoopPreheader()))
2146 NewParentL->addChildLoop(HoistedL);
2147 else
2148 LI.addTopLevelLoop(HoistedL);
2149 }
2150 SubLoops.erase(SubLoopsSplitI, SubLoops.end());
2151
2152 // Actually delete the loop if nothing remained within it.
2153 if (Blocks.empty()) {
2154 assert(SubLoops.empty() &&
2155 "Failed to remove all subloops from the original loop!");
2156 if (Loop *ParentL = L.getParentLoop())
2157 ParentL->removeChildLoop(llvm::find(*ParentL, &L));
2158 else
2159 LI.removeLoop(llvm::find(LI, &L));
2160 // markLoopAsDeleted for L should be triggered by the caller (it is
2161 // typically done within postUnswitch).
2162 if (SE)
2163 SE->forgetBlockAndLoopDispositions();
2164 LI.destroy(&L);
2165 return false;
2166 }
2167
2168 return true;
2169}
2170
2171/// Helper to visit a dominator subtree, invoking a callable on each node.
2172///
2173/// Returning false at any point will stop walking past that node of the tree.
2174template <typename CallableT>
2175void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
2176 SmallVector<DomTreeNode *, 4> DomWorklist;
2177 DomWorklist.push_back(DT[BB]);
2178#ifndef NDEBUG
2179 SmallPtrSet<DomTreeNode *, 4> Visited;
2180 Visited.insert(DT[BB]);
2181#endif
2182 do {
2183 DomTreeNode *N = DomWorklist.pop_back_val();
2184
2185 // Visit this node.
2186 if (!Callable(N->getBlock()))
2187 continue;
2188
2189 // Accumulate the child nodes.
2190 for (DomTreeNode *ChildN : *N) {
2191 assert(Visited.insert(ChildN).second &&
2192 "Cannot visit a node twice when walking a tree!");
2193 DomWorklist.push_back(ChildN);
2194 }
2195 } while (!DomWorklist.empty());
2196}
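// Example use (editor's note; this mirrors the call made in
// unswitchNontrivialInvariants below) - record, for every block dominated by a
// successor, which successor dominates it:
//   visitDomSubTree(DT, SuccBB, [&](BasicBlock *BB) {
//     DominatingSucc[BB] = SuccBB;
//     return true;
//   });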
2197
2198 static void postUnswitch(Loop &L, LPMUpdater &U, StringRef LoopName,
2199 bool CurrentLoopValid, bool PartiallyInvariant,
2200 bool InjectedCondition, ArrayRef<Loop *> NewLoops) {
2201 // If we did a non-trivial unswitch, we have added new (cloned) loops.
2202 if (!NewLoops.empty())
2203 U.addSiblingLoops(NewLoops);
2204
2205 // If the current loop remains valid, we should revisit it to catch any
2206 // other unswitch opportunities. Otherwise, we need to mark it as deleted.
2207 if (CurrentLoopValid) {
2208 if (PartiallyInvariant) {
2209 // Mark the new loop as partially unswitched, to avoid unswitching on
2210 // the same condition again.
2211 auto &Context = L.getHeader()->getContext();
2212 MDNode *DisableUnswitchMD = MDNode::get(
2213 Context,
2214 MDString::get(Context, "llvm.loop.unswitch.partial.disable"));
2215 MDNode *NewLoopID = makePostTransformationMetadata(
2216 Context, L.getLoopID(), {"llvm.loop.unswitch.partial"},
2217 {DisableUnswitchMD});
2218 L.setLoopID(NewLoopID);
2219 } else if (InjectedCondition) {
2220 // Do the same for injection of invariant conditions.
2221 auto &Context = L.getHeader()->getContext();
2222 MDNode *DisableUnswitchMD = MDNode::get(
2223 Context,
2224 MDString::get(Context, "llvm.loop.unswitch.injection.disable"));
2225 MDNode *NewLoopID = makePostTransformationMetadata(
2226 Context, L.getLoopID(), {"llvm.loop.unswitch.injection"},
2227 {DisableUnswitchMD});
2228 L.setLoopID(NewLoopID);
2229 } else
2230 U.revisitCurrentLoop();
2231 } else
2232 U.markLoopAsDeleted(L, LoopName);
2233}
2234
2235 static void unswitchNontrivialInvariants(
2236 Loop &L, Instruction &TI, ArrayRef<Value *> Invariants,
2237 IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI,
2238 AssumptionCache &AC, ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
2239 LPMUpdater &LoopUpdater, bool InsertFreeze, bool InjectedCondition) {
2240 auto *ParentBB = TI.getParent();
2241 BranchInst *BI = dyn_cast<BranchInst>(&TI);
2242 SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
2243
2244 // Save the current loop name in a variable so that we can report it even
2245 // after it has been deleted.
2246 std::string LoopName(L.getName());
2247
2248 // We can only unswitch switches, conditional branches with an invariant
2249 // condition, invariant conditions combined by an instruction, or
2250 // partially invariant instructions.
2251 assert((SI || (BI && BI->isConditional())) &&
2252 "Can only unswitch switches and conditional branch!");
2253 bool PartiallyInvariant = !PartialIVInfo.InstToDuplicate.empty();
2254 bool FullUnswitch =
2255 SI || (skipTrivialSelect(BI->getCondition()) == Invariants[0] &&
2256 !PartiallyInvariant);
2257 if (FullUnswitch)
2258 assert(Invariants.size() == 1 &&
2259 "Cannot have other invariants with full unswitching!");
2260 else
2262 "Partial unswitching requires an instruction as the condition!");
2263
2264 if (MSSAU && VerifyMemorySSA)
2265 MSSAU->getMemorySSA()->verifyMemorySSA();
2266
2267 // Constant and BBs tracking the cloned and continuing successor. When we are
2268 // unswitching the entire condition, this can just be trivially chosen to
2269 // unswitch towards `true`. However, when we are unswitching a set of
2270 // invariants combined with `and` or `or` or partially invariant instructions,
2271 // the combining operation determines the best direction to unswitch: we want
2272 // to unswitch the direction that will collapse the branch.
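// Illustrative example (editor's note): for `br i1 (and i1 %inv, i1 %var)`
// a false %inv collapses the whole condition to false, so we unswitch the
// false direction (Direction = false, ClonedSucc = 1); for a logical `or` a
// true %inv collapses it, so the default of unswitching towards `true` is
// kept.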
2273 bool Direction = true;
2274 int ClonedSucc = 0;
2275 if (!FullUnswitch) {
2276 Value *Cond = skipTrivialSelect(BI->getCondition());
2277 (void)Cond;
2278 assert((match(Cond, m_LogicalAnd()) || match(Cond, m_LogicalOr()) ||
2279 PartiallyInvariant) &&
2280 "Only `or`, `and`, an `select`, partially invariant instructions "
2281 "can combine invariants being unswitched.");
2282 if (!match(Cond, m_LogicalOr())) {
2283 if (match(Cond, m_LogicalAnd()) ||
2284 (PartiallyInvariant && !PartialIVInfo.KnownValue->isOneValue())) {
2285 Direction = false;
2286 ClonedSucc = 1;
2287 }
2288 }
2289 }
2290
2291 BasicBlock *RetainedSuccBB =
2292 BI ? BI->getSuccessor(1 - ClonedSucc) : SI->getDefaultDest();
2293 SmallSetVector<BasicBlock *, 4> UnswitchedSuccBBs;
2294 if (BI)
2295 UnswitchedSuccBBs.insert(BI->getSuccessor(ClonedSucc));
2296 else
2297 for (auto Case : SI->cases())
2298 if (Case.getCaseSuccessor() != RetainedSuccBB)
2299 UnswitchedSuccBBs.insert(Case.getCaseSuccessor());
2300
2301 assert(!UnswitchedSuccBBs.count(RetainedSuccBB) &&
2302 "Should not unswitch the same successor we are retaining!");
2303
2304 // The branch should be in this exact loop. Any inner loop's invariant branch
2305 // should be handled by unswitching that inner loop. The caller of this
2306 // routine should filter out any candidates that remain (but were skipped for
2307 // whatever reason).
2308 assert(LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!");
2309
2310 // Compute the parent loop now before we start hacking on things.
2311 Loop *ParentL = L.getParentLoop();
2312 // Get blocks in RPO order for MSSA update, before changing the CFG.
2313 LoopBlocksRPO LBRPO(&L);
2314 if (MSSAU)
2315 LBRPO.perform(&LI);
2316
2317 // Compute the outer-most loop containing one of our exit blocks. This is the
2318 // furthest up our loopnest which can be mutated, which we will use below to
2319 // update things.
2320 Loop *OuterExitL = &L;
2321 SmallVector<BasicBlock *, 4> ExitBlocks;
2322 L.getUniqueExitBlocks(ExitBlocks);
2323 for (auto *ExitBB : ExitBlocks) {
2324 // ExitBB can be an exit block for several levels in the loop nest. Make
2325 // sure we find the top most.
2326 Loop *NewOuterExitL = getTopMostExitingLoop(ExitBB, LI);
2327 if (!NewOuterExitL) {
2328 // We exited the entire nest with this block, so we're done.
2329 OuterExitL = nullptr;
2330 break;
2331 }
2332 if (NewOuterExitL != OuterExitL && NewOuterExitL->contains(OuterExitL))
2333 OuterExitL = NewOuterExitL;
2334 }
2335
2336 // At this point, we're definitely going to unswitch something so invalidate
2337 // any cached information in ScalarEvolution for the outer most loop
2338 // containing an exit block and all nested loops.
2339 if (SE) {
2340 if (OuterExitL)
2341 SE->forgetLoop(OuterExitL);
2342 else
2343 SE->forgetTopmostLoop(&L);
2344 SE->forgetBlockAndLoopDispositions();
2345 }
2346
2347 // If the edge from this terminator to a successor dominates that successor,
2348 // store a map from each block in its dominator subtree to it. This lets us
2349 // tell when cloning for a particular successor if a block is dominated by
2350 // some *other* successor with a single data structure. We use this to
2351 // significantly reduce cloning.
2352 SmallDenseMap<BasicBlock *, BasicBlock *, 16> DominatingSucc;
2353 for (auto *SuccBB : llvm::concat<BasicBlock *const>(ArrayRef(RetainedSuccBB),
2354 UnswitchedSuccBBs))
2355 if (SuccBB->getUniquePredecessor() ||
2356 llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
2357 return PredBB == ParentBB || DT.dominates(SuccBB, PredBB);
2358 }))
2359 visitDomSubTree(DT, SuccBB, [&](BasicBlock *BB) {
2360 DominatingSucc[BB] = SuccBB;
2361 return true;
2362 });
2363
2364 // Split the preheader, so that we know that there is a safe place to insert
2365 // the conditional branch. We will change the preheader to have a conditional
2366 // branch on LoopCond. The original preheader will become the split point
2367 // between the unswitched versions, and we will have a new preheader for the
2368 // original loop.
2369 BasicBlock *SplitBB = L.getLoopPreheader();
2370 BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI, MSSAU);
2371
2372 // Keep track of the dominator tree updates needed.
2373 SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
2374
2375 // Clone the loop for each unswitched successor.
2376 SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
2377 VMaps.reserve(UnswitchedSuccBBs.size());
2378 SmallDenseMap<BasicBlock *, BasicBlock *, 4> ClonedPHs;
2379 for (auto *SuccBB : UnswitchedSuccBBs) {
2380 VMaps.emplace_back(new ValueToValueMapTy());
2381 ClonedPHs[SuccBB] = buildClonedLoopBlocks(
2382 L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
2383 DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU, SE);
2384 }
2385
2386 // Drop metadata if we may break its semantics by moving this instr into the
2387 // split block.
2388 if (TI.getMetadata(LLVMContext::MD_make_implicit)) {
2389 if (DropNonTrivialImplicitNullChecks)
2390 // Do not spend time trying to understand if we can keep it, just drop it
2391 // to save compile time.
2392 TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2393 else {
2394 // It is only legal to preserve make.implicit metadata if we are
2395 // guaranteed not to reach an implicit null check after following this branch.
2396 ICFLoopSafetyInfo SafetyInfo;
2397 SafetyInfo.computeLoopSafetyInfo(&L);
2398 if (!SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
2399 TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2400 }
2401 }
2402
2403 // The stitching of the branched code back together depends on whether we're
2404 // doing full unswitching or not with the exception that we always want to
2405 // nuke the initial terminator placed in the split block.
2406 SplitBB->getTerminator()->eraseFromParent();
2407 if (FullUnswitch) {
2408 // Keep a clone of the terminator for MSSA updates.
2409 Instruction *NewTI = TI.clone();
2410 NewTI->insertInto(ParentBB, ParentBB->end());
2411
2412 // Splice the terminator from the original loop and rewrite its
2413 // successors.
2414 TI.moveBefore(*SplitBB, SplitBB->end());
2415 TI.dropLocation();
2416
2417 // First wire up the moved terminator to the preheaders.
2418 if (BI) {
2419 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2420 BI->setSuccessor(ClonedSucc, ClonedPH);
2421 BI->setSuccessor(1 - ClonedSucc, LoopPH);
2422 Value *Cond = skipTrivialSelect(BI->getCondition());
2423 if (InsertFreeze) {
2424 // We don't give any debug location to the new freeze, because the
2425 // BI (`dyn_cast<BranchInst>(TI)`) is an in-loop instruction hoisted
2426 // out of the loop.
2427 Cond = new FreezeInst(Cond, Cond->getName() + ".fr", BI->getIterator());
2429 }
2430 BI->setCondition(Cond);
2431 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2432 } else {
2433 assert(SI && "Must either be a branch or switch!");
2434
2435 // Walk the cases and directly update their successors.
2436 assert(SI->getDefaultDest() == RetainedSuccBB &&
2437 "Not retaining default successor!");
2438 SI->setDefaultDest(LoopPH);
2439 for (const auto &Case : SI->cases())
2440 if (Case.getCaseSuccessor() == RetainedSuccBB)
2441 Case.setSuccessor(LoopPH);
2442 else
2443 Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
2444
2445 if (InsertFreeze)
2446 SI->setCondition(new FreezeInst(SI->getCondition(),
2447 SI->getCondition()->getName() + ".fr",
2448 SI->getIterator()));
2449
2450 // We need to use the set to populate domtree updates as even when there
2451 // are multiple cases pointing at the same successor we only want to
2452 // remove and insert one edge in the domtree.
2453 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2454 DTUpdates.push_back(
2455 {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second});
2456 }
2457
2458 if (MSSAU) {
2459 DT.applyUpdates(DTUpdates);
2460 DTUpdates.clear();
2461
2462 // Remove all but one edge to the retained block and all unswitched
2463 // blocks. This is to avoid having duplicate entries in the cloned Phis,
2464 // when we know we only keep a single edge for each case.
2465 MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, RetainedSuccBB);
2466 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2467 MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, SuccBB);
2468
2469 for (auto &VMap : VMaps)
2470 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2471 /*IgnoreIncomingWithNoClones=*/true);
2472 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2473
2474 // Remove all edges to unswitched blocks.
2475 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2476 MSSAU->removeEdge(ParentBB, SuccBB);
2477 }
2478
2479 // Now unhook the successor relationship as we'll be replacing
2480 // the terminator with a direct branch. This is much simpler for branches
2481 // than switches so we handle those first.
2482 if (BI) {
2483 // Remove the parent as a predecessor of the unswitched successor.
2484 assert(UnswitchedSuccBBs.size() == 1 &&
2485 "Only one possible unswitched block for a branch!");
2486 BasicBlock *UnswitchedSuccBB = *UnswitchedSuccBBs.begin();
2487 UnswitchedSuccBB->removePredecessor(ParentBB,
2488 /*KeepOneInputPHIs*/ true);
2489 DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
2490 } else {
2491 // Note that we actually want to remove the parent block as a predecessor
2492 // of *every* case successor. The case successor is either unswitched,
2493 // completely eliminating an edge from the parent to that successor, or it
2494 // is a duplicate edge to the retained successor as the retained successor
2495 // is always the default successor and as we'll replace this with a direct
2496 // branch we no longer need the duplicate entries in the PHI nodes.
2497 SwitchInst *NewSI = cast<SwitchInst>(NewTI);
2498 assert(NewSI->getDefaultDest() == RetainedSuccBB &&
2499 "Not retaining default successor!");
2500 for (const auto &Case : NewSI->cases())
2501 Case.getCaseSuccessor()->removePredecessor(
2502 ParentBB,
2503 /*KeepOneInputPHIs*/ true);
2504
2505 // We need to use the set to populate domtree updates as even when there
2506 // are multiple cases pointing at the same successor we only want to
2507 // remove and insert one edge in the domtree.
2508 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2509 DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB});
2510 }
2511
2512 // Create a new unconditional branch to the continuing block (as opposed to
2513 // the one cloned).
2514 Instruction *NewBI = BranchInst::Create(RetainedSuccBB, ParentBB);
2515 NewBI->setDebugLoc(NewTI->getDebugLoc());
2516
2517 // After MSSAU update, remove the cloned terminator instruction NewTI.
2518 NewTI->eraseFromParent();
2519 } else {
2520 assert(BI && "Only branches have partial unswitching.");
2521 assert(UnswitchedSuccBBs.size() == 1 &&
2522 "Only one possible unswitched block for a branch!");
2523 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2524 // When doing a partial unswitch, we have to do a bit more work to build up
2525 // the branch in the split block.
2526 if (PartiallyInvariant)
2527 buildPartialInvariantUnswitchConditionalBranch(
2528 *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU, *BI);
2529 else {
2530 buildPartialUnswitchConditionalBranch(
2531 *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,
2532 FreezeLoopUnswitchCond, BI, &AC, DT, *BI);
2533 }
2534 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2535
2536 if (MSSAU) {
2537 DT.applyUpdates(DTUpdates);
2538 DTUpdates.clear();
2539
2540 // Perform MSSA cloning updates.
2541 for (auto &VMap : VMaps)
2542 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2543 /*IgnoreIncomingWithNoClones=*/true);
2544 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2545 }
2546 }
2547
2548 // Apply the updates accumulated above to get an up-to-date dominator tree.
2549 DT.applyUpdates(DTUpdates);
2550
2551 // Now that we have an accurate dominator tree, first delete the dead cloned
2552 // blocks so that we can accurately build any cloned loops. It is important to
2553 // not delete the blocks from the original loop yet because we still want to
2554 // reference the original loop to understand the cloned loop's structure.
2555 deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT, MSSAU);
2556
2557 // Build the cloned loop structure itself. This may be substantially
2558 // different from the original structure due to the simplified CFG. This also
2559 // handles inserting all the cloned blocks into the correct loops.
2560 SmallVector<Loop *, 4> NonChildClonedLoops;
2561 for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps)
2562 buildClonedLoops(L, ExitBlocks, *VMap, LI, NonChildClonedLoops);
2563
2564 // Now that our cloned loops have been built, we can update the original loop.
2565 // First we delete the dead blocks from it and then we rebuild the loop
2566 // structure taking these deletions into account.
2567 deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU, SE, LoopUpdater);
2568
2569 if (MSSAU && VerifyMemorySSA)
2570 MSSAU->getMemorySSA()->verifyMemorySSA();
2571
2572 SmallVector<Loop *, 4> HoistedLoops;
2573 bool IsStillLoop =
2574 rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops, SE);
2575
2576 if (MSSAU && VerifyMemorySSA)
2577 MSSAU->getMemorySSA()->verifyMemorySSA();
2578
2579 // This transformation has a high risk of corrupting the dominator tree, and
2580 // the below steps to rebuild loop structures will result in hard to debug
2581 // errors in that case so verify that the dominator tree is sane first.
2582 // FIXME: Remove this when the bugs stop showing up and rely on existing
2583 // verification steps.
2584 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
2585
2586 if (BI && !PartiallyInvariant) {
2587 // If we unswitched a branch which collapses the condition to a known
2588 // constant we want to replace all the uses of the invariants within both
2589 // the original and cloned blocks. We do this here so that we can use the
2590 // now updated dominator tree to identify which side the users are on.
2591 assert(UnswitchedSuccBBs.size() == 1 &&
2592 "Only one possible unswitched block for a branch!");
2593 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2594
2595 // When considering multiple partially-unswitched invariants
2596 // we can't just replace them with constants in both branches.
2597 //
2598 // For 'AND' we infer that the true branch ("continue") means true
2599 // for each invariant operand.
2600 // For 'OR' we can infer that the false branch ("continue") means false
2601 // for each invariant operand.
2602 // So in the multiple-invariant partial case we don't replace uses
2603 // in the unswitched branch.
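// Illustrative example (editor's note): for `br i1 (and i1 %a, i1 %b)` with
// invariants {%a, %b}, reaching the continue (true) successor implies both %a
// and %b are true, so both may be replaced there; reaching the unswitched
// (false) successor only tells us the conjunction is false, not which operand,
// so no per-operand replacement is done there.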
2604 bool ReplaceUnswitched =
2605 FullUnswitch || (Invariants.size() == 1) || PartiallyInvariant;
2606
2607 ConstantInt *UnswitchedReplacement =
2608 Direction ? ConstantInt::getTrue(BI->getContext())
2609 : ConstantInt::getFalse(BI->getContext());
2610 ConstantInt *ContinueReplacement =
2611 Direction ? ConstantInt::getFalse(BI->getContext())
2612 : ConstantInt::getTrue(BI->getContext());
2613 for (Value *Invariant : Invariants) {
2614 assert(!isa<Constant>(Invariant) &&
2615 "Should not be replacing constant values!");
2616 // Use make_early_inc_range here as set invalidates the iterator.
2617 for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
2618 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
2619 if (!UserI)
2620 continue;
2621
2622 // Replace it with the 'continue' side if in the main loop body, and the
2623 // unswitched if in the cloned blocks.
2624 if (DT.dominates(LoopPH, UserI->getParent()))
2625 U.set(ContinueReplacement);
2626 else if (ReplaceUnswitched &&
2627 DT.dominates(ClonedPH, UserI->getParent()))
2628 U.set(UnswitchedReplacement);
2629 }
2630 }
2631 }
2632
2633 // We can change which blocks are exit blocks of all the cloned sibling
2634 // loops, the current loop, and any parent loops which shared exit blocks
2635 // with the current loop. As a consequence, we need to re-form LCSSA for
2636 // them. But we shouldn't need to re-form LCSSA for any child loops.
2637 // FIXME: This could be made more efficient by tracking which exit blocks are
2638 // new, and focusing on them, but that isn't likely to be necessary.
2639 //
2640 // In order to reasonably rebuild LCSSA we need to walk inside-out across the
2641 // loop nest and update every loop that could have had its exits changed. We
2642 // also need to cover any intervening loops. We add all of these loops to
2643 // a list and sort them by loop depth to achieve this without updating
2644 // unnecessary loops.
2645 auto UpdateLoop = [&](Loop &UpdateL) {
2646#ifndef NDEBUG
2647 UpdateL.verifyLoop();
2648 for (Loop *ChildL : UpdateL) {
2649 ChildL->verifyLoop();
2650 assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&
2651 "Perturbed a child loop's LCSSA form!");
2652 }
2653#endif
2654 // First build LCSSA for this loop so that we can preserve it when
2655 // forming dedicated exits. We don't want to perturb some other loop's
2656 // LCSSA while doing that CFG edit.
2657 formLCSSA(UpdateL, DT, &LI, SE);
2658
2659 // For loops reached by this loop's original exit blocks we may
2660 // introduced new, non-dedicated exits. At least try to re-form dedicated
2661 // exits for these loops. This may fail if they couldn't have dedicated
2662 // exits to start with.
2663 formDedicatedExitBlocks(&UpdateL, &DT, &LI, MSSAU, /*PreserveLCSSA*/ true);
2664 };
2665
2666 // For non-child cloned loops and hoisted loops, we just need to update LCSSA
2667 // and we can do it in any order as they don't nest relative to each other.
2668 //
2669 // Also check if any of the loops we have updated have become top-level loops
2670 // as that will necessitate widening the outer loop scope.
2671 for (Loop *UpdatedL :
2672 llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
2673 UpdateLoop(*UpdatedL);
2674 if (UpdatedL->isOutermost())
2675 OuterExitL = nullptr;
2676 }
2677 if (IsStillLoop) {
2678 UpdateLoop(L);
2679 if (L.isOutermost())
2680 OuterExitL = nullptr;
2681 }
2682
2683 // If the original loop had exit blocks, walk up through the outer most loop
2684 // of those exit blocks to update LCSSA and form updated dedicated exits.
2685 if (OuterExitL != &L)
2686 for (Loop *OuterL = ParentL; OuterL != OuterExitL;
2687 OuterL = OuterL->getParentLoop())
2688 UpdateLoop(*OuterL);
2689
2690#ifndef NDEBUG
2691 // Verify the entire loop structure to catch any incorrect updates before we
2692 // progress in the pass pipeline.
2693 LI.verify(DT);
2694#endif
2695
2696 // Now that we've unswitched something, make callbacks to report the changes.
2697 // For that we need to merge together the updated loops and the cloned loops
2698 // and check whether the original loop survived.
2699 SmallVector<Loop *, 4> SibLoops;
2700 for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
2701 if (UpdatedL->getParentLoop() == ParentL)
2702 SibLoops.push_back(UpdatedL);
2703 postUnswitch(L, LoopUpdater, LoopName, IsStillLoop, PartiallyInvariant,
2704 InjectedCondition, SibLoops);
2705
2706 if (MSSAU && VerifyMemorySSA)
2707 MSSAU->getMemorySSA()->verifyMemorySSA();
2708
2709 if (BI)
2710 ++NumBranches;
2711 else
2712 ++NumSwitches;
2713}
2714
2715/// Recursively compute the cost of a dominator subtree based on the per-block
2716/// cost map provided.
2717///
2718 /// The recursive computation is memoized into the provided DT-indexed cost map
2719/// to allow querying it for most nodes in the domtree without it becoming
2720/// quadratic.
2721 static InstructionCost computeDomSubtreeCost(
2722 DomTreeNode &N,
2723 const SmallDenseMap<BasicBlock *, InstructionCost, 4> &BBCostMap,
2724 SmallDenseMap<DomTreeNode *, InstructionCost, 4> &DTCostMap) {
2725 // Don't accumulate cost (or recurse through) blocks not in our block cost
2726 // map and thus not part of the duplication cost being considered.
2727 auto BBCostIt = BBCostMap.find(N.getBlock());
2728 if (BBCostIt == BBCostMap.end())
2729 return 0;
2730
2731 // Lookup this node to see if we already computed its cost.
2732 auto DTCostIt = DTCostMap.find(&N);
2733 if (DTCostIt != DTCostMap.end())
2734 return DTCostIt->second;
2735
2736 // If not, we have to compute it. We can't use insert above and update
2737 // because computing the cost may insert more things into the map.
2738 InstructionCost Cost = std::accumulate(
2739 N.begin(), N.end(), BBCostIt->second,
2740 [&](InstructionCost Sum, DomTreeNode *ChildN) -> InstructionCost {
2741 return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
2742 });
2743 bool Inserted = DTCostMap.insert({&N, Cost}).second;
2744 (void)Inserted;
2745 assert(Inserted && "Should not insert a node while visiting children!");
2746 return Cost;
2747}
2748
2749 /// Turns a select instruction into an implicit control-flow branch,
2750/// making the following replacement:
2751///
2752/// head:
2753/// --code before select--
2754/// select %cond, %trueval, %falseval
2755/// --code after select--
2756///
2757/// into
2758///
2759/// head:
2760/// --code before select--
2761/// br i1 %cond, label %then, label %tail
2762///
2763/// then:
2764/// br %tail
2765///
2766/// tail:
2767/// phi [ %trueval, %then ], [ %falseval, %head]
2768/// unreachable
2769///
2770/// It also makes all relevant DT and LI updates, so that all structures are in
2771/// valid state after this transform.
2772 static BranchInst *turnSelectIntoBranch(SelectInst *SI, DominatorTree &DT,
2773 LoopInfo &LI, MemorySSAUpdater *MSSAU,
2774 AssumptionCache *AC) {
2775 LLVM_DEBUG(dbgs() << "Turning " << *SI << " into a branch.\n");
2776 BasicBlock *HeadBB = SI->getParent();
2777
2778 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
2779 SplitBlockAndInsertIfThen(SI->getCondition(), SI, false,
2780 SI->getMetadata(LLVMContext::MD_prof), &DTU, &LI);
2781 auto *CondBr = cast<BranchInst>(HeadBB->getTerminator());
2782 BasicBlock *ThenBB = CondBr->getSuccessor(0),
2783 *TailBB = CondBr->getSuccessor(1);
2784 if (MSSAU)
2785 MSSAU->moveAllAfterSpliceBlocks(HeadBB, TailBB, SI);
2786
2787 PHINode *Phi =
2788 PHINode::Create(SI->getType(), 2, "unswitched.select", SI->getIterator());
2789 Phi->addIncoming(SI->getTrueValue(), ThenBB);
2790 Phi->addIncoming(SI->getFalseValue(), HeadBB);
2791 Phi->setDebugLoc(SI->getDebugLoc());
2792 SI->replaceAllUsesWith(Phi);
2793 SI->eraseFromParent();
2794
2795 if (MSSAU && VerifyMemorySSA)
2796 MSSAU->getMemorySSA()->verifyMemorySSA();
2797
2798 ++NumSelects;
2799 return CondBr;
2800}
2801
2802 /// Turns an llvm.experimental.guard intrinsic into an implicit control-flow branch,
2803/// making the following replacement:
2804///
2805/// --code before guard--
2806/// call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
2807/// --code after guard--
2808///
2809/// into
2810///
2811/// --code before guard--
2812/// br i1 %cond, label %guarded, label %deopt
2813///
2814/// guarded:
2815/// --code after guard--
2816///
2817/// deopt:
2818/// call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
2819/// unreachable
2820///
2821/// It also makes all relevant DT and LI updates, so that all structures are in
2822/// valid state after this transform.
2823 static BranchInst *turnGuardIntoBranch(IntrinsicInst *GI, Loop &L,
2824 DominatorTree &DT, LoopInfo &LI,
2825 MemorySSAUpdater *MSSAU) {
2826 LLVM_DEBUG(dbgs() << "Turning " << *GI << " into a branch.\n");
2827 BasicBlock *CheckBB = GI->getParent();
2828
2829 if (MSSAU && VerifyMemorySSA)
2830 MSSAU->getMemorySSA()->verifyMemorySSA();
2831
2832 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
2833 // llvm.experimental.guard doesn't have branch weights. We can assume,
2834 // however, that the deopt path is unlikely.
2835 Instruction *DeoptBlockTerm = SplitBlockAndInsertIfThen(
2836 GI->getArgOperand(0), GI, true,
2839 : nullptr,
2840 &DTU, &LI);
2841 BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
2842 // SplitBlockAndInsertIfThen inserts control flow that branches to
2843 // DeoptBlockTerm if the condition is true. We want the opposite.
2844 CheckBI->swapSuccessors();
2845
2846 BasicBlock *GuardedBlock = CheckBI->getSuccessor(0);
2847 GuardedBlock->setName("guarded");
2848 CheckBI->getSuccessor(1)->setName("deopt");
2849 BasicBlock *DeoptBlock = CheckBI->getSuccessor(1);
2850
2851 if (MSSAU)
2852 MSSAU->moveAllAfterSpliceBlocks(CheckBB, GuardedBlock, GI);
2853
2854 GI->moveBefore(DeoptBlockTerm->getIterator());
2855 GI->setArgOperand(0, ConstantInt::getFalse(GI->getContext()));
2856
2857 if (MSSAU) {
2858 MemoryDef *MD = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(GI));
2859 MSSAU->moveToPlace(MD, DeoptBlock, MemorySSA::BeforeTerminator);
2860 if (VerifyMemorySSA)
2861 MSSAU->getMemorySSA()->verifyMemorySSA();
2862 }
2863
2864 if (VerifyLoopInfo)
2865 LI.verify(DT);
2866 ++NumGuards;
2867 return CheckBI;
2868}
2869
2870/// Cost multiplier is a way to limit the potentially exponential behavior
2871/// of loop-unswitch. The cost is multiplied in proportion to 2^(number of
2872/// unswitch candidates available). The number of "sibling" loops is also
2873/// taken into account, to approximate previous unswitches that have already
2874/// happened on this cluster of loops. There was an attempt to keep this
2875/// formula simple, just enough to limit the worst case behavior. Even if it
2876/// is not that simple now, it is still not an attempt to provide a detailed
2877/// heuristic size prediction.
2878///
2879/// TODO: Make a proper accounting of "explosion" effect for all kinds of
2880/// unswitch candidates, making adequate predictions instead of wild guesses.
2881/// That requires knowing not just the number of "remaining" candidates but
2882/// also costs of unswitching for each of these candidates.
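/// For illustration (hypothetical candidate counts, default option values): a
/// top-level loop in a function with 4 top-level loops gets a siblings
/// multiplier of max(4 / 2, 1) = 2 and a parent-size multiplier of 1 (no parent
/// loop). If the candidates amount to 10 clones, the default 8 unscaled
/// candidates leave a clones power of 2, so the final multiplier is
/// min(2 * 1 * 2^2, 50) = 8.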
2884 const Instruction &TI, const Loop &L, const LoopInfo &LI,
2885 const DominatorTree &DT,
2886 ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates) {
2887
2888 // Guards and other exiting conditions do not contribute to exponential
2889 // explosion as long as they dominate the latch (otherwise there might be
2890 // another path to the latch remaining that does not allow eliminating the
2891 // loop copy on unswitch).
2892 const BasicBlock *Latch = L.getLoopLatch();
2893 const BasicBlock *CondBlock = TI.getParent();
2894 if (DT.dominates(CondBlock, Latch) &&
2895 (isGuard(&TI) ||
2896 (TI.isTerminator() &&
2897 llvm::count_if(successors(&TI), [&L](const BasicBlock *SuccBB) {
2898 return L.contains(SuccBB);
2899 }) <= 1))) {
2900 NumCostMultiplierSkipped++;
2901 return 1;
2902 }
2903
2904 // Each invariant non-trivial condition, after being unswitched, is supposed
2905 // to have its own specialized sibling loop (the invariant condition has been
2906 // hoisted out of the child loop into a newly-cloned loop). When unswitching
2907 // conditions in nested loops, the basic block size of the outer loop should
2908 // not be altered much. If that size significantly increases across unswitching
2909 // invocations, something may be wrong; so adjust the final cost to take this
2910 // into account.
2911 auto *ParentL = L.getParentLoop();
2912 int ParentLoopSizeMultiplier = 1;
2913 if (ParentL)
2914 ParentLoopSizeMultiplier =
2915 std::max<int>(ParentL->getNumBlocks() / UnswitchParentBlocksDiv, 1);
2916
2917 int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
2918 : std::distance(LI.begin(), LI.end()));
2919 // Count the number of clones that all the candidates might cause during
2920 // unswitching. Branch/guard/select counts as 1, switch counts as log2 of its
2921 // cases.
2922 int UnswitchedClones = 0;
2923 for (const auto &Candidate : UnswitchCandidates) {
2924 const Instruction *CI = Candidate.TI;
2925 const BasicBlock *CondBlock = CI->getParent();
2926 bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch);
2927 if (isa<SelectInst>(CI)) {
2928 UnswitchedClones++;
2929 continue;
2930 }
2931 if (isGuard(CI)) {
2932 if (!SkipExitingSuccessors)
2933 UnswitchedClones++;
2934 continue;
2935 }
2936 int NonExitingSuccessors =
2937 llvm::count_if(successors(CondBlock),
2938 [SkipExitingSuccessors, &L](const BasicBlock *SuccBB) {
2939 return !SkipExitingSuccessors || L.contains(SuccBB);
2940 });
2941 UnswitchedClones += Log2_32(NonExitingSuccessors);
2942 }
2943
2944 // Ignore up to the "unscaled candidates" number of unswitch candidates
2945 // when calculating the power-of-two scaling of the cost. The main idea
2946 // with this control is to allow a small number of unswitches to happen
2947 // and rely more on siblings multiplier (see below) when the number
2948 // of candidates is small.
2949 unsigned ClonesPower =
2950 std::max(UnswitchedClones - (int)UnswitchNumInitialUnscaledCandidates, 0);
2951
2952 // Allowing top-level loops to spread a bit more than nested ones.
2953 int SiblingsMultiplier =
2954 std::max((ParentL ? SiblingsCount
2955 : SiblingsCount / (int)UnswitchSiblingsToplevelDiv),
2956 1);
2957 // Compute the cost multiplier in a way that won't overflow by saturating
2958 // at an upper bound.
2959 int CostMultiplier;
2960 if (ClonesPower > Log2_32(UnswitchThreshold) ||
2961 SiblingsMultiplier > UnswitchThreshold ||
2962 ParentLoopSizeMultiplier > UnswitchThreshold)
2963 CostMultiplier = UnswitchThreshold;
2964 else
2965 CostMultiplier = std::min(SiblingsMultiplier * ParentLoopSizeMultiplier *
2966 (1 << ClonesPower), (int)UnswitchThreshold);
2967
2968 LLVM_DEBUG(dbgs() << " Computed multiplier " << CostMultiplier
2969 << " (siblings " << SiblingsMultiplier << " * parent size "
2970 << ParentLoopSizeMultiplier << " * clones "
2971 << (1 << ClonesPower) << ")"
2972 << " for unswitch candidate: " << TI << "\n");
2973 return CostMultiplier;
2974}
2975
2978 IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch,
2979 const Loop &L, const LoopInfo &LI, AAResults &AA,
2980 const MemorySSAUpdater *MSSAU) {
2981 assert(UnswitchCandidates.empty() && "Should be!");
2982
2983 auto AddUnswitchCandidatesForInst = [&](Instruction *I, Value *Cond) {
2985 if (isa<Constant>(Cond))
2986 return;
2987 if (L.isLoopInvariant(Cond)) {
2988 UnswitchCandidates.push_back({I, {Cond}});
2989 return;
2990 }
2992 TinyPtrVector<Value *> Invariants =
2994 L, *static_cast<Instruction *>(Cond), LI);
2995 if (!Invariants.empty())
2996 UnswitchCandidates.push_back({I, std::move(Invariants)});
2997 }
2998 };
2999
3000 // Whether or not we should also collect guards in the loop.
3001 bool CollectGuards = false;
3002 if (UnswitchGuards) {
3003 auto *GuardDecl = Intrinsic::getDeclarationIfExists(
3004 L.getHeader()->getParent()->getParent(), Intrinsic::experimental_guard);
3005 if (GuardDecl && !GuardDecl->use_empty())
3006 CollectGuards = true;
3007 }
3008
3009 for (auto *BB : L.blocks()) {
3010 if (LI.getLoopFor(BB) != &L)
3011 continue;
3012
3013 for (auto &I : *BB) {
3014 if (auto *SI = dyn_cast<SelectInst>(&I)) {
3015 auto *Cond = SI->getCondition();
3016 // Do not unswitch vector selects and logical and/or selects
3017 if (Cond->getType()->isIntegerTy(1) && !SI->getType()->isIntegerTy(1))
3018 AddUnswitchCandidatesForInst(SI, Cond);
3019 } else if (CollectGuards && isGuard(&I)) {
3020 auto *Cond =
3021 skipTrivialSelect(cast<IntrinsicInst>(&I)->getArgOperand(0));
3022 // TODO: Support AND, OR conditions and partial unswitching.
3023 if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
3024 UnswitchCandidates.push_back({&I, {Cond}});
3025 }
3026 }
3027
3028 if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
3029 // We can only consider fully loop-invariant switch conditions as we need
3030 // to completely eliminate the switch after unswitching.
3031 if (!isa<Constant>(SI->getCondition()) &&
3032 L.isLoopInvariant(SI->getCondition()) && !BB->getUniqueSuccessor())
3033 UnswitchCandidates.push_back({SI, {SI->getCondition()}});
3034 continue;
3035 }
3036
3037 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
3038 if (!BI || !BI->isConditional() ||
3039 BI->getSuccessor(0) == BI->getSuccessor(1))
3040 continue;
3041
3042 AddUnswitchCandidatesForInst(BI, BI->getCondition());
3043 }
3044
3045 if (MSSAU && !findOptionMDForLoop(&L, "llvm.loop.unswitch.partial.disable") &&
3046 !any_of(UnswitchCandidates, [&L](auto &TerminatorAndInvariants) {
3047 return TerminatorAndInvariants.TI == L.getHeader()->getTerminator();
3048 })) {
3049 MemorySSA *MSSA = MSSAU->getMemorySSA();
3050 if (auto Info = hasPartialIVCondition(L, MSSAThreshold, *MSSA, AA)) {
3051 LLVM_DEBUG(
3052 dbgs() << "simple-loop-unswitch: Found partially invariant condition "
3053 << *Info->InstToDuplicate[0] << "\n");
3054 PartialIVInfo = *Info;
3055 PartialIVCondBranch = L.getHeader()->getTerminator();
3056 TinyPtrVector<Value *> ValsToDuplicate;
3057 llvm::append_range(ValsToDuplicate, Info->InstToDuplicate);
3058 UnswitchCandidates.push_back(
3059 {L.getHeader()->getTerminator(), std::move(ValsToDuplicate)});
3060 }
3061 }
3062 return !UnswitchCandidates.empty();
3063}
3064
3065/// Tries to canonicalize condition described by:
3066///
3067/// br (LHS pred RHS), label IfTrue, label IfFalse
3068///
3069/// into an equivalent form where `Pred` is a predicate we support for injected
3070/// invariants (so far limited to ult), the canonicalized LHS is loop-variant,
3071/// and the canonicalized RHS is loop-invariant.
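/// For example (illustrative pseudo-IR, names are hypothetical): the branch
/// `br (icmp sge i32 %x, 0), label %in_loop, label %exit`, where %x is
/// loop-variant and %in_loop is inside the loop, is canonicalized to the
/// equivalent `br (icmp ult i32 %x, 0x80000000), label %in_loop, label %exit`.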
3073 Value *&LHS, Value *&RHS,
3074 BasicBlock *&IfTrue,
3075 BasicBlock *&IfFalse,
3076 const Loop &L) {
3077 if (!L.contains(IfTrue)) {
3078 Pred = ICmpInst::getInversePredicate(Pred);
3079 std::swap(IfTrue, IfFalse);
3080 }
3081
3082 // Move loop-invariant argument to RHS position.
3083 if (L.isLoopInvariant(LHS)) {
3084 Pred = ICmpInst::getSwappedPredicate(Pred);
3085 std::swap(LHS, RHS);
3086 }
3087
3088 if (Pred == ICmpInst::ICMP_SGE && match(RHS, m_Zero())) {
3089 // Turn "x >=s 0" into "x <u SIGNED_MIN" (the signed minimum value of the type)
3090 Pred = ICmpInst::ICMP_ULT;
3091 RHS = ConstantInt::get(
3092 RHS->getContext(),
3093 APInt::getSignedMinValue(RHS->getType()->getIntegerBitWidth()));
3094 }
3095}
3096
3097/// Returns true, if predicate described by ( \p Pred, \p LHS, \p RHS )
3098/// succeeding into blocks ( \p IfTrue, \p IfFalse) can be optimized by
3099/// injecting a loop-invariant condition.
3101 const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS,
3102 const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L) {
3103 if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS))
3104 return false;
3105 // TODO: Support other predicates.
3106 if (Pred != ICmpInst::ICMP_ULT)
3107 return false;
3108 // TODO: Support non-loop-exiting branches?
3109 if (!L.contains(IfTrue) || L.contains(IfFalse))
3110 return false;
3111 // FIXME: For some reason this causes problems with MSSA updates, need to
3112 // investigate why. So far, just don't unswitch latch.
3113 if (L.getHeader() == IfTrue)
3114 return false;
3115 return true;
3116}
3117
3118/// Returns true if metadata on \p BI allows us to optimize branching into \p
3119/// TakenSucc via injection of invariant conditions. The branch should be hot
3120/// enough and not previously unswitched; the information about this comes from
3121/// the metadata.
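/// For illustration, assuming the hotness threshold is 16 (the threshold value
/// itself is configurable and not shown here), the required probability is
/// 15/16: branch weights of {999, 1} towards \p TakenSucc give an actual
/// probability of 999/1000 and pass the check, while weights of {3, 1} give
/// 3/4 and do not.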
3123 const BasicBlock *TakenSucc) {
3124 SmallVector<uint32_t> Weights;
3125 if (!extractBranchWeights(*BI, Weights))
3126 return false;
3128 BranchProbability LikelyTaken(T - 1, T);
3129
3130 assert(Weights.size() == 2 && "Unexpected profile data!");
3131 size_t Idx = BI->getSuccessor(0) == TakenSucc ? 0 : 1;
3132 auto Num = Weights[Idx];
3133 auto Denom = Weights[0] + Weights[1];
3134 // Degenerate or overflowed metadata.
3135 if (Denom == 0 || Num > Denom)
3136 return false;
3137 BranchProbability ActualTaken(Num, Denom);
3138 if (LikelyTaken > ActualTaken)
3139 return false;
3140 return true;
3141}
3142
3143/// Materialize pending invariant condition of the given candidate into IR. The
3144/// injected loop-invariant condition implies the original loop-variant branch
3145/// condition, so the materialization turns
3146///
3147/// loop_block:
3148/// ...
3149/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc
3150///
3151/// into
3152///
3153/// preheader:
3154/// %invariant_cond = LHS pred RHS
3155/// ...
3156/// loop_block:
3157/// br i1 %invariant_cond, label InLoopSucc, label OriginalCheck
3158/// OriginalCheck:
3159/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc
3160/// ...
3161static NonTrivialUnswitchCandidate
3162injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L,
3163 DominatorTree &DT, LoopInfo &LI,
3164 AssumptionCache &AC, MemorySSAUpdater *MSSAU) {
3165 assert(Candidate.hasPendingInjection() && "Nothing to inject!");
3166 BasicBlock *Preheader = L.getLoopPreheader();
3167 assert(Preheader && "Loop is not in simplified form?");
3168 assert(LI.getLoopFor(Candidate.TI->getParent()) == &L &&
3169 "Unswitching branch of inner loop!");
3170
3171 auto Pred = Candidate.PendingInjection->Pred;
3172 auto *LHS = Candidate.PendingInjection->LHS;
3173 auto *RHS = Candidate.PendingInjection->RHS;
3174 auto *InLoopSucc = Candidate.PendingInjection->InLoopSucc;
3175 auto *TI = cast<BranchInst>(Candidate.TI);
3176 auto *BB = Candidate.TI->getParent();
3177 auto *OutOfLoopSucc = InLoopSucc == TI->getSuccessor(0) ? TI->getSuccessor(1)
3178 : TI->getSuccessor(0);
3179 // FIXME: Remove this once limitation on successors is lifted.
3180 assert(L.contains(InLoopSucc) && "Not supported yet!");
3181 assert(!L.contains(OutOfLoopSucc) && "Not supported yet!");
3182 auto &Ctx = BB->getContext();
3183
3184 IRBuilder<> Builder(Preheader->getTerminator());
3185 assert(ICmpInst::isUnsigned(Pred) && "Not supported yet!");
3186 if (LHS->getType() != RHS->getType()) {
3187 if (LHS->getType()->getIntegerBitWidth() <
3188 RHS->getType()->getIntegerBitWidth())
3189 LHS = Builder.CreateZExt(LHS, RHS->getType(), LHS->getName() + ".wide");
3190 else
3191 RHS = Builder.CreateZExt(RHS, LHS->getType(), RHS->getName() + ".wide");
3192 }
3193 // Do not use builder here: CreateICmp may simplify this into a constant and
3194 // unswitching will break. Better optimize it away later.
3195 auto *InjectedCond =
3196 ICmpInst::Create(Instruction::ICmp, Pred, LHS, RHS, "injected.cond",
3197 Preheader->getTerminator()->getIterator());
3198
3199 BasicBlock *CheckBlock = BasicBlock::Create(Ctx, BB->getName() + ".check",
3200 BB->getParent(), InLoopSucc);
3201 Builder.SetInsertPoint(TI);
3202 auto *InvariantBr =
3203 Builder.CreateCondBr(InjectedCond, InLoopSucc, CheckBlock);
3204 // We don't know anything about the relation between the limits.
3206
3207 Builder.SetInsertPoint(CheckBlock);
3208 Builder.CreateCondBr(
3209 TI->getCondition(), TI->getSuccessor(0), TI->getSuccessor(1),
3210 !ProfcheckDisableMetadataFixes ? TI->getMetadata(LLVMContext::MD_prof)
3211 : nullptr);
3212 TI->eraseFromParent();
3213
3214 // Fixup phis.
3215 for (auto &I : *InLoopSucc) {
3216 auto *PN = dyn_cast<PHINode>(&I);
3217 if (!PN)
3218 break;
3219 auto *Inc = PN->getIncomingValueForBlock(BB);
3220 PN->addIncoming(Inc, CheckBlock);
3221 }
3222 OutOfLoopSucc->replacePhiUsesWith(BB, CheckBlock);
3223
3225 { DominatorTree::Insert, BB, CheckBlock },
3226 { DominatorTree::Insert, CheckBlock, InLoopSucc },
3227 { DominatorTree::Insert, CheckBlock, OutOfLoopSucc },
3228 { DominatorTree::Delete, BB, OutOfLoopSucc }
3229 };
3230
3231 DT.applyUpdates(DTUpdates);
3232 if (MSSAU)
3233 MSSAU->applyUpdates(DTUpdates, DT);
3234 L.addBasicBlockToLoop(CheckBlock, LI);
3235
3236#ifndef NDEBUG
3237 DT.verify();
3238 LI.verify(DT);
3239 if (MSSAU && VerifyMemorySSA)
3240 MSSAU->getMemorySSA()->verifyMemorySSA();
3241#endif
3242
3243 // TODO: In fact, cost of unswitching a new invariant candidate is *slightly*
3244 // higher because we have just inserted a new block. Need to think how to
3245 // adjust the cost of injected candidates when it was first computed.
3246 LLVM_DEBUG(dbgs() << "Injected a new loop-invariant branch " << *InvariantBr
3247 << " and considering it for unswitching.");
3248 ++NumInvariantConditionsInjected;
3249 return NonTrivialUnswitchCandidate(InvariantBr, { InjectedCond },
3250 Candidate.Cost);
3251}
3252
3253/// Given a chain of loop branch conditions looking like:
3254/// br (Variant < Invariant1)
3255/// br (Variant < Invariant2)
3256/// br (Variant < Invariant3)
3257/// ...
3258/// collect the set of invariant conditions on which we want to unswitch, which
3259/// look like:
3260/// Invariant1 <= Invariant2
3261/// Invariant2 <= Invariant3
3262/// ...
3263/// Though they might not immediately exist in the IR, we can still inject them.
3265 SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates, Loop &L,
3267 const DominatorTree &DT) {
3268
3271 if (Compares.size() < 2)
3272 return false;
3274 for (auto Prev = Compares.begin(), Next = Compares.begin() + 1;
3275 Next != Compares.end(); ++Prev, ++Next) {
3276 Value *LHS = Next->Invariant;
3277 Value *RHS = Prev->Invariant;
3278 BasicBlock *InLoopSucc = Prev->InLoopSucc;
3279 InjectedInvariant ToInject(NonStrictPred, LHS, RHS, InLoopSucc);
3280 NonTrivialUnswitchCandidate Candidate(Prev->Term, { LHS, RHS },
3281 std::nullopt, std::move(ToInject));
3282 UnswitchCandidates.push_back(std::move(Candidate));
3283 }
3284 return true;
3285}
3286
3287/// Collect unswitch candidates for invariant conditions that are not immediately
3288/// present in the loop, but that can be injected into the code if we decide it
3289/// is profitable.
3290/// An example of such a condition is the following:
3291///
3292/// for (...) {
3293/// x = load ...
3294/// if (! x <u C1) break;
3295/// if (! x <u C2) break;
3296/// <do something>
3297/// }
3298///
3299/// We can unswitch on the condition "C1 <=u C2". If it holds, then "x <u C1 <=u
3300/// C2" automatically implies "x <u C2", so we can get rid of one of the
3301/// loop-variant checks in the unswitched loop version.
3304 IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L,
3305 const DominatorTree &DT, const LoopInfo &LI, AAResults &AA,
3306 const MemorySSAUpdater *MSSAU) {
3308 return false;
3309
3310 if (!DT.isReachableFromEntry(L.getHeader()))
3311 return false;
3312 auto *Latch = L.getLoopLatch();
3313 // Need to have a single latch and a preheader.
3314 if (!Latch)
3315 return false;
3316 assert(L.getLoopPreheader() && "Must have a preheader!");
3317
3319 // Traverse the conditions that dominate the latch (and therefore dominate
3320 // each other).
3321 for (auto *DTN = DT.getNode(Latch); L.contains(DTN->getBlock());
3322 DTN = DTN->getIDom()) {
3323 CmpPredicate Pred;
3324 Value *LHS = nullptr, *RHS = nullptr;
3325 BasicBlock *IfTrue = nullptr, *IfFalse = nullptr;
3326 auto *BB = DTN->getBlock();
3327 // Ignore inner loops.
3328 if (LI.getLoopFor(BB) != &L)
3329 continue;
3330 auto *Term = BB->getTerminator();
3331 if (!match(Term, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
3332 m_BasicBlock(IfTrue), m_BasicBlock(IfFalse))))
3333 continue;
3334 if (!LHS->getType()->isIntegerTy())
3335 continue;
3336 canonicalizeForInvariantConditionInjection(Pred, LHS, RHS, IfTrue, IfFalse,
3337 L);
3338 if (!shouldTryInjectInvariantCondition(Pred, LHS, RHS, IfTrue, IfFalse, L))
3339 continue;
3341 continue;
3342 // Strip ZEXT for unsigned predicate.
3343 // TODO: once signed predicates are supported, also strip SEXT.
3344 CompareDesc Desc(cast<BranchInst>(Term), RHS, IfTrue);
3345 while (auto *Zext = dyn_cast<ZExtInst>(LHS))
3346 LHS = Zext->getOperand(0);
3347 CandidatesULT[LHS].push_back(Desc);
3348 }
3349
3350 bool Found = false;
3351 for (auto &It : CandidatesULT)
3353 UnswitchCandidates, L, ICmpInst::ICMP_ULT, It.second, DT);
3354 return Found;
3355}
3356
3358 if (!L.isSafeToClone())
3359 return false;
3360 for (auto *BB : L.blocks())
3361 for (auto &I : *BB) {
3362 if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
3363 return false;
3364 if (auto *CB = dyn_cast<CallBase>(&I)) {
3365 assert(!CB->cannotDuplicate() && "Checked by L.isSafeToClone().");
3366 if (CB->isConvergent())
3367 return false;
3368 }
3369 }
3370
3371 // Check if there are irreducible CFG cycles in this loop. If so, we cannot
3372 // easily unswitch non-trivial edges out of the loop. Doing so might turn the
3373 // irreducible control flow into reducible control flow and introduce new
3374 // loops "out of thin air". If we ever discover important use cases for doing
3375 // this, we can add support to loop unswitch, but it is a lot of complexity
3376 // for what seems little or no real world benefit.
3377 LoopBlocksRPO RPOT(&L);
3378 RPOT.perform(&LI);
3380 return false;
3381
3383 L.getUniqueExitBlocks(ExitBlocks);
3384 // We cannot unswitch if exit blocks contain a cleanuppad/catchswitch
3385 // instruction as we don't know how to split those exit blocks.
3386 // FIXME: We should teach SplitBlock to handle this and remove this
3387 // restriction.
3388 for (auto *ExitBB : ExitBlocks) {
3389 auto It = ExitBB->getFirstNonPHIIt();
3391 LLVM_DEBUG(dbgs() << "Cannot unswitch because of cleanuppad/catchswitch "
3392 "in exit block\n");
3393 return false;
3394 }
3395 }
3396
3397 return true;
3398}
3399
3400static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(
3401 ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates, const Loop &L,
3402 const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC,
3403 const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo) {
3404 // Unswitching these terminators will require duplicating parts of the
3405 // loop, so we need to be able to model that cost. Compute the ephemeral
3406 // values and set up a data structure to hold per-BB costs. We cache each
3407 // block's cost so that we don't recompute this when considering different
3408 // subsets of the loop for duplication during unswitching.
3410 CodeMetrics::collectEphemeralValues(&L, &AC, EphValues);
3412
3413 // Compute the cost of each block, as well as the total loop cost. Also, bail
3414 // out if we see instructions which are incompatible with loop unswitching
3415 // (convergent, noduplicate, or cross-basic-block tokens).
3416 // FIXME: We might be able to safely handle some of these in non-duplicated
3417 // regions.
3419 L.getHeader()->getParent()->hasMinSize()
3422 InstructionCost LoopCost = 0;
3423 for (auto *BB : L.blocks()) {
3424 InstructionCost Cost = 0;
3425 for (auto &I : *BB) {
3426 if (EphValues.count(&I))
3427 continue;
3428 Cost += TTI.getInstructionCost(&I, CostKind);
3429 }
3430 assert(Cost >= 0 && "Must not have negative costs!");
3431 LoopCost += Cost;
3432 assert(LoopCost >= 0 && "Must not have negative loop costs!");
3433 BBCostMap[BB] = Cost;
3434 }
3435 LLVM_DEBUG(dbgs() << " Total loop cost: " << LoopCost << "\n");
3436
3437 // Now we find the best candidate by searching for the one with the following
3438 // properties in order:
3439 //
3440 // 1) An unswitching cost below the threshold
3441 // 2) The smallest number of duplicated unswitch candidates (to avoid
3442 // creating redundant subsequent unswitching)
3443 // 3) The smallest cost after unswitching.
3444 //
3445 // We prioritize reducing fanout of unswitch candidates provided the cost
3446 // remains below the threshold because this has a multiplicative effect.
3447 //
3448 // This requires memoizing each dominator subtree to avoid redundant work.
3449 //
3450 // FIXME: Need to actually do the number of candidates part above.
3452 // Given a terminator which might be unswitched, computes the non-duplicated
3453 // cost for that terminator.
3454 auto ComputeUnswitchedCost = [&](Instruction &TI,
3455 bool FullUnswitch) -> InstructionCost {
3456 // Unswitching selects unswitches the entire loop.
3457 if (isa<SelectInst>(TI))
3458 return LoopCost;
3459
3460 BasicBlock &BB = *TI.getParent();
3462
3463 InstructionCost Cost = 0;
3464 for (BasicBlock *SuccBB : successors(&BB)) {
3465 // Don't count successors more than once.
3466 if (!Visited.insert(SuccBB).second)
3467 continue;
3468
3469 // If this is a partial unswitch candidate, then it must be a conditional
3470 // branch with a condition of either `or`, `and`, their corresponding
3471 // select forms or partially invariant instructions. In that case, one of
3472 // the successors is necessarily duplicated, so don't even try to remove
3473 // its cost.
3474 if (!FullUnswitch) {
3475 auto &BI = cast<BranchInst>(TI);
3476 Value *Cond = skipTrivialSelect(BI.getCondition());
3477 if (match(Cond, m_LogicalAnd())) {
3478 if (SuccBB == BI.getSuccessor(1))
3479 continue;
3480 } else if (match(Cond, m_LogicalOr())) {
3481 if (SuccBB == BI.getSuccessor(0))
3482 continue;
3483 } else if ((PartialIVInfo.KnownValue->isOneValue() &&
3484 SuccBB == BI.getSuccessor(0)) ||
3485 (!PartialIVInfo.KnownValue->isOneValue() &&
3486 SuccBB == BI.getSuccessor(1)))
3487 continue;
3488 }
3489
3490 // This successor's domtree will not need to be duplicated after
3491 // unswitching if the edge to the successor dominates it (and thus the
3492 // entire tree). This essentially means there is no other path into this
3493 // subtree and so it will end up live in only one clone of the loop.
3494 if (SuccBB->getUniquePredecessor() ||
3495 llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
3496 return PredBB == &BB || DT.dominates(SuccBB, PredBB);
3497 })) {
3498 Cost += computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);
3499 assert(Cost <= LoopCost &&
3500 "Non-duplicated cost should never exceed total loop cost!");
3501 }
3502 }
3503
3504 // Now scale the cost by the number of unique successors minus one. We
3505 // subtract one because there is already at least one copy of the entire
3506 // loop. This is computing the new cost of unswitching a condition.
3507 // Note that guards always have 2 unique successors that are implicit and
3508 // will be materialized if we decide to unswitch it.
3509 int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size();
3510 assert(SuccessorsCount > 1 &&
3511 "Cannot unswitch a condition without multiple distinct successors!");
3512 return (LoopCost - Cost) * (SuccessorsCount - 1);
3513 };
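 // Illustrative example (hypothetical numbers): for a fully invariant
 // conditional branch in a loop of total cost 100, where one successor heads a
 // subtree of cost 20 that is reachable only through that branch, the lambda
 // above returns (100 - 20) * (2 - 1) = 80.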
3514
3515 std::optional<NonTrivialUnswitchCandidate> Best;
3516 for (auto &Candidate : UnswitchCandidates) {
3517 Instruction &TI = *Candidate.TI;
3518 ArrayRef<Value *> Invariants = Candidate.Invariants;
3520 bool FullUnswitch =
3521 !BI || Candidate.hasPendingInjection() ||
3522 (Invariants.size() == 1 &&
3523 Invariants[0] == skipTrivialSelect(BI->getCondition()));
3524 InstructionCost CandidateCost = ComputeUnswitchedCost(TI, FullUnswitch);
3525 // Calculate cost multiplier which is a tool to limit potentially
3526 // exponential behavior of loop-unswitch.
3528 int CostMultiplier =
3529 CalculateUnswitchCostMultiplier(TI, L, LI, DT, UnswitchCandidates);
3530 assert(
3531 (CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) &&
3532 "cost multiplier needs to be in the range of 1..UnswitchThreshold");
3533 CandidateCost *= CostMultiplier;
3534 LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
3535 << " (multiplier: " << CostMultiplier << ")"
3536 << " for unswitch candidate: " << TI << "\n");
3537 } else {
3538 LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
3539 << " for unswitch candidate: " << TI << "\n");
3540 }
3541
3542 if (!Best || CandidateCost < Best->Cost) {
3543 Best = Candidate;
3544 Best->Cost = CandidateCost;
3545 }
3546 }
3547 assert(Best && "Must be!");
3548 return *Best;
3549}
3550
3551// Insert a freeze on an unswitched branch if all of the following are true:
3552// 1. The freeze-loop-unswitch-cond option is true.
3553// 2. The branch may not execute in the loop pre-transformation. If a branch that
3554// may not execute could cause UB, it would always cause UB once it is hoisted
3555// outside of the loop. Insert a freeze to prevent this case.
3556// 3. The branch condition may be poison or undef.
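// For illustration (hypothetical source): if the loop body is
//   if (c) { if (p) ... }
// and %p may be poison, the inner branch is not guaranteed to execute on every
// iteration; unswitching would hoist a branch on %p out of the loop, so the
// condition is frozen first and the unswitched branch tests the frozen value.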
3558 AssumptionCache &AC) {
3561 return false;
3562
3563 ICFLoopSafetyInfo SafetyInfo;
3564 SafetyInfo.computeLoopSafetyInfo(&L);
3565 if (SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
3566 return false;
3567
3568 Value *Cond;
3569 if (BranchInst *BI = dyn_cast<BranchInst>(&TI))
3570 Cond = skipTrivialSelect(BI->getCondition());
3571 else
3574 Cond, &AC, L.getLoopPreheader()->getTerminator(), &DT);
3575}
3576
3580 MemorySSAUpdater *MSSAU,
3581 LPMUpdater &LoopUpdater) {
3582 // Collect all invariant conditions within this loop (as opposed to an inner
3583 // loop which would be handled when visiting that inner loop).
3585 IVConditionInfo PartialIVInfo;
3586 Instruction *PartialIVCondBranch = nullptr;
3587 collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo,
3588 PartialIVCondBranch, L, LI, AA, MSSAU);
3589 if (!findOptionMDForLoop(&L, "llvm.loop.unswitch.injection.disable"))
3590 collectUnswitchCandidatesWithInjections(UnswitchCandidates, PartialIVInfo,
3591 PartialIVCondBranch, L, DT, LI, AA,
3592 MSSAU);
3593 // If we didn't find any candidates, we're done.
3594 if (UnswitchCandidates.empty())
3595 return false;
3596
3597 LLVM_DEBUG(
3598 dbgs() << "Considering " << UnswitchCandidates.size()
3599 << " non-trivial loop invariant conditions for unswitching.\n");
3600
3601 NonTrivialUnswitchCandidate Best = findBestNonTrivialUnswitchCandidate(
3602 UnswitchCandidates, L, DT, LI, AC, TTI, PartialIVInfo);
3603
3604 assert(Best.TI && "Failed to find loop unswitch candidate");
3605 assert(Best.Cost && "Failed to compute cost");
3606
3607 if (*Best.Cost >= UnswitchThreshold) {
3608 LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: " << *Best.Cost
3609 << "\n");
3610 return false;
3611 }
3612
3613 bool InjectedCondition = false;
3614 if (Best.hasPendingInjection()) {
3615 Best = injectPendingInvariantConditions(Best, L, DT, LI, AC, MSSAU);
3616 InjectedCondition = true;
3617 }
3618 assert(!Best.hasPendingInjection() &&
3619 "All injections should have been done by now!");
3620
3621 if (Best.TI != PartialIVCondBranch)
3622 PartialIVInfo.InstToDuplicate.clear();
3623
3624 bool InsertFreeze;
3625 if (auto *SI = dyn_cast<SelectInst>(Best.TI)) {
3626 // If the best candidate is a select, turn it into a branch. Select
3627 // instructions with a poison conditional do not propagate poison, but
3628 // branching on poison causes UB. Insert a freeze on the select
3629 // conditional to prevent UB after turning the select into a branch.
3630 InsertFreeze = !isGuaranteedNotToBeUndefOrPoison(
3631 SI->getCondition(), &AC, L.getLoopPreheader()->getTerminator(), &DT);
3632 Best.TI = turnSelectIntoBranch(SI, DT, LI, MSSAU, &AC);
3633 } else {
3634 // If the best candidate is a guard, turn it into a branch.
3635 if (isGuard(Best.TI))
3636 Best.TI =
3637 turnGuardIntoBranch(cast<IntrinsicInst>(Best.TI), L, DT, LI, MSSAU);
3638 InsertFreeze = shouldInsertFreeze(L, *Best.TI, DT, AC);
3639 }
3640
3641 LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = " << Best.Cost
3642 << ") terminator: " << *Best.TI << "\n");
3643 unswitchNontrivialInvariants(L, *Best.TI, Best.Invariants, PartialIVInfo, DT,
3644 LI, AC, SE, MSSAU, LoopUpdater, InsertFreeze,
3645 InjectedCondition);
3646 return true;
3647}
3648
3649/// Unswitch control flow predicated on loop invariant conditions.
3650///
3651/// This first hoists all branches or switches which are trivial (i.e., do not
3652/// require duplicating any part of the loop) out of the loop body. It then
3653/// looks at other loop invariant control flows and tries to unswitch those as
3654/// well by cloning the loop if the result is small enough.
3655///
3656/// The `DT`, `LI`, `AC`, `AA`, `TTI` parameters are required analyses that are
3657/// also updated based on the unswitch. The `MSSA` analysis is also updated if
3658/// valid (i.e. its use is enabled).
3659///
3660/// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is
3661/// true, we will attempt to do non-trivial unswitching as well as trivial
3662/// unswitching.
3663///
3664/// The `postUnswitch` function will be run after unswitching is complete
3665/// with information on whether or not the provided loop remains a loop and
3666/// a list of new sibling loops created.
3667///
3668/// If `SE` is non-null, we will update that analysis based on the unswitching
3669/// done.
3670static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
3672 TargetTransformInfo &TTI, bool Trivial,
3673 bool NonTrivial, ScalarEvolution *SE,
3674 MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater) {
3675 assert(L.isRecursivelyLCSSAForm(DT, LI) &&
3676 "Loops must be in LCSSA form before unswitching.");
3677
3678 // Must be in loop simplified form: we need a preheader and dedicated exits.
3679 if (!L.isLoopSimplifyForm())
3680 return false;
3681
3682 // Try trivial unswitching first, before looping over other basic blocks in the loop.
3683 if (Trivial && unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) {
3684 // If we unswitched successfully we will want to clean up the loop before
3685 // processing it further so just mark it as unswitched and return.
3686 postUnswitch(L, LoopUpdater, L.getName(),
3687 /*CurrentLoopValid*/ true, /*PartiallyInvariant*/ false,
3688 /*InjectedCondition*/ false, {});
3689 return true;
3690 }
3691
3692 const Function *F = L.getHeader()->getParent();
3693
3694 // Check whether we should continue with non-trivial conditions.
3695 // EnableNonTrivialUnswitch: Global variable that forces non-trivial
3696 // unswitching for testing and debugging.
3697 // NonTrivial: Parameter that enables non-trivial unswitching for this
3698 // invocation of the transform. But this should be allowed only
3699 // for targets without branch divergence.
3700 //
3701 // FIXME: If divergence analysis becomes available to a loop
3702 // transform, we should allow unswitching for non-trivial uniform
3703 // branches even on targets that have divergence.
3704 // https://bugs.llvm.org/show_bug.cgi?id=48819
3705 bool ContinueWithNonTrivial =
3706 EnableNonTrivialUnswitch || (NonTrivial && !TTI.hasBranchDivergence(F));
3707 if (!ContinueWithNonTrivial)
3708 return false;
3709
3710 // Skip non-trivial unswitching for optsize functions.
3711 if (F->hasOptSize())
3712 return false;
3713
3714 // Perform legality checks.
3716 return false;
3717
3718 // For non-trivial unswitching, because it often creates new loops, we rely on
3719 // the pass manager to iterate on the loops rather than trying to immediately
3720 // reach a fixed point. There is no substantial advantage to iterating
3721 // internally, and if any of the new loops are simplified enough to contain
3722 // trivial unswitching we want to prefer those.
3723
3724 // Try to unswitch the best invariant condition. We prefer a full unswitch to
3725 // a partial unswitch whenever it is possible below the threshold.
3726 if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, SE, MSSAU, LoopUpdater))
3727 return true;
3728
3729 // No other opportunities to unswitch.
3730 return false;
3731}
3732
3735 LPMUpdater &U) {
3736 Function &F = *L.getHeader()->getParent();
3737 (void)F;
3738 LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L
3739 << "\n");
3740
3741 std::optional<MemorySSAUpdater> MSSAU;
3742 if (AR.MSSA) {
3743 MSSAU = MemorySSAUpdater(AR.MSSA);
3744 if (VerifyMemorySSA)
3745 AR.MSSA->verifyMemorySSA();
3746 }
3747 if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
3748 &AR.SE, MSSAU ? &*MSSAU : nullptr, U))
3749 return PreservedAnalyses::all();
3750
3751 if (AR.MSSA && VerifyMemorySSA)
3752 AR.MSSA->verifyMemorySSA();
3753
3754 // Historically this pass has had issues with the dominator tree so verify it
3755 // in asserts builds.
3756 assert(AR.DT.verify(DominatorTree::VerificationLevel::Fast));
3757
3758 auto PA = getLoopPassPreservedAnalyses();
3759 if (AR.MSSA)
3760 PA.preserve<MemorySSAAnalysis>();
3761 return PA;
3762}
3763
3765 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
3767 OS, MapClassName2PassName);
3768
3769 OS << '<';
3770 OS << (NonTrivial ? "" : "no-") << "nontrivial;";
3771 OS << (Trivial ? "" : "no-") << "trivial";
3772 OS << '>';
3773}
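// For example, with NonTrivial disabled and Trivial enabled, printPipeline
// above appends "<no-nontrivial;trivial>" to the printed pass name.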
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Analysis containing CSE Info
Definition CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines the DenseMap class.
#define DEBUG_TYPE
This file defines a set of templates that efficiently compute a dominator tree over a generic graph.
static Value * getCondition(Instruction *I)
Module.h This file contains the declarations for the Module class.
This defines the Use class.
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
This header provides classes for managing per-loop analyses.
Loop::LoopBounds::Direction Direction
Definition LoopInfo.cpp:231
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
#define T
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
uint64_t IntrinsicInst * II
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
Provides some synthesis utilities to produce sequences of values.
This file implements a set that has insertion order iteration characteristics.
static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH)
Rewrite the PHI nodes in an unswitched loop exit basic block.
static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
This routine scans the loop to find a branch or switch which occurs before any side effects occur.
static SmallPtrSet< const BasicBlock *, 16 > recomputeLoopBlockSet(Loop &L, LoopInfo &LI)
Recompute the set of blocks in a loop after unswitching.
static int CalculateUnswitchCostMultiplier(const Instruction &TI, const Loop &L, const LoopInfo &LI, const DominatorTree &DT, ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates)
Cost multiplier is a way to limit potentially exponential behavior of loop-unswitch.
static TinyPtrVector< Value * > collectHomogenousInstGraphLoopInvariants(const Loop &L, Instruction &Root, const LoopInfo &LI)
Collect all of the loop invariant input values transitively used by the homogeneous instruction graph...
static void deleteDeadClonedBlocks(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, ArrayRef< std::unique_ptr< ValueToValueMapTy > > VMaps, DominatorTree &DT, MemorySSAUpdater *MSSAU)
void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable)
Helper to visit a dominator subtree, invoking a callable on each node.
static BranchInst * turnSelectIntoBranch(SelectInst *SI, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, AssumptionCache *AC)
Turns a select instruction into implicit control flow branch, making the following replacement:
static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI)
void postUnswitch(Loop &L, LPMUpdater &U, StringRef LoopName, bool CurrentLoopValid, bool PartiallyInvariant, bool InjectedCondition, ArrayRef< Loop * > NewLoops)
static bool shouldTryInjectInvariantCondition(const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS, const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L)
Returns true, if predicate described by ( Pred, LHS, RHS ) succeeding into blocks ( IfTrue,...
static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(ArrayRef< NonTrivialUnswitchCandidate > UnswitchCandidates, const Loop &L, const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC, const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo)
static Value * skipTrivialSelect(Value *Cond)
static Loop * getTopMostExitingLoop(const BasicBlock *ExitBB, const LoopInfo &LI)
static bool collectUnswitchCandidatesWithInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L, const DominatorTree &DT, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)
Collect unswitch candidates by invariant conditions that are not immediately present in the loop.
static void replaceLoopInvariantUses(const Loop &L, Value *Invariant, Constant &Replacement)
static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial branch if the condition is loop invariant.
static bool collectUnswitchCandidates(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, const Loop &L, const LoopInfo &LI, AAResults &AA, const MemorySSAUpdater *MSSAU)
static InstructionCost computeDomSubtreeCost(DomTreeNode &N, const SmallDenseMap< BasicBlock *, InstructionCost, 4 > &BBCostMap, SmallDenseMap< DomTreeNode *, InstructionCost, 4 > &DTCostMap)
Recursively compute the cost of a dominator subtree based on the per-block cost map provided.
static bool shouldInsertFreeze(Loop &L, Instruction &TI, DominatorTree &DT, AssumptionCache &AC)
static void canonicalizeForInvariantConditionInjection(CmpPredicate &Pred, Value *&LHS, Value *&RHS, BasicBlock *&IfTrue, BasicBlock *&IfFalse, const Loop &L)
Tries to canonicalize condition described by:
static bool areLoopExitPHIsLoopInvariant(const Loop &L, const BasicBlock &ExitingBB, const BasicBlock &ExitBB)
Check that all the LCSSA PHI nodes in the loop exit block have trivial incoming values along this edg...
static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH, bool FullUnswitch)
Rewrite the PHI nodes in the loop exit basic block and the split off unswitched block.
static bool insertCandidatesWithPendingInjections(SmallVectorImpl< NonTrivialUnswitchCandidate > &UnswitchCandidates, Loop &L, ICmpInst::Predicate Pred, ArrayRef< CompareDesc > Compares, const DominatorTree &DT)
Given chain of loop branch conditions looking like: br (Variant < Invariant1) br (Variant < Invariant...
static NonTrivialUnswitchCandidate injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, MemorySSAUpdater *MSSAU)
Materialize pending invariant condition of the given candidate into IR.
static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, LoopInfo &LI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU)
Unswitch a trivial switch if the condition is loop invariant.
static void unswitchNontrivialInvariants(Loop &L, Instruction &TI, ArrayRef< Value * > Invariants, IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater, bool InsertFreeze, bool InjectedCondition)
static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef< BasicBlock * > ExitBlocks, LoopInfo &LI, SmallVectorImpl< Loop * > &HoistedLoops, ScalarEvolution *SE)
Rebuild a loop after unswitching removes some subset of blocks and edges.
static bool unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater)
static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, bool Trivial, bool NonTrivial, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater)
Unswitch control flow predicated on loop invariant conditions.
static void buildPartialInvariantUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > ToDuplicate, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L, MemorySSAUpdater *MSSAU, const BranchInst &OriginalBranch)
Copy a set of loop invariant values, and conditionally branch on them.
static BasicBlock * buildClonedLoopBlocks(Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB, ArrayRef< BasicBlock * > ExitBlocks, BasicBlock *ParentBB, BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB, const SmallDenseMap< BasicBlock *, BasicBlock *, 16 > &DominatingSucc, ValueToValueMapTy &VMap, SmallVectorImpl< DominatorTree::UpdateType > &DTUpdates, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Build the cloned blocks for an unswitched copy of the given loop.
static void deleteDeadBlocksFromLoop(Loop &L, SmallVectorImpl< BasicBlock * > &ExitBlocks, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE, LPMUpdater &LoopUpdater)
bool shouldTryInjectBasingOnMetadata(const BranchInst *BI, const BasicBlock *TakenSucc)
Returns true, if metadata on BI allows us to optimize branching into TakenSucc via injection of invar...
static BranchInst * turnGuardIntoBranch(IntrinsicInst *GI, Loop &L, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU)
Turns a llvm.experimental.guard intrinsic into implicit control flow branch, making the following rep...
static Loop * cloneLoopNest(Loop &OrigRootL, Loop *RootParentL, const ValueToValueMapTy &VMap, LoopInfo &LI)
Recursively clone the specified loop and all of its children.
static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution *SE)
Hoist the current loop up to the innermost loop containing a remaining exit.
static void buildClonedLoops(Loop &OrigL, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, LoopInfo &LI, SmallVectorImpl< Loop * > &NonChildClonedLoops)
Build the cloned loops of an original loop from unswitching.
static void buildPartialUnswitchConditionalBranch(BasicBlock &BB, ArrayRef< Value * > Invariants, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze, const Instruction *I, AssumptionCache *AC, const DominatorTree &DT, const BranchInst &ComputeProfFrom)
Copy a set of loop invariant values Invariants and insert them at the end of BB and conditionally bra...
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
iterator end() const
Definition ArrayRef.h:132
size_t size() const
size - Get the array size.
Definition ArrayRef.h:143
iterator begin() const
Definition ArrayRef.h:131
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:138
A cache of @llvm.assume calls within a function.
LLVM_ABI void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:472
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:528
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
size_t size() const
Definition BasicBlock.h:480
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
Definition BasicBlock.h:386
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Conditional or Unconditional Branch instruction.
void setCondition(Value *V)
LLVM_ABI void swapSuccessors()
Swap the successors of this branch instruction.
bool isConditional() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:827
static LLVM_ABI CmpInst * Create(OtherOps Op, Predicate Pred, Value *S1, Value *S2, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Construct a compare instruction, given the opcode, the predicate and the two operands.
Predicate getNonStrictPredicate() const
For example, SGT -> SGE, SLT -> SLE, ULT -> ULE, UGT -> UGE.
Definition InstrTypes.h:871
static LLVM_ABI bool isStrictPredicate(Predicate predicate)
This is a static version that you can use without an instruction available.
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
bool isUnsigned() const
Definition InstrTypes.h:936
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
This is an important base class in LLVM.
Definition Constant.h:43
LLVM_ABI bool isOneValue() const
Returns true if the value is one.
A debug info location.
Definition DebugLoc.h:124
static DebugLoc getCompilerGenerated()
Definition DebugLoc.h:163
static DebugLoc getDropped()
Definition DebugLoc.h:164
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:205
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
iterator begin()
Definition DenseMap.h:78
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:174
iterator end()
Definition DenseMap.h:81
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:233
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
void insertEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge insertion and update the tree.
static constexpr UpdateKind Delete
static constexpr UpdateKind Insert
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:165
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This class represents a freeze function that returns random concrete value if an operand is either a ...
This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to give precise answers on "may...
bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop) const override
Returns true if the instruction in a loop is guaranteed to execute at least once (under the assumptio...
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
bool isRelational() const
Return true if the predicate is relational (not EQ or NE).
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition IRBuilder.h:2645
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition IRBuilder.h:247
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition IRBuilder.h:1197
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:1551
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1573
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void dropLocation()
Drop the instruction's debug location.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
bool isTerminator() const
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
A wrapper class for inspecting calls to intrinsic functions.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
BlockT * getHeader() const
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
void reserveBlocks(unsigned size)
interface to do reserve() for Blocks
iterator_range< block_iterator > blocks() const
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop.
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
void perform(const LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
iterator end() const
LoopT * AllocateLoop(ArgsTy &&...Args)
LoopT * removeLoop(iterator I)
This removes the specified top-level loop from this loop info object.
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
unsigned getLoopDepth(const BlockT *BB) const
Return the loop nesting level of the specified block.
iterator begin() const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
void destroy(LoopT *L)
Destroy a loop that has been removed from the LoopInfo nest.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
StringRef getName() const
Definition LoopInfo.h:389
LLVM_ABI MDNode * createUnlikelyBranchWeights()
Return metadata containing two branch weights, with a significant bias towards the false destination.
Definition MDBuilder.cpp:48
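A hedged example of attaching this metadata to a branch, assuming BI is a BranchInst* created by the caller:
  MDBuilder MDB(BI->getContext());
  BI->setMetadata(LLVMContext::MD_prof, MDB.createUnlikelyBranchWeights()); // bias towards the false edge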
Metadata node.
Definition Metadata.h:1078
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1569
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:608
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Definition MemorySSA.h:371
An analysis that produces MemorySSA for a function.
Definition MemorySSA.h:936
MemorySSA * getMemorySSA() const
Get handle on MemorySSA.
LLVM_ABI void removeEdge(BasicBlock *From, BasicBlock *To)
Update the MemoryPhi in To following an edge deletion between From and To.
LLVM_ABI void updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VM, bool IgnoreIncomingWithNoClones=false)
Update MemorySSA after a loop was cloned, given the blocks in RPO order, the exit blocks and a 1:1 ma...
LLVM_ABI void removeDuplicatePhiEdgesBetween(const BasicBlock *From, const BasicBlock *To)
Update the MemoryPhi in To to have a single incoming edge from From, following a CFG change that repl...
LLVM_ABI void removeBlocks(const SmallSetVector< BasicBlock *, 8 > &DeadBlocks)
Remove all MemoryAccesses in a set of BasicBlocks about to be deleted.
LLVM_ABI void moveAllAfterSpliceBlocks(BasicBlock *From, BasicBlock *To, Instruction *Start)
From block was spliced into From and To.
LLVM_ABI MemoryAccess * createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point, bool CreationMustSucceed=true)
Create a MemoryAccess in MemorySSA at a specified point in a block.
LLVM_ABI void applyInsertUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT)
Apply CFG insert updates, analogous with the DT edge updates.
LLVM_ABI void applyUpdates(ArrayRef< CFGUpdate > Updates, DominatorTree &DT, bool UpdateDTFirst=false)
Apply CFG updates, analogous with the DT edge updates.
LLVM_ABI void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where)
LLVM_ABI void updateExitBlocksForClonedLoop(ArrayRef< BasicBlock * > ExitBlocks, const ValueToValueMapTy &VMap, DominatorTree &DT)
Update phi nodes in exit block successors following cloning.
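These hooks take the same update records as the DominatorTree; a minimal sketch of the shared pattern, with DT, MSSAU, and the block names (SplitBB, Header, ClonedHeader) all assumed:
  SmallVector<DominatorTree::UpdateType, 2> DTUpdates;
  DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedHeader});
  DTUpdates.push_back({DominatorTree::Delete, SplitBB, Header});
  DT.applyUpdates(DTUpdates);
  if (MSSAU)
    MSSAU->applyUpdates(DTUpdates, DT); // replay the same edge changes on MemorySSA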
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition MemorySSA.h:702
LLVM_ABI void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self-consistent (i.e., definitions dominate all uses, uses appear in the right ...
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition MemorySSA.h:720
const DefsList * getBlockDefs(const BasicBlock *BB) const
Return the list of MemoryDef's and MemoryPhi's for a given basic block.
Definition MemorySSA.h:768
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
The main scalar evolution driver.
LLVM_ABI void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may affect ScalarEvolution's ability to compute a trip count.
LLVM_ABI void forgetTopmostLoop(const Loop *L)
LLVM_ABI void forgetBlockAndLoopDispositions(Value *V=nullptr)
Called when the client has changed the disposition of values in a loop or block.
LLVM_ABI void forgetLcssaPhiWithNewPredecessor(Loop *L, PHINode *V)
Forget LCSSA phi node V of loop L to which a new predecessor was added, such that it may no longer be...
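A hedged sketch of the invalidation step these methods exist for, assuming SE is a ScalarEvolution* and L is the loop being rewritten:
  if (SE) {
    SE->forgetTopmostLoop(&L);            // drop cached results for the outermost loop containing L
    SE->forgetBlockAndLoopDispositions(); // dispositions may have changed after the CFG edit
  }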
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:102
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition SetVector.h:261
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition SetVector.h:105
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:150
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
size_type size() const
Definition SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:338
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
LLVM_ABI void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
LLVM_ABI Instruction::InstListType::iterator eraseFromParent()
Delegate the call to the underlying SwitchInst::eraseFromParent() and mark this object to not touch t...
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
LLVM_ABI CaseWeightOpt getSuccessorWeight(unsigned idx)
std::optional< uint32_t > CaseWeightOpt
LLVM_ABI SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove the corresponding branch weight.
unsigned getSuccessorIndex() const
Returns successor index for current case successor.
BasicBlockT * getCaseSuccessor() const
Resolves successor for current case.
ConstantIntT * getCaseValue() const
Resolves case value for current case.
Multiway switch.
BasicBlock * getDefaultDest() const
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, InsertPosition InsertBefore=nullptr)
void setDefaultDest(BasicBlock *DefaultCase)
iterator_range< CaseIt > cases()
Iteration adapter for range-for loops.
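A short illustrative use of this interface; InsertBB (assumed to lack a terminator), Cond, DefaultBB, and CaseBB are all hypothetical:
  SwitchInst *SI = SwitchInst::Create(Cond, DefaultBB, /*NumCases=*/1, InsertBB);
  SI->addCase(ConstantInt::get(cast<IntegerType>(Cond->getType()), 0), CaseBB);
  unsigned EdgesToCaseBB = 0;
  for (auto Case : SI->cases())
    if (Case.getCaseSuccessor() == CaseBB)
      ++EdgesToCaseBB; // inspect case successors without mutating the switch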
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
TinyPtrVector - This class is specialized for cases where there are normally 0 or 1 element in a vect...
void push_back(EltTy NewVal)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition ValueMap.h:167
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition ValueMap.h:156
LLVM Value Representation.
Definition Value.h:75
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:390
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1099
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getDeclarationIfExists(const Module *M, ID id)
Look up the Function declaration of the intrinsic id in the Module M and return it if it exists.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ?
bool match(Val *V, const Pattern &P)
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
LogicalOp_match< LHS, RHS, Instruction::Or > m_LogicalOr(const LHS &L, const RHS &R)
Matches L || R either in the form of L | R or L ?
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
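The matchers compose as in this hedged sketch, which checks whether a block (an assumed BB) ends in a conditional branch on a logical and:
  using namespace llvm::PatternMatch;
  Value *LHS = nullptr, *RHS = nullptr;
  BasicBlock *TrueBB = nullptr, *FalseBB = nullptr;
  if (match(BB->getTerminator(),
            m_Br(m_LogicalAnd(m_Value(LHS), m_Value(RHS)), TrueBB, FalseBB))) {
    // LHS && RHS (in either 'and' or select form) controls the branch.
  }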
initializer< Ty > init(const Ty &Val)
friend class Instruction
Iterator for Instructions in a BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
void stable_sort(R &&Range)
Definition STLExtras.h:2058
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1751
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1655
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:533
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
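Cloning and remapping are typically paired; a hedged sketch of that pattern, with BB and the ".us" suffix purely illustrative:
  ValueToValueMapTy VMap;
  BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".us", BB->getParent());
  VMap[BB] = NewBB; // also map the block itself so self-references are rewritten
  for (Instruction &I : *NewBB)
    RemapInstruction(&I, VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);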
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
static cl::opt< int > UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden, cl::desc("The cost threshold for unswitching a loop."))
auto successors(const MachineBasicBlock *BB)
static cl::opt< bool > EnableNonTrivialUnswitch("enable-nontrivial-unswitch", cl::init(false), cl::Hidden, cl::desc("Forcibly enables non-trivial loop unswitching rather than " "following the configuration passed into the pass."))
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2136
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
auto cast_or_null(const Y &Val)
Definition Casting.h:714
LLVM_ABI MDNode * findOptionMDForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for a loop.
Op::Description Desc
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
Definition STLExtras.h:1150
DomTreeNodeBase< BasicBlock > DomTreeNode
Definition Dominators.h:95
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
static cl::opt< bool > EnableUnswitchCostMultiplier("enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden, cl::desc("Enable unswitch cost multiplier that prohibits exponential " "explosion in nontrivial unswitch."))
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
bool isGuard(const User *U)
Returns true iff U has semantics of a guard expressed in the form of a call to llvm.experimental.guard.
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
static cl::opt< bool > DropNonTrivialImplicitNullChecks("simple-loop-unswitch-drop-non-trivial-implicit-null-checks", cl::init(false), cl::Hidden, cl::desc("If enabled, drop make.implicit metadata in unswitched implicit " "null checks to save time analyzing if we can keep it."))
bool containsIrreducibleCFG(RPOTraversalT &RPOTraversal, const LoopInfoT &LI)
Return true if the control flow in RPOTraversal is irreducible.
Definition CFG.h:149
static cl::opt< unsigned > InjectInvariantConditionHotnesThreshold("simple-loop-unswitch-inject-invariant-condition-hotness-threshold", cl::Hidden, cl::desc("Only try to inject loop invariant conditions and " "unswitch on them to eliminate branches that are " "not-taken 1/<this option> times or less."), cl::init(16))
static cl::opt< int > UnswitchSiblingsToplevelDiv("unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden, cl::desc("Toplevel siblings divisor for cost multiplier."))
detail::zippy< detail::zip_first, T, U, Args... > zip_first(T &&t, U &&u, Args &&...args)
zip iterator that, for the sake of efficiency, assumes the first iteratee to be the shortest.
Definition STLExtras.h:852
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1622
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition ValueMapper.h:98
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition ValueMapper.h:80
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
static cl::opt< bool > InjectInvariantConditions("simple-loop-unswitch-inject-invariant-conditions", cl::Hidden, cl::desc("Whether we should inject new invariants and unswitch them to " "eliminate some existing (non-invariant) conditions."), cl::init(true))
LLVM_ABI bool VerifyLoopInfo
Enable verification of loop info.
Definition LoopInfo.cpp:51
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
TargetTransformInfo TTI
LLVM_ABI bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition MemorySSA.cpp:84
LLVM_ABI bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
Definition LoopUtils.cpp:58
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ArrayRef(const T &OneElt) -> ArrayRef< T >
auto sum_of(R &&Range, E Init=E{0})
Returns the sum of all values in Range, with Init as the initial value.
Definition STLExtras.h:1703
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
static cl::opt< int > UnswitchNumInitialUnscaledCandidates("unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden, cl::desc("Number of unswitch candidates that are ignored when calculating " "cost multiplier."))
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
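A hedged example of reading existing profile data before rewriting a terminator; TI is an assumed branch or switch instruction:
  SmallVector<uint32_t, 2> Weights;
  if (MDNode *ProfMD = TI->getMetadata(LLVMContext::MD_prof))
    if (extractBranchWeights(ProfMD, Weights) && Weights.size() == 2) {
      // Weights[0]/Weights[1] follow TI's successor order and can be
      // redistributed onto the rewritten terminators.
    }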
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:1961
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
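Loop passes commonly combine the two helpers above when reporting their effect; a minimal sketch, where Changed and the decision to preserve MemorySSA are assumptions about the caller:
  if (!Changed)
    return PreservedAnalyses::all();
  PreservedAnalyses PA = getLoopPassPreservedAnalyses();
  PA.preserve<MemorySSAAnalysis>(); // only if MemorySSA was kept up to date
  return PA;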
static cl::opt< bool > EstimateProfile("simple-loop-unswitch-estimate-profile", cl::Hidden, cl::init(true))
LLVM_ABI llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
static cl::opt< unsigned > MSSAThreshold("simple-loop-unswitch-memoryssa-threshold", cl::desc("Max number of memory uses to explore during " "partial unswitching analysis"), cl::init(100), cl::Hidden)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2120
auto predecessors(const MachineBasicBlock *BB)
cl::opt< bool > ProfcheckDisableMetadataFixes("profcheck-disable-metadata-fixes", cl::Hidden, cl::init(false), cl::desc("Disable metadata propagation fixes discovered through Issue #147390"))
bool pred_empty(const BasicBlock *BB)
Definition CFG.h:119
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
LLVM_ABI BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
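An illustrative combination of the block-splitting utilities above; Preheader, Header, Cond, DT, LI, and MSSAU are assumed, and this is a sketch rather than the pass's own code:
  // Give the edge its own block, then guard a new 'then' block with Cond.
  BasicBlock *EdgeBB = SplitEdge(Preheader, Header, &DT, &LI, MSSAU);
  Instruction *ThenTerm = SplitBlockAndInsertIfThen(
      Cond, EdgeBB->getTerminator()->getIterator(), /*Unreachable=*/false);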
static cl::opt< bool > FreezeLoopUnswitchCond("freeze-loop-unswitch-cond", cl::init(true), cl::Hidden, cl::desc("If enabled, the freeze instruction will be added to condition " "of loop unswitch to prevent miscompilation."))
LLVM_ABI std::optional< IVConditionInfo > hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold, const MemorySSA &MSSA, AAResults &AA)
Check if the loop header has a conditional branch that is not loop-invariant, because it involves loa...
LLVM_ABI bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put loop into LCSSA form.
Definition LCSSA.cpp:427
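After non-trivial CFG surgery these two canonicalization helpers are often re-run; a hedged sketch, assuming DT, LI, SE, and MSSAU are the usual analysis handles:
  formDedicatedExitBlocks(&L, &DT, &LI, MSSAU, /*PreserveLCSSA=*/true);
  formLCSSA(L, DT, &LI, SE); // re-establish LCSSA phis for values used outside L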
static cl::opt< bool > UnswitchGuards("simple-loop-unswitch-guards", cl::init(true), cl::Hidden, cl::desc("If enabled, simple loop unswitching will also consider " "llvm.experimental.guard intrinsics as unswitch candidates."))
LLVM_ABI void mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap)
Mark a cloned instruction as a new instance so that its source loc can be updated when remapped.
static cl::opt< int > UnswitchParentBlocksDiv("unswitch-parent-blocks-div", cl::init(8), cl::Hidden, cl::desc("Outer loop size divisor for cost multiplier."))
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition Analysis.h:29
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Struct to hold information about a partially invariant condition.
Definition LoopUtils.h:622
SmallVector< Instruction * > InstToDuplicate
Instructions that need to be duplicated and checked for the unswitching condition.
Definition LoopUtils.h:625
Constant * KnownValue
Constant to indicate for which value the condition is invariant.
Definition LoopUtils.h:628
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition PassManager.h:70