// (Extracted from the LLVM 23.0.0git doxygen page for SimplifyCFG.cpp;
//  HTML navigation header removed.)
1//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Peephole optimize the CFG.
10//
11//===----------------------------------------------------------------------===//
12
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <climits>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <map>
#include <optional>
#include <set>
#include <tuple>
#include <utility>
#include <vector>
93
94using namespace llvm;
95using namespace PatternMatch;
96
97#define DEBUG_TYPE "simplifycfg"
98
99namespace llvm {
100
102 "simplifycfg-require-and-preserve-domtree", cl::Hidden,
103
104 cl::desc(
105 "Temporary development switch used to gradually uplift SimplifyCFG "
106 "into preserving DomTree,"));
107
108// Chosen as 2 so as to be cheap, but still to have enough power to fold
109// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
110// To catch this, we need to fold a compare and a select, hence '2' being the
111// minimum reasonable default.
113 "phi-node-folding-threshold", cl::Hidden, cl::init(2),
114 cl::desc(
115 "Control the amount of phi node folding to perform (default = 2)"));
116
118 "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
119 cl::desc("Control the maximal total instruction cost that we are willing "
120 "to speculatively execute to fold a 2-entry PHI node into a "
121 "select (default = 4)"));
122
123static cl::opt<bool>
124 HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
125 cl::desc("Hoist common instructions up to the parent block"));
126
128 "simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true),
129 cl::desc("Hoist loads if the target supports conditional faulting"));
130
132 "simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true),
133 cl::desc("Hoist stores if the target supports conditional faulting"));
134
136 "hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6),
137 cl::desc("Control the maximal conditional load/store that we are willing "
138 "to speculatively execute to eliminate conditional branch "
139 "(default = 6)"));
140
142 HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
143 cl::init(20),
144 cl::desc("Allow reordering across at most this many "
145 "instructions when hoisting"));
146
147static cl::opt<bool>
148 SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
149 cl::desc("Sink common instructions down to the end block"));
150
152 "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
153 cl::desc("Hoist conditional stores if an unconditional store precedes"));
154
156 "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
157 cl::desc("Hoist conditional stores even if an unconditional store does not "
158 "precede - hoist multiple conditional stores into a single "
159 "predicated store"));
160
162 "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
163 cl::desc("When merging conditional stores, do so even if the resultant "
164 "basic blocks are unlikely to be if-converted as a result"));
165
167 "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
168 cl::desc("Allow exactly one expensive instruction to be speculatively "
169 "executed"));
170
172 "max-speculation-depth", cl::Hidden, cl::init(10),
173 cl::desc("Limit maximum recursion depth when calculating costs of "
174 "speculatively executed instructions"));
175
176static cl::opt<int>
177 MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
178 cl::init(10),
179 cl::desc("Max size of a block which is still considered "
180 "small enough to thread through"));
181
182// Two is chosen to allow one negation and a logical combine.
184 BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
185 cl::init(2),
186 cl::desc("Maximum cost of combining conditions when "
187 "folding branches"));
188
190 "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
191 cl::init(2),
192 cl::desc("Multiplier to apply to threshold when determining whether or not "
193 "to fold branch to common destination when vector operations are "
194 "present"));
195
197 "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true),
198 cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));
199
201 "max-switch-cases-per-result", cl::Hidden, cl::init(16),
202 cl::desc("Limit cases to analyze when converting a switch to select"));
203
205
206} // end namespace llvm
207
208STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
209STATISTIC(NumLinearMaps,
210 "Number of switch instructions turned into linear mapping");
211STATISTIC(NumLookupTables,
212 "Number of switch instructions turned into lookup tables");
214 NumLookupTablesHoles,
215 "Number of switch instructions turned into lookup tables (holes checked)");
216STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
217STATISTIC(NumFoldValueComparisonIntoPredecessors,
218 "Number of value comparisons folded into predecessor basic blocks");
219STATISTIC(NumFoldBranchToCommonDest,
220 "Number of branches folded into predecessor basic block");
222 NumHoistCommonCode,
223 "Number of common instruction 'blocks' hoisted up to the begin block");
224STATISTIC(NumHoistCommonInstrs,
225 "Number of common instructions hoisted up to the begin block");
226STATISTIC(NumSinkCommonCode,
227 "Number of common instruction 'blocks' sunk down to the end block");
228STATISTIC(NumSinkCommonInstrs,
229 "Number of common instructions sunk down to the end block");
230STATISTIC(NumSpeculations, "Number of speculative executed instructions");
231STATISTIC(NumInvokes,
232 "Number of invokes with empty resume blocks simplified into calls");
233STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
234STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
235
236namespace {
237
238// The first field contains the value that the switch produces when a certain
239// case group is selected, and the second field is a vector containing the
240// cases composing the case group.
241using SwitchCaseResultVectorTy =
243
244// The first field contains the phi node that generates a result of the switch
245// and the second field contains the value generated for a certain case in the
246// switch for that PHI.
247using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
248
249/// ValueEqualityComparisonCase - Represents a case of a switch.
250struct ValueEqualityComparisonCase {
252 BasicBlock *Dest;
253
254 ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
255 : Value(Value), Dest(Dest) {}
256
257 bool operator<(ValueEqualityComparisonCase RHS) const {
258 // Comparing pointers is ok as we only rely on the order for uniquing.
259 return Value < RHS.Value;
260 }
261
262 bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
263};
264
/// Driver class that applies the peephole CFG simplifications to one basic
/// block at a time; run() iterates simplifyOnce() until a fixed point.
class SimplifyCFGOpt {
  const TargetTransformInfo &TTI; // Cost model used to gate speculation.
  DomTreeUpdater *DTU;            // May be null; must not hold a PostDomTree.
  const DataLayout &DL;
  ArrayRef<WeakVH> LoopHeaders;   // Blocks the caller asked us to preserve.
  const SimplifyCFGOptions &Options;
  bool Resimplify;                // Set when a transform requests another pass.

  // Folding of value-equality comparisons (switches and eq/ne branches)
  // into predecessor terminators.
  Value *isValueEqualityComparison(Instruction *TI);
  BasicBlock *getValueEqualityComparisonCases(
      Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
  bool simplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
                                                     BasicBlock *Pred,
                                                     IRBuilder<> &Builder);
  bool performValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
                                                    Instruction *PTI,
                                                    IRBuilder<> &Builder);
  bool foldValueComparisonIntoPredecessors(Instruction *TI,
                                           IRBuilder<> &Builder);

  // Per-terminator-kind simplification entry points, dispatched by
  // simplifyOnce() on the block's terminator.
  bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
  bool simplifySingleResume(ResumeInst *RI);
  bool simplifyCommonResume(ResumeInst *RI);
  bool simplifyCleanupReturn(CleanupReturnInst *RI);
  bool simplifyUnreachable(UnreachableInst *UI);
  bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
  bool simplifyDuplicateSwitchArms(SwitchInst *SI, DomTreeUpdater *DTU);
  bool simplifyIndirectBr(IndirectBrInst *IBI);
  bool simplifyUncondBranch(UncondBrInst *BI, IRBuilder<> &Builder);
  bool simplifyCondBranch(CondBrInst *BI, IRBuilder<> &Builder);
  bool foldCondBranchOnValueKnownInPredecessor(CondBrInst *BI);

  bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
                                             IRBuilder<> &Builder);
  bool tryToSimplifyUncondBranchWithICmpSelectInIt(ICmpInst *ICI,
                                                   SelectInst *Select,
                                                   IRBuilder<> &Builder);
  // Hoisting/sinking/speculation of code shared between successors.
  bool hoistCommonCodeFromSuccessors(Instruction *TI, bool AllInstsEqOnly);
  bool hoistSuccIdenticalTerminatorToSwitchOrIf(
      Instruction *TI, Instruction *I1,
      SmallVectorImpl<Instruction *> &OtherSuccTIs,
      ArrayRef<BasicBlock *> UniqueSuccessors);
  bool speculativelyExecuteBB(CondBrInst *BI, BasicBlock *ThenBB);
  bool simplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
                                  BasicBlock *TrueBB, BasicBlock *FalseBB,
                                  uint32_t TrueWeight, uint32_t FalseWeight);
  bool simplifyBranchOnICmpChain(CondBrInst *BI, IRBuilder<> &Builder,
                                 const DataLayout &DL);
  bool simplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
  bool simplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
  bool turnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);
  bool simplifyDuplicatePredecessors(BasicBlock *Succ, DomTreeUpdater *DTU);

public:
  SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
                 const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
                 const SimplifyCFGOptions &Opts)
      : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
    assert((!DTU || !DTU->hasPostDomTree()) &&
           "SimplifyCFG is not yet capable of maintaining validity of a "
           "PostDomTree, so don't ask for it.");
  }

  // One pass over BB; returns true if anything changed.
  bool simplifyOnce(BasicBlock *BB);
  // Iterates simplifyOnce() while Resimplify is requested.
  bool run(BasicBlock *BB);

  // Helper to set Resimplify and return change indication.
  bool requestResimplify() {
    Resimplify = true;
    return true;
  }
};
337
338// we synthesize a || b as select a, true, b
339// we synthesize a && b as select a, b, false
340// this function determines if SI is playing one of those roles.
341[[maybe_unused]] bool
342isSelectInRoleOfConjunctionOrDisjunction(const SelectInst *SI) {
343 return ((isa<ConstantInt>(SI->getTrueValue()) &&
344 (dyn_cast<ConstantInt>(SI->getTrueValue())->isOne())) ||
345 (isa<ConstantInt>(SI->getFalseValue()) &&
346 (dyn_cast<ConstantInt>(SI->getFalseValue())->isNullValue())));
347}
348
349} // end anonymous namespace
350
351/// Return true if all the PHI nodes in the basic block \p BB
352/// receive compatible (identical) incoming values when coming from
353/// all of the predecessor blocks that are specified in \p IncomingBlocks.
354///
355/// Note that if the values aren't exactly identical, but \p EquivalenceSet
356/// is provided, and *both* of the values are present in the set,
357/// then they are considered equal.
359 BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
360 SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
361 assert(IncomingBlocks.size() == 2 &&
362 "Only for a pair of incoming blocks at the time!");
363
364 // FIXME: it is okay if one of the incoming values is an `undef` value,
365 // iff the other incoming value is guaranteed to be a non-poison value.
366 // FIXME: it is okay if one of the incoming values is a `poison` value.
367 return all_of(BB->phis(), [IncomingBlocks, EquivalenceSet](PHINode &PN) {
368 Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]);
369 Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]);
370 if (IV0 == IV1)
371 return true;
372 if (EquivalenceSet && EquivalenceSet->contains(IV0) &&
373 EquivalenceSet->contains(IV1))
374 return true;
375 return false;
376 });
377}
378
379/// Return true if it is safe to merge these two
380/// terminator instructions together.
381static bool
383 SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
384 if (SI1 == SI2)
385 return false; // Can't merge with self!
386
387 // It is not safe to merge these two switch instructions if they have a common
388 // successor, and if that successor has a PHI node, and if *that* PHI node has
389 // conflicting incoming values from the two switch blocks.
390 BasicBlock *SI1BB = SI1->getParent();
391 BasicBlock *SI2BB = SI2->getParent();
392
394 bool Fail = false;
395 for (BasicBlock *Succ : successors(SI2BB)) {
396 if (!SI1Succs.count(Succ))
397 continue;
398 if (incomingValuesAreCompatible(Succ, {SI1BB, SI2BB}))
399 continue;
400 Fail = true;
401 if (FailBlocks)
402 FailBlocks->insert(Succ);
403 else
404 break;
405 }
406
407 return !Fail;
408}
409
410/// Update PHI nodes in Succ to indicate that there will now be entries in it
411/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
412/// will be the same as those coming in from ExistPred, an existing predecessor
413/// of Succ.
414static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
415 BasicBlock *ExistPred,
416 MemorySSAUpdater *MSSAU = nullptr) {
417 for (PHINode &PN : Succ->phis())
418 PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
419 if (MSSAU)
420 if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
421 MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
422}
423
424/// Compute an abstract "cost" of speculating the given instruction,
425/// which is assumed to be safe to speculate. TCC_Free means cheap,
426/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
427/// expensive.
429 const TargetTransformInfo &TTI) {
430 return TTI.getInstructionCost(I, TargetTransformInfo::TCK_SizeAndLatency);
431}
432
433/// If we have a merge point of an "if condition" as accepted above,
434/// return true if the specified value dominates the block. We don't handle
435/// the true generality of domination here, just a special case which works
436/// well enough for us.
437///
438/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
439/// see if V (which must be an instruction) and its recursive operands
440/// that do not dominate BB have a combined cost lower than Budget and
441/// are non-trapping. If both are true, the instruction is inserted into the
442/// set and true is returned.
443///
444/// The cost for most non-trapping instructions is defined as 1 except for
445/// Select whose cost is 2.
446///
447/// After this function returns, Cost is increased by the cost of
448/// V plus its non-dominating operands. If that cost is greater than
449/// Budget, false is returned and Cost is undefined.
451 Value *V, BasicBlock *BB, Instruction *InsertPt,
452 SmallPtrSetImpl<Instruction *> &AggressiveInsts, InstructionCost &Cost,
454 SmallPtrSetImpl<Instruction *> &ZeroCostInstructions, unsigned Depth = 0) {
455 // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
456 // so limit the recursion depth.
457 // TODO: While this recursion limit does prevent pathological behavior, it
458 // would be better to track visited instructions to avoid cycles.
460 return false;
461
463 if (!I) {
464 // Non-instructions dominate all instructions and can be executed
465 // unconditionally.
466 return true;
467 }
468 BasicBlock *PBB = I->getParent();
469
470 // We don't want to allow weird loops that might have the "if condition" in
471 // the bottom of this block.
472 if (PBB == BB)
473 return false;
474
475 // If this instruction is defined in a block that contains an unconditional
476 // branch to BB, then it must be in the 'conditional' part of the "if
477 // statement". If not, it definitely dominates the region.
479 if (!BI || BI->getSuccessor() != BB)
480 return true;
481
482 // If we have seen this instruction before, don't count it again.
483 if (AggressiveInsts.count(I))
484 return true;
485
486 // Okay, it looks like the instruction IS in the "condition". Check to
487 // see if it's a cheap instruction to unconditionally compute, and if it
488 // only uses stuff defined outside of the condition. If so, hoist it out.
489 if (!isSafeToSpeculativelyExecute(I, InsertPt, AC))
490 return false;
491
492 // Overflow arithmetic instruction plus extract value are usually generated
493 // when a division is being replaced. But, in this case, the zero check may
494 // still be kept in the code. In that case it would be worth to hoist these
495 // two instruction out of the basic block. Let's treat this pattern as one
496 // single cheap instruction here!
497 WithOverflowInst *OverflowInst;
498 if (match(I, m_ExtractValue<1>(m_OneUse(m_WithOverflowInst(OverflowInst))))) {
499 ZeroCostInstructions.insert(OverflowInst);
500 Cost += 1;
501 } else if (!ZeroCostInstructions.contains(I))
502 Cost += computeSpeculationCost(I, TTI);
503
504 // Allow exactly one instruction to be speculated regardless of its cost
505 // (as long as it is safe to do so).
506 // This is intended to flatten the CFG even if the instruction is a division
507 // or other expensive operation. The speculation of an expensive instruction
508 // is expected to be undone in CodeGenPrepare if the speculation has not
509 // enabled further IR optimizations.
510 if (Cost > Budget &&
511 (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
512 !Cost.isValid()))
513 return false;
514
515 // Okay, we can only really hoist these out if their operands do
516 // not take us over the cost threshold.
517 for (Use &Op : I->operands())
518 if (!dominatesMergePoint(Op, BB, InsertPt, AggressiveInsts, Cost, Budget,
519 TTI, AC, ZeroCostInstructions, Depth + 1))
520 return false;
521 // Okay, it's safe to do this! Remember this instruction.
522 AggressiveInsts.insert(I);
523 return true;
524}
525
526/// Extract ConstantInt from value, looking through IntToPtr
527/// and PointerNullValue. Return NULL if value is not a constant int.
529 // Normal constant int.
531 if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy())
532 return CI;
533
534 // It is not safe to look through inttoptr or ptrtoint when using unstable
535 // pointer types.
536 if (DL.hasUnstableRepresentation(V->getType()))
537 return nullptr;
538
539 // This is some kind of pointer constant. Turn it into a pointer-sized
540 // ConstantInt if possible.
541 IntegerType *IntPtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
542
543 // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
545 return ConstantInt::get(IntPtrTy, 0);
546
547 // IntToPtr const int, we can look through this if the semantics of
548 // inttoptr for this address space are a simple (truncating) bitcast.
550 if (CE->getOpcode() == Instruction::IntToPtr)
551 if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
552 // The constant is very likely to have the right type already.
553 if (CI->getType() == IntPtrTy)
554 return CI;
555 else
556 return cast<ConstantInt>(
557 ConstantFoldIntegerCast(CI, IntPtrTy, /*isSigned=*/false, DL));
558 }
559 return nullptr;
560}
561
562namespace {
563
564/// Given a chain of or (||) or and (&&) comparison of a value against a
565/// constant, this will try to recover the information required for a switch
566/// structure.
567/// It will depth-first traverse the chain of comparison, seeking for patterns
568/// like %a == 12 or %a < 4 and combine them to produce a set of integer
569/// representing the different cases for the switch.
570/// Note that if the chain is composed of '||' it will build the set of elements
571/// that matches the comparisons (i.e. any of this value validate the chain)
572/// while for a chain of '&&' it will build the set elements that make the test
573/// fail.
574struct ConstantComparesGatherer {
575 const DataLayout &DL;
576
577 /// Value found for the switch comparison
578 Value *CompValue = nullptr;
579
580 /// Extra clause to be checked before the switch
581 Value *Extra = nullptr;
582
583 /// Set of integers to match in switch
585
586 /// Number of comparisons matched in the and/or chain
587 unsigned UsedICmps = 0;
588
589 /// If the elements in Vals matches the comparisons
590 bool IsEq = false;
591
592 // Used to check if the first matched CompValue shall be the Extra check.
593 bool IgnoreFirstMatch = false;
594 bool MultipleMatches = false;
595
596 /// Construct and compute the result for the comparison instruction Cond
597 ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
598 gather(Cond);
599 if (CompValue || !MultipleMatches)
600 return;
601 Extra = nullptr;
602 Vals.clear();
603 UsedICmps = 0;
604 IgnoreFirstMatch = true;
605 gather(Cond);
606 }
607
608 ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
609 ConstantComparesGatherer &
610 operator=(const ConstantComparesGatherer &) = delete;
611
612private:
613 /// Try to set the current value used for the comparison, it succeeds only if
614 /// it wasn't set before or if the new value is the same as the old one
615 bool setValueOnce(Value *NewVal) {
616 if (IgnoreFirstMatch) {
617 IgnoreFirstMatch = false;
618 return false;
619 }
620 if (CompValue && CompValue != NewVal) {
621 MultipleMatches = true;
622 return false;
623 }
624 CompValue = NewVal;
625 return true;
626 }
627
628 /// Try to match Instruction "I" as a comparison against a constant and
629 /// populates the array Vals with the set of values that match (or do not
630 /// match depending on isEQ).
631 /// Return false on failure. On success, the Value the comparison matched
632 /// against is placed in CompValue.
633 /// If CompValue is already set, the function is expected to fail if a match
634 /// is found but the value compared to is different.
635 bool matchInstruction(Instruction *I, bool isEQ) {
636 if (match(I, m_Not(m_Instruction(I))))
637 isEQ = !isEQ;
638
639 Value *Val;
640 if (match(I, m_NUWTrunc(m_Value(Val)))) {
641 // If we already have a value for the switch, it has to match!
642 if (!setValueOnce(Val))
643 return false;
644 UsedICmps++;
645 Vals.push_back(ConstantInt::get(cast<IntegerType>(Val->getType()), isEQ));
646 return true;
647 }
648 // If this is an icmp against a constant, handle this as one of the cases.
649 ICmpInst *ICI;
650 ConstantInt *C;
651 if (!((ICI = dyn_cast<ICmpInst>(I)) &&
652 (C = getConstantInt(I->getOperand(1), DL)))) {
653 return false;
654 }
655
656 Value *RHSVal;
657 const APInt *RHSC;
658
659 // Pattern match a special case
660 // (x & ~2^z) == y --> x == y || x == y|2^z
661 // This undoes a transformation done by instcombine to fuse 2 compares.
662 if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
663 // It's a little bit hard to see why the following transformations are
664 // correct. Here is a CVC3 program to verify them for 64-bit values:
665
666 /*
667 ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
668 x : BITVECTOR(64);
669 y : BITVECTOR(64);
670 z : BITVECTOR(64);
671 mask : BITVECTOR(64) = BVSHL(ONE, z);
672 QUERY( (y & ~mask = y) =>
673 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
674 );
675 QUERY( (y | mask = y) =>
676 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
677 );
678 */
679
680 // Please note that each pattern must be a dual implication (<--> or
681 // iff). One directional implication can create spurious matches. If the
682 // implication is only one-way, an unsatisfiable condition on the left
683 // side can imply a satisfiable condition on the right side. Dual
684 // implication ensures that satisfiable conditions are transformed to
685 // other satisfiable conditions and unsatisfiable conditions are
686 // transformed to other unsatisfiable conditions.
687
688 // Here is a concrete example of a unsatisfiable condition on the left
689 // implying a satisfiable condition on the right:
690 //
691 // mask = (1 << z)
692 // (x & ~mask) == y --> (x == y || x == (y | mask))
693 //
694 // Substituting y = 3, z = 0 yields:
695 // (x & -2) == 3 --> (x == 3 || x == 2)
696
697 // Pattern match a special case:
698 /*
699 QUERY( (y & ~mask = y) =>
700 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
701 );
702 */
703 if (match(ICI->getOperand(0),
704 m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
705 APInt Mask = ~*RHSC;
706 if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
707 // If we already have a value for the switch, it has to match!
708 if (!setValueOnce(RHSVal))
709 return false;
710
711 Vals.push_back(C);
712 Vals.push_back(
713 ConstantInt::get(C->getContext(),
714 C->getValue() | Mask));
715 UsedICmps++;
716 return true;
717 }
718 }
719
720 // Pattern match a special case:
721 /*
722 QUERY( (y | mask = y) =>
723 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
724 );
725 */
726 if (match(ICI->getOperand(0),
727 m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
728 APInt Mask = *RHSC;
729 if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
730 // If we already have a value for the switch, it has to match!
731 if (!setValueOnce(RHSVal))
732 return false;
733
734 Vals.push_back(C);
735 Vals.push_back(ConstantInt::get(C->getContext(),
736 C->getValue() & ~Mask));
737 UsedICmps++;
738 return true;
739 }
740 }
741
742 // If we already have a value for the switch, it has to match!
743 if (!setValueOnce(ICI->getOperand(0)))
744 return false;
745
746 UsedICmps++;
747 Vals.push_back(C);
748 return true;
749 }
750
751 // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
752 ConstantRange Span =
754
755 // Shift the range if the compare is fed by an add. This is the range
756 // compare idiom as emitted by instcombine.
757 Value *CandidateVal = I->getOperand(0);
758 if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
759 Span = Span.subtract(*RHSC);
760 CandidateVal = RHSVal;
761 }
762
763 // If this is an and/!= check, then we are looking to build the set of
764 // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into
765 // x != 0 && x != 1.
766 if (!isEQ)
767 Span = Span.inverse();
768
769 // If there are a ton of values, we don't want to make a ginormous switch.
770 if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
771 return false;
772 }
773
774 // If we already have a value for the switch, it has to match!
775 if (!setValueOnce(CandidateVal))
776 return false;
777
778 // Add all values from the range to the set
779 APInt Tmp = Span.getLower();
780 do
781 Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
782 while (++Tmp != Span.getUpper());
783
784 UsedICmps++;
785 return true;
786 }
787
788 /// Given a potentially 'or'd or 'and'd together collection of icmp
789 /// eq/ne/lt/gt instructions that compare a value against a constant, extract
790 /// the value being compared, and stick the list constants into the Vals
791 /// vector.
792 /// One "Extra" case is allowed to differ from the other.
793 void gather(Value *V) {
794 Value *Op0, *Op1;
795 if (match(V, m_LogicalOr(m_Value(Op0), m_Value(Op1))))
796 IsEq = true;
797 else if (match(V, m_LogicalAnd(m_Value(Op0), m_Value(Op1))))
798 IsEq = false;
799 else
800 return;
801 // Keep a stack (SmallVector for efficiency) for depth-first traversal
802 SmallVector<Value *, 8> DFT{Op0, Op1};
803 SmallPtrSet<Value *, 8> Visited{V, Op0, Op1};
804
805 while (!DFT.empty()) {
806 V = DFT.pop_back_val();
807
808 if (Instruction *I = dyn_cast<Instruction>(V)) {
809 // If it is a || (or && depending on isEQ), process the operands.
810 if (IsEq ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
811 : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
812 if (Visited.insert(Op1).second)
813 DFT.push_back(Op1);
814 if (Visited.insert(Op0).second)
815 DFT.push_back(Op0);
816
817 continue;
818 }
819
820 // Try to match the current instruction
821 if (matchInstruction(I, IsEq))
822 // Match succeed, continue the loop
823 continue;
824 }
825
826 // One element of the sequence of || (or &&) could not be match as a
827 // comparison against the same value as the others.
828 // We allow only one "Extra" case to be checked before the switch
829 if (!Extra) {
830 Extra = V;
831 continue;
832 }
833 // Failed to parse a proper sequence, abort now
834 CompValue = nullptr;
835 break;
836 }
837 }
838};
839
840} // end anonymous namespace
841
843 MemorySSAUpdater *MSSAU = nullptr) {
844 Instruction *Cond = nullptr;
846 Cond = dyn_cast<Instruction>(SI->getCondition());
847 } else if (CondBrInst *BI = dyn_cast<CondBrInst>(TI)) {
848 Cond = dyn_cast<Instruction>(BI->getCondition());
849 } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
850 Cond = dyn_cast<Instruction>(IBI->getAddress());
851 }
852
853 TI->eraseFromParent();
854 if (Cond)
856}
857
858/// Return true if the specified terminator checks
859/// to see if a value is equal to constant integer value.
860Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
861 Value *CV = nullptr;
862 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
863 // Do not permit merging of large switch instructions into their
864 // predecessors unless there is only one predecessor.
865 if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
866 CV = SI->getCondition();
867 } else if (CondBrInst *BI = dyn_cast<CondBrInst>(TI))
868 if (BI->getCondition()->hasOneUse()) {
869 if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
870 if (ICI->isEquality() && getConstantInt(ICI->getOperand(1), DL))
871 CV = ICI->getOperand(0);
872 } else if (auto *Trunc = dyn_cast<TruncInst>(BI->getCondition())) {
873 if (Trunc->hasNoUnsignedWrap())
874 CV = Trunc->getOperand(0);
875 }
876 }
877
878 // Unwrap any lossless ptrtoint cast (except for unstable pointers).
879 if (CV) {
880 if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
881 Value *Ptr = PTII->getPointerOperand();
882 if (DL.hasUnstableRepresentation(Ptr->getType()))
883 return CV;
884 if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
885 CV = Ptr;
886 }
887 }
888 return CV;
889}
890
/// Given a value comparison instruction,
/// decode all of the 'cases' that it represents and return the 'default' block.
BasicBlock *SimplifyCFGOpt::getValueEqualityComparisonCases(
    Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
    Cases.reserve(SI->getNumCases());
    for (auto Case : SI->cases())
      Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
                                                  Case.getCaseSuccessor()));
    return SI->getDefaultDest();
  }

  // Otherwise this must be a conditional branch on "icmp eq/ne X, C" or on
  // "trunc nuw X" (the only forms isValueEqualityComparison accepts).
  CondBrInst *BI = cast<CondBrInst>(TI);
  Value *Cond = BI->getCondition();
  ICmpInst::Predicate Pred;
  ConstantInt *C;
  if (auto *ICI = dyn_cast<ICmpInst>(Cond)) {
    Pred = ICI->getPredicate();
    C = getConstantInt(ICI->getOperand(1), DL);
  } else {
    // "trunc nuw X" branches on X != 0, so model it as ICMP_NE against zero
    // of X's type.
    Pred = ICmpInst::ICMP_NE;
    auto *Trunc = cast<TruncInst>(Cond);
    C = ConstantInt::get(cast<IntegerType>(Trunc->getOperand(0)->getType()), 0);
  }
  // For "X == C" the case edge is the true successor (index 0); for "X != C"
  // the case edge is the false successor (index 1). The opposite successor is
  // the default.
  BasicBlock *Succ = BI->getSuccessor(Pred == ICmpInst::ICMP_NE);
  Cases.push_back(ValueEqualityComparisonCase(C, Succ));
  return BI->getSuccessor(Pred == ICmpInst::ICMP_EQ);
}
919
920/// Given a vector of bb/value pairs, remove any entries
921/// in the list that match the specified block.
922static void
924 std::vector<ValueEqualityComparisonCase> &Cases) {
925 llvm::erase(Cases, BB);
926}
927
928/// Return true if there are any keys in C1 that exist in C2 as well.
929static bool valuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
930 std::vector<ValueEqualityComparisonCase> &C2) {
931 std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
932
933 // Make V1 be smaller than V2.
934 if (V1->size() > V2->size())
935 std::swap(V1, V2);
936
937 if (V1->empty())
938 return false;
939 if (V1->size() == 1) {
940 // Just scan V2.
941 ConstantInt *TheVal = (*V1)[0].Value;
942 for (const ValueEqualityComparisonCase &VECC : *V2)
943 if (TheVal == VECC.Value)
944 return true;
945 }
946
947 // Otherwise, just sort both lists and compare element by element.
948 array_pod_sort(V1->begin(), V1->end());
949 array_pod_sort(V2->begin(), V2->end());
950 unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
951 while (i1 != e1 && i2 != e2) {
952 if ((*V1)[i1].Value == (*V2)[i2].Value)
953 return true;
954 if ((*V1)[i1].Value < (*V2)[i2].Value)
955 ++i1;
956 else
957 ++i2;
958 }
959 return false;
960}
961
962/// If TI is known to be a terminator instruction and its block is known to
963/// only have a single predecessor block, check to see if that predecessor is
964/// also a value comparison with the same value, and if that comparison
965/// determines the outcome of this comparison. If so, simplify TI. This does a
966/// very limited form of jump threading.
967bool SimplifyCFGOpt::simplifyEqualityComparisonWithOnlyPredecessor(
968 Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
969 Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
970 if (!PredVal)
971 return false; // Not a value comparison in predecessor.
972
973 Value *ThisVal = isValueEqualityComparison(TI);
974 assert(ThisVal && "This isn't a value comparison!!");
975 if (ThisVal != PredVal)
976 return false; // Different predicates.
977
978 // TODO: Preserve branch weight metadata, similarly to how
979 // foldValueComparisonIntoPredecessors preserves it.
980
981 // Find out information about when control will move from Pred to TI's block.
982 std::vector<ValueEqualityComparisonCase> PredCases;
983 BasicBlock *PredDef =
984 getValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
985 eliminateBlockCases(PredDef, PredCases); // Remove default from cases.
986
987 // Find information about how control leaves this block.
988 std::vector<ValueEqualityComparisonCase> ThisCases;
989 BasicBlock *ThisDef = getValueEqualityComparisonCases(TI, ThisCases);
990 eliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.
991
992 // If TI's block is the default block from Pred's comparison, potentially
993 // simplify TI based on this knowledge.
994 if (PredDef == TI->getParent()) {
995 // If we are here, we know that the value is none of those cases listed in
996 // PredCases. If there are any cases in ThisCases that are in PredCases, we
997 // can simplify TI.
998 if (!valuesOverlap(PredCases, ThisCases))
999 return false;
1000
1001 if (isa<CondBrInst>(TI)) {
1002 // Okay, one of the successors of this condbr is dead. Convert it to a
1003 // uncond br.
1004 assert(ThisCases.size() == 1 && "Branch can only have one case!");
1005 // Insert the new branch.
1006 Instruction *NI = Builder.CreateBr(ThisDef);
1007 (void)NI;
1008
1009 // Remove PHI node entries for the dead edge.
1010 ThisCases[0].Dest->removePredecessor(PredDef);
1011
1012 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1013 << "Through successor TI: " << *TI << "Leaving: " << *NI
1014 << "\n");
1015
1017
1018 if (DTU)
1019 DTU->applyUpdates(
1020 {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});
1021
1022 return true;
1023 }
1024
1025 SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
1026 // Okay, TI has cases that are statically dead, prune them away.
1027 SmallPtrSet<Constant *, 16> DeadCases;
1028 for (const ValueEqualityComparisonCase &Case : PredCases)
1029 DeadCases.insert(Case.Value);
1030
1031 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1032 << "Through successor TI: " << *TI);
1033
1034 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
1035 for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
1036 --i;
1037 auto *Successor = i->getCaseSuccessor();
1038 if (DTU)
1039 ++NumPerSuccessorCases[Successor];
1040 if (DeadCases.count(i->getCaseValue())) {
1041 Successor->removePredecessor(PredDef);
1042 SI.removeCase(i);
1043 if (DTU)
1044 --NumPerSuccessorCases[Successor];
1045 }
1046 }
1047
1048 if (DTU) {
1049 std::vector<DominatorTree::UpdateType> Updates;
1050 for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
1051 if (I.second == 0)
1052 Updates.push_back({DominatorTree::Delete, PredDef, I.first});
1053 DTU->applyUpdates(Updates);
1054 }
1055
1056 LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
1057 return true;
1058 }
1059
1060 // Otherwise, TI's block must correspond to some matched value. Find out
1061 // which value (or set of values) this is.
1062 ConstantInt *TIV = nullptr;
1063 BasicBlock *TIBB = TI->getParent();
1064 for (const auto &[Value, Dest] : PredCases)
1065 if (Dest == TIBB) {
1066 if (TIV)
1067 return false; // Cannot handle multiple values coming to this block.
1068 TIV = Value;
1069 }
1070 assert(TIV && "No edge from pred to succ?");
1071
1072 // Okay, we found the one constant that our value can be if we get into TI's
1073 // BB. Find out which successor will unconditionally be branched to.
1074 BasicBlock *TheRealDest = nullptr;
1075 for (const auto &[Value, Dest] : ThisCases)
1076 if (Value == TIV) {
1077 TheRealDest = Dest;
1078 break;
1079 }
1080
1081 // If not handled by any explicit cases, it is handled by the default case.
1082 if (!TheRealDest)
1083 TheRealDest = ThisDef;
1084
1085 SmallPtrSet<BasicBlock *, 2> RemovedSuccs;
1086
1087 // Remove PHI node entries for dead edges.
1088 BasicBlock *CheckEdge = TheRealDest;
1089 for (BasicBlock *Succ : successors(TIBB))
1090 if (Succ != CheckEdge) {
1091 if (Succ != TheRealDest)
1092 RemovedSuccs.insert(Succ);
1093 Succ->removePredecessor(TIBB);
1094 } else
1095 CheckEdge = nullptr;
1096
1097 // Insert the new branch.
1098 Instruction *NI = Builder.CreateBr(TheRealDest);
1099 (void)NI;
1100
1101 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1102 << "Through successor TI: " << *TI << "Leaving: " << *NI
1103 << "\n");
1104
1106 if (DTU) {
1107 SmallVector<DominatorTree::UpdateType, 2> Updates;
1108 Updates.reserve(RemovedSuccs.size());
1109 for (auto *RemovedSucc : RemovedSuccs)
1110 Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc});
1111 DTU->applyUpdates(Updates);
1112 }
1113 return true;
1114}
1115
namespace {

/// This class implements a stable ordering of constant
/// integers that does not depend on their address. This is important for
/// applications that sort ConstantInt's to ensure uniqueness.
struct ConstantIntOrdering {
  // Strict weak ordering by unsigned integer value, independent of the
  // ConstantInt object's address.
  bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
    return LHS->getValue().ult(RHS->getValue());
  }
};

} // end anonymous namespace
1128
1130 ConstantInt *const *P2) {
1131 const ConstantInt *LHS = *P1;
1132 const ConstantInt *RHS = *P2;
1133 if (LHS == RHS)
1134 return 0;
1135 return LHS->getValue().ult(RHS->getValue()) ? 1 : -1;
1136}
1137
1138/// Get Weights of a given terminator, the default weight is at the front
1139/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
1140/// metadata.
1142 SmallVectorImpl<uint64_t> &Weights) {
1143 MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
1144 assert(MD && "Invalid branch-weight metadata");
1145 extractFromBranchWeightMD64(MD, Weights);
1146
1147 // If TI is a conditional eq, the default case is the false case,
1148 // and the corresponding branch-weight data is at index 2. We swap the
1149 // default weight to be the first entry.
1150 if (CondBrInst *BI = dyn_cast<CondBrInst>(TI)) {
1151 assert(Weights.size() == 2);
1152 auto *ICI = dyn_cast<ICmpInst>(BI->getCondition());
1153 if (!ICI)
1154 return;
1155
1156 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
1157 std::swap(Weights.front(), Weights.back());
1158 }
1159}
1160
1162 BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
1163 Instruction *PTI = PredBlock->getTerminator();
1164
1165 // If we have bonus instructions, clone them into the predecessor block.
1166 // Note that there may be multiple predecessor blocks, so we cannot move
1167 // bonus instructions to a predecessor block.
1168 for (Instruction &BonusInst : *BB) {
1169 if (BonusInst.isTerminator())
1170 continue;
1171
1172 Instruction *NewBonusInst = BonusInst.clone();
1173
1174 if (!NewBonusInst->getDebugLoc().isSameSourceLocation(PTI->getDebugLoc())) {
1175 // Unless the instruction has the same !dbg location as the original
1176 // branch, drop it. When we fold the bonus instructions we want to make
1177 // sure we reset their debug locations in order to avoid stepping on
1178 // dead code caused by folding dead branches.
1179 NewBonusInst->setDebugLoc(DebugLoc::getDropped());
1180 } else if (const DebugLoc &DL = NewBonusInst->getDebugLoc()) {
1181 mapAtomInstance(DL, VMap);
1182 }
1183
1184 RemapInstruction(NewBonusInst, VMap,
1186
1187 // If we speculated an instruction, we need to drop any metadata that may
1188 // result in undefined behavior, as the metadata might have been valid
1189 // only given the branch precondition.
1190 // Similarly strip attributes on call parameters that may cause UB in
1191 // location the call is moved to.
1192 NewBonusInst->dropUBImplyingAttrsAndMetadata();
1193
1194 NewBonusInst->insertInto(PredBlock, PTI->getIterator());
1195 auto Range = NewBonusInst->cloneDebugInfoFrom(&BonusInst);
1196 RemapDbgRecordRange(NewBonusInst->getModule(), Range, VMap,
1198
1199 NewBonusInst->takeName(&BonusInst);
1200 BonusInst.setName(NewBonusInst->getName() + ".old");
1201 VMap[&BonusInst] = NewBonusInst;
1202
1203 // Update (liveout) uses of bonus instructions,
1204 // now that the bonus instruction has been cloned into predecessor.
1205 // Note that we expect to be in a block-closed SSA form for this to work!
1206 for (Use &U : make_early_inc_range(BonusInst.uses())) {
1207 auto *UI = cast<Instruction>(U.getUser());
1208 auto *PN = dyn_cast<PHINode>(UI);
1209 if (!PN) {
1210 assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
1211 "If the user is not a PHI node, then it should be in the same "
1212 "block as, and come after, the original bonus instruction.");
1213 continue; // Keep using the original bonus instruction.
1214 }
1215 // Is this the block-closed SSA form PHI node?
1216 if (PN->getIncomingBlock(U) == BB)
1217 continue; // Great, keep using the original bonus instruction.
1218 // The only other alternative is an "use" when coming from
1219 // the predecessor block - here we should refer to the cloned bonus instr.
1220 assert(PN->getIncomingBlock(U) == PredBlock &&
1221 "Not in block-closed SSA form?");
1222 U.set(NewBonusInst);
1223 }
1224 }
1225
1226 // Key Instructions: We may have propagated atom info into the pred. If the
1227 // pred's terminator already has atom info do nothing as merging would drop
1228 // one atom group anyway. If it doesn't, propagte the remapped atom group
1229 // from BB's terminator.
1230 if (auto &PredDL = PTI->getDebugLoc()) {
1231 auto &DL = BB->getTerminator()->getDebugLoc();
1232 if (!PredDL->getAtomGroup() && DL && DL->getAtomGroup() &&
1233 PredDL.isSameSourceLocation(DL)) {
1234 PTI->setDebugLoc(DL);
1235 RemapSourceAtom(PTI, VMap);
1236 }
1237 }
1238}
1239
1240bool SimplifyCFGOpt::performValueComparisonIntoPredecessorFolding(
1241 Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
1242 BasicBlock *BB = TI->getParent();
1243 BasicBlock *Pred = PTI->getParent();
1244
1246
1247 // Figure out which 'cases' to copy from SI to PSI.
1248 std::vector<ValueEqualityComparisonCase> BBCases;
1249 BasicBlock *BBDefault = getValueEqualityComparisonCases(TI, BBCases);
1250
1251 std::vector<ValueEqualityComparisonCase> PredCases;
1252 BasicBlock *PredDefault = getValueEqualityComparisonCases(PTI, PredCases);
1253
1254 // Based on whether the default edge from PTI goes to BB or not, fill in
1255 // PredCases and PredDefault with the new switch cases we would like to
1256 // build.
1257 SmallMapVector<BasicBlock *, int, 8> NewSuccessors;
1258
1259 // Update the branch weight metadata along the way
1260 SmallVector<uint64_t, 8> Weights;
1261 bool PredHasWeights = hasBranchWeightMD(*PTI);
1262 bool SuccHasWeights = hasBranchWeightMD(*TI);
1263
1264 if (PredHasWeights) {
1265 getBranchWeights(PTI, Weights);
1266 // branch-weight metadata is inconsistent here.
1267 if (Weights.size() != 1 + PredCases.size())
1268 PredHasWeights = SuccHasWeights = false;
1269 } else if (SuccHasWeights)
1270 // If there are no predecessor weights but there are successor weights,
1271 // populate Weights with 1, which will later be scaled to the sum of
1272 // successor's weights
1273 Weights.assign(1 + PredCases.size(), 1);
1274
1275 SmallVector<uint64_t, 8> SuccWeights;
1276 if (SuccHasWeights) {
1277 getBranchWeights(TI, SuccWeights);
1278 // branch-weight metadata is inconsistent here.
1279 if (SuccWeights.size() != 1 + BBCases.size())
1280 PredHasWeights = SuccHasWeights = false;
1281 } else if (PredHasWeights)
1282 SuccWeights.assign(1 + BBCases.size(), 1);
1283
1284 if (PredDefault == BB) {
1285 // If this is the default destination from PTI, only the edges in TI
1286 // that don't occur in PTI, or that branch to BB will be activated.
1287 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1288 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1289 if (PredCases[i].Dest != BB)
1290 PTIHandled.insert(PredCases[i].Value);
1291 else {
1292 // The default destination is BB, we don't need explicit targets.
1293 std::swap(PredCases[i], PredCases.back());
1294
1295 if (PredHasWeights || SuccHasWeights) {
1296 // Increase weight for the default case.
1297 Weights[0] += Weights[i + 1];
1298 std::swap(Weights[i + 1], Weights.back());
1299 Weights.pop_back();
1300 }
1301
1302 PredCases.pop_back();
1303 --i;
1304 --e;
1305 }
1306
1307 // Reconstruct the new switch statement we will be building.
1308 if (PredDefault != BBDefault) {
1309 PredDefault->removePredecessor(Pred);
1310 if (DTU && PredDefault != BB)
1311 Updates.push_back({DominatorTree::Delete, Pred, PredDefault});
1312 PredDefault = BBDefault;
1313 ++NewSuccessors[BBDefault];
1314 }
1315
1316 unsigned CasesFromPred = Weights.size();
1317 uint64_t ValidTotalSuccWeight = 0;
1318 for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
1319 if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
1320 PredCases.push_back(BBCases[i]);
1321 ++NewSuccessors[BBCases[i].Dest];
1322 if (SuccHasWeights || PredHasWeights) {
1323 // The default weight is at index 0, so weight for the ith case
1324 // should be at index i+1. Scale the cases from successor by
1325 // PredDefaultWeight (Weights[0]).
1326 Weights.push_back(Weights[0] * SuccWeights[i + 1]);
1327 ValidTotalSuccWeight += SuccWeights[i + 1];
1328 }
1329 }
1330
1331 if (SuccHasWeights || PredHasWeights) {
1332 ValidTotalSuccWeight += SuccWeights[0];
1333 // Scale the cases from predecessor by ValidTotalSuccWeight.
1334 for (unsigned i = 1; i < CasesFromPred; ++i)
1335 Weights[i] *= ValidTotalSuccWeight;
1336 // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
1337 Weights[0] *= SuccWeights[0];
1338 }
1339 } else {
1340 // If this is not the default destination from PSI, only the edges
1341 // in SI that occur in PSI with a destination of BB will be
1342 // activated.
1343 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1344 std::map<ConstantInt *, uint64_t> WeightsForHandled;
1345 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1346 if (PredCases[i].Dest == BB) {
1347 PTIHandled.insert(PredCases[i].Value);
1348
1349 if (PredHasWeights || SuccHasWeights) {
1350 WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
1351 std::swap(Weights[i + 1], Weights.back());
1352 Weights.pop_back();
1353 }
1354
1355 std::swap(PredCases[i], PredCases.back());
1356 PredCases.pop_back();
1357 --i;
1358 --e;
1359 }
1360
1361 // Okay, now we know which constants were sent to BB from the
1362 // predecessor. Figure out where they will all go now.
1363 for (const ValueEqualityComparisonCase &Case : BBCases)
1364 if (PTIHandled.count(Case.Value)) {
1365 // If this is one we are capable of getting...
1366 if (PredHasWeights || SuccHasWeights)
1367 Weights.push_back(WeightsForHandled[Case.Value]);
1368 PredCases.push_back(Case);
1369 ++NewSuccessors[Case.Dest];
1370 PTIHandled.erase(Case.Value); // This constant is taken care of
1371 }
1372
1373 // If there are any constants vectored to BB that TI doesn't handle,
1374 // they must go to the default destination of TI.
1375 for (ConstantInt *I : PTIHandled) {
1376 if (PredHasWeights || SuccHasWeights)
1377 Weights.push_back(WeightsForHandled[I]);
1378 PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
1379 ++NewSuccessors[BBDefault];
1380 }
1381 }
1382
1383 // Okay, at this point, we know which new successor Pred will get. Make
1384 // sure we update the number of entries in the PHI nodes for these
1385 // successors.
1386 SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
1387 if (DTU) {
1388 SuccsOfPred = {llvm::from_range, successors(Pred)};
1389 Updates.reserve(Updates.size() + NewSuccessors.size());
1390 }
1391 for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
1392 NewSuccessors) {
1393 for (auto I : seq(NewSuccessor.second)) {
1394 (void)I;
1395 addPredecessorToBlock(NewSuccessor.first, Pred, BB);
1396 }
1397 if (DTU && !SuccsOfPred.contains(NewSuccessor.first))
1398 Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
1399 }
1400
1401 Builder.SetInsertPoint(PTI);
1402 // Convert pointer to int before we switch.
1403 if (CV->getType()->isPointerTy()) {
1404 assert(!DL.hasUnstableRepresentation(CV->getType()) &&
1405 "Should not end up here with unstable pointers");
1406 CV =
1407 Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr");
1408 }
1409
1410 // Now that the successors are updated, create the new Switch instruction.
1411 SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size());
1412 NewSI->setDebugLoc(PTI->getDebugLoc());
1413 for (ValueEqualityComparisonCase &V : PredCases)
1414 NewSI->addCase(V.Value, V.Dest);
1415
1416 if (PredHasWeights || SuccHasWeights)
1417 setFittedBranchWeights(*NewSI, Weights, /*IsExpected=*/false,
1418 /*ElideAllZero=*/true);
1419
1421
1422 // Okay, last check. If BB is still a successor of PSI, then we must
1423 // have an infinite loop case. If so, add an infinitely looping block
1424 // to handle the case to preserve the behavior of the code.
1425 BasicBlock *InfLoopBlock = nullptr;
1426 for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
1427 if (NewSI->getSuccessor(i) == BB) {
1428 if (!InfLoopBlock) {
1429 // Insert it at the end of the function, because it's either code,
1430 // or it won't matter if it's hot. :)
1431 InfLoopBlock =
1432 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
1433 UncondBrInst::Create(InfLoopBlock, InfLoopBlock);
1434 if (DTU)
1435 Updates.push_back(
1436 {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
1437 }
1438 NewSI->setSuccessor(i, InfLoopBlock);
1439 }
1440
1441 if (DTU) {
1442 if (InfLoopBlock)
1443 Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});
1444
1445 Updates.push_back({DominatorTree::Delete, Pred, BB});
1446
1447 DTU->applyUpdates(Updates);
1448 }
1449
1450 ++NumFoldValueComparisonIntoPredecessors;
1451 return true;
1452}
1453
1454/// The specified terminator is a value equality comparison instruction
1455/// (either a switch or a branch on "X == c").
1456/// See if any of the predecessors of the terminator block are value comparisons
1457/// on the same value. If so, and if safe to do so, fold them together.
1458bool SimplifyCFGOpt::foldValueComparisonIntoPredecessors(Instruction *TI,
1459 IRBuilder<> &Builder) {
1460 BasicBlock *BB = TI->getParent();
1461 Value *CV = isValueEqualityComparison(TI); // CondVal
1462 assert(CV && "Not a comparison?");
1463
1464 bool Changed = false;
1465
1466 SmallSetVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
1467 while (!Preds.empty()) {
1468 BasicBlock *Pred = Preds.pop_back_val();
1469 Instruction *PTI = Pred->getTerminator();
1470
1471 // Don't try to fold into itself.
1472 if (Pred == BB)
1473 continue;
1474
1475 // See if the predecessor is a comparison with the same value.
1476 Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
1477 if (PCV != CV)
1478 continue;
1479
1480 SmallSetVector<BasicBlock *, 4> FailBlocks;
1481 if (!safeToMergeTerminators(TI, PTI, &FailBlocks)) {
1482 for (auto *Succ : FailBlocks) {
1483 if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU))
1484 return false;
1485 }
1486 }
1487
1488 performValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
1489 Changed = true;
1490 }
1491 return Changed;
1492}
1493
1494// If we would need to insert a select that uses the value of this invoke
1495// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would
1496// need to do this), we can't hoist the invoke, as there is nowhere to put the
1497// select in this case.
1499 Instruction *I1, Instruction *I2) {
1500 for (BasicBlock *Succ : successors(BB1)) {
1501 for (const PHINode &PN : Succ->phis()) {
1502 Value *BB1V = PN.getIncomingValueForBlock(BB1);
1503 Value *BB2V = PN.getIncomingValueForBlock(BB2);
1504 if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
1505 return false;
1506 }
1507 }
1508 }
1509 return true;
1510}
1511
1512// Get interesting characteristics of instructions that
1513// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of
1514// instructions can be reordered across.
1520
1522 unsigned Flags = 0;
1523 if (I->mayReadFromMemory())
1524 Flags |= SkipReadMem;
1525 // We can't arbitrarily move around allocas, e.g. moving allocas (especially
1526 // inalloca) across stacksave/stackrestore boundaries.
1527 if (I->mayHaveSideEffects() || isa<AllocaInst>(I))
1528 Flags |= SkipSideEffect;
1530 Flags |= SkipImplicitControlFlow;
1531 return Flags;
1532}
1533
1534// Returns true if it is safe to reorder an instruction across preceding
1535// instructions in a basic block.
1536static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
1537 // Don't reorder a store over a load.
1538 if ((Flags & SkipReadMem) && I->mayWriteToMemory())
1539 return false;
1540
1541 // If we have seen an instruction with side effects, it's unsafe to reorder an
1542 // instruction which reads memory or itself has side effects.
1543 if ((Flags & SkipSideEffect) &&
1544 (I->mayReadFromMemory() || I->mayHaveSideEffects() || isa<AllocaInst>(I)))
1545 return false;
1546
1547 // Reordering across an instruction which does not necessarily transfer
1548 // control to the next instruction is speculation.
1550 return false;
1551
1552 // Hoisting of llvm.deoptimize is only legal together with the next return
1553 // instruction, which this pass is not always able to do.
1554 if (auto *CB = dyn_cast<CallBase>(I))
1555 if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
1556 return false;
1557
1558 // It's also unsafe/illegal to hoist an instruction above its instruction
1559 // operands
1560 BasicBlock *BB = I->getParent();
1561 for (Value *Op : I->operands()) {
1562 if (auto *J = dyn_cast<Instruction>(Op))
1563 if (J->getParent() == BB)
1564 return false;
1565 }
1566
1567 return true;
1568}
1569
1570static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
1571
1572/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical
1573/// instructions \p I1 and \p I2 can and should be hoisted.
1575 const TargetTransformInfo &TTI) {
1576 // If we're going to hoist a call, make sure that the two instructions
1577 // we're commoning/hoisting are both marked with musttail, or neither of
1578 // them is marked as such. Otherwise, we might end up in a situation where
1579 // we hoist from a block where the terminator is a `ret` to a block where
1580 // the terminator is a `br`, and `musttail` calls expect to be followed by
1581 // a return.
1582 auto *C1 = dyn_cast<CallInst>(I1);
1583 auto *C2 = dyn_cast<CallInst>(I2);
1584 if (C1 && C2)
1585 if (C1->isMustTailCall() != C2->isMustTailCall())
1586 return false;
1587
1588 if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2))
1589 return false;
1590
1591 // If any of the two call sites has nomerge or convergent attribute, stop
1592 // hoisting.
1593 if (const auto *CB1 = dyn_cast<CallBase>(I1))
1594 if (CB1->cannotMerge() || CB1->isConvergent())
1595 return false;
1596 if (const auto *CB2 = dyn_cast<CallBase>(I2))
1597 if (CB2->cannotMerge() || CB2->isConvergent())
1598 return false;
1599
1600 return true;
1601}
1602
1603/// Hoists DbgVariableRecords from \p I1 and \p OtherInstrs that are identical
1604/// in lock-step to \p TI. This matches how dbg.* intrinsics are hoisting in
1605/// hoistCommonCodeFromSuccessors. e.g. The input:
1606/// I1 DVRs: { x, z },
1607/// OtherInsts: { I2 DVRs: { x, y, z } }
1608/// would result in hoisting only DbgVariableRecord x.
1610 Instruction *TI, Instruction *I1,
1611 SmallVectorImpl<Instruction *> &OtherInsts) {
1612 if (!I1->hasDbgRecords())
1613 return;
1614 using CurrentAndEndIt =
1615 std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
1616 // Vector of {Current, End} iterators.
1618 Itrs.reserve(OtherInsts.size() + 1);
1619 // Helper lambdas for lock-step checks:
1620 // Return true if this Current == End.
1621 auto atEnd = [](const CurrentAndEndIt &Pair) {
1622 return Pair.first == Pair.second;
1623 };
1624 // Return true if all Current are identical.
1625 auto allIdentical = [](const SmallVector<CurrentAndEndIt> &Itrs) {
1626 return all_of(make_first_range(ArrayRef(Itrs).drop_front()),
1628 return Itrs[0].first->isIdenticalToWhenDefined(*I);
1629 });
1630 };
1631
1632 // Collect the iterators.
1633 Itrs.push_back(
1634 {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
1635 for (Instruction *Other : OtherInsts) {
1636 if (!Other->hasDbgRecords())
1637 return;
1638 Itrs.push_back(
1639 {Other->getDbgRecordRange().begin(), Other->getDbgRecordRange().end()});
1640 }
1641
1642 // Iterate in lock-step until any of the DbgRecord lists are exausted. If
1643 // the lock-step DbgRecord are identical, hoist all of them to TI.
1644 // This replicates the dbg.* intrinsic behaviour in
1645 // hoistCommonCodeFromSuccessors.
1646 while (none_of(Itrs, atEnd)) {
1647 bool HoistDVRs = allIdentical(Itrs);
1648 for (CurrentAndEndIt &Pair : Itrs) {
1649 // Increment Current iterator now as we may be about to move the
1650 // DbgRecord.
1651 DbgRecord &DR = *Pair.first++;
1652 if (HoistDVRs) {
1653 DR.removeFromParent();
1654 TI->getParent()->insertDbgRecordBefore(&DR, TI->getIterator());
1655 }
1656 }
1657 }
1658}
1659
1661 const Instruction *I2) {
1662 if (I1->isIdenticalToWhenDefined(I2, /*IntersectAttrs=*/true))
1663 return true;
1664
1665 if (auto *Cmp1 = dyn_cast<CmpInst>(I1))
1666 if (auto *Cmp2 = dyn_cast<CmpInst>(I2))
1667 return Cmp1->getPredicate() == Cmp2->getSwappedPredicate() &&
1668 Cmp1->getOperand(0) == Cmp2->getOperand(1) &&
1669 Cmp1->getOperand(1) == Cmp2->getOperand(0);
1670
1671 if (I1->isCommutative() && I1->isSameOperationAs(I2)) {
1672 return I1->getOperand(0) == I2->getOperand(1) &&
1673 I1->getOperand(1) == I2->getOperand(0) &&
1674 equal(drop_begin(I1->operands(), 2), drop_begin(I2->operands(), 2));
1675 }
1676
1677 return false;
1678}
1679
1680/// If the target supports conditional faulting,
1681/// we look for the following pattern:
1682/// \code
1683/// BB:
1684/// ...
1685/// %cond = icmp ult %x, %y
1686/// br i1 %cond, label %TrueBB, label %FalseBB
1687/// FalseBB:
1688/// store i32 1, ptr %q, align 4
1689/// ...
1690/// TrueBB:
1691/// %maskedloadstore = load i32, ptr %b, align 4
1692/// store i32 %maskedloadstore, ptr %p, align 4
1693/// ...
1694/// \endcode
1695///
1696/// and transform it into:
1697///
1698/// \code
1699/// BB:
1700/// ...
1701/// %cond = icmp ult %x, %y
1702/// %maskedloadstore = cload i32, ptr %b, %cond
1703/// cstore i32 %maskedloadstore, ptr %p, %cond
1704/// cstore i32 1, ptr %q, ~%cond
1705/// br i1 %cond, label %TrueBB, label %FalseBB
1706/// FalseBB:
1707/// ...
1708/// TrueBB:
1709/// ...
1710/// \endcode
1711///
1712/// where cload/cstore are represented by llvm.masked.load/store intrinsics,
1713/// e.g.
1714///
1715/// \code
1716/// %vcond = bitcast i1 %cond to <1 x i1>
1717/// %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
1718/// (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
1719/// %maskedloadstore = bitcast <1 x i32> %v0 to i32
1720/// call void @llvm.masked.store.v1i32.p0
/// (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
/// %cond.not = xor i1 %cond, true
/// %vcond.not = bitcast i1 %cond.not to <1 x i>
/// call void @llvm.masked.store.v1i32.p0
/// (<1 x i32> <i32 1>, ptr %q, i32 4, <1x i1> %vcond.not)
/// \endcode
///
/// So we need to turn hoisted load/store into cload/cstore.
///
/// \param BI The branch instruction.
/// \param SpeculatedConditionalLoadsStores The load/store instructions that
///        will be speculated.
/// \param Invert indicates if speculates FalseBB. Only used in triangle CFG.
/// \param Sel if non-null, used as the insertion point for the mask
///        computation, and a masked load whose user was hoisted into \p BI's
///        block is re-inserted before that user — TODO confirm against
///        callers.
    CondBrInst *BI,
    SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
    std::optional<bool> Invert, Instruction *Sel) {
  auto &Context = BI->getParent()->getContext();
  // Scalar accesses are widened to one-element vectors, so the mask type is
  // always <1 x i1>.
  auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
  auto *Cond = BI->getCondition();
  // Construct the condition if needed.
  BasicBlock *BB = BI->getParent();
  Value *Mask = nullptr;
  Value *MaskFalse = nullptr;
  Value *MaskTrue = nullptr;
  if (Invert.has_value()) {
    // Triangle CFG (per \p Invert's doc): a single mask, negated when the
    // false successor is the speculated one.
    IRBuilder<> Builder(Sel ? Sel : SpeculatedConditionalLoadsStores.back());
    Mask = Builder.CreateBitCast(
        *Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
        VCondTy);
  } else {
    // Both successors are speculated: one mask per arm of the branch.
    IRBuilder<> Builder(BI);
    MaskFalse = Builder.CreateBitCast(
        Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy);
    MaskTrue = Builder.CreateBitCast(Cond, VCondTy);
  }
  // Strip bitcasts so the re-cast below starts from the underlying value.
  auto PeekThroughBitcasts = [](Value *V) {
    while (auto *BitCast = dyn_cast<BitCastInst>(V))
      V = BitCast->getOperand(0);
    return V;
  };
  for (auto *I : SpeculatedConditionalLoadsStores) {
    IRBuilder<> Builder(Invert.has_value() ? I : BI);
    if (!Invert.has_value())
      Mask = I->getParent() == BI->getSuccessor(0) ? MaskTrue : MaskFalse;
    // We currently assume conditional faulting load/store is supported for
    // scalar types only when creating new instructions. This can be easily
    // extended for vector types in the future.
    assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
    auto *Op0 = I->getOperand(0);
    CallInst *MaskedLoadStore = nullptr;
    if (auto *LI = dyn_cast<LoadInst>(I)) {
      // Handle Load.
      auto *Ty = I->getType();
      PHINode *PN = nullptr;
      Value *PassThru = nullptr;
      if (Invert.has_value())
        for (User *U : I->users()) {
          if ((PN = dyn_cast<PHINode>(U))) {
            // The PHI's incoming value from BB becomes the masked load's
            // pass-through, i.e. the result when the mask is false.
            PassThru = Builder.CreateBitCast(
                PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
                FixedVectorType::get(Ty, 1));
          } else if (auto *Ins = cast<Instruction>(U);
                     Sel && Ins->getParent() == BB) {
            // This happens when store or/and a speculative instruction between
            // load and store were hoisted to the BB. Make sure the masked load
            // inserted before its use.
            // We assume there's one of such use.
            Builder.SetInsertPoint(Ins);
          }
        }
      MaskedLoadStore = Builder.CreateMaskedLoad(
          FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
      // Cast the <1 x Ty> result back to scalar Ty for the existing users.
      Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty);
      if (PN)
        PN->setIncomingValue(PN->getBasicBlockIndex(BB), NewLoadStore);
      I->replaceAllUsesWith(NewLoadStore);
    } else {
      // Handle Store.
      auto *StoredVal = Builder.CreateBitCast(
          PeekThroughBitcasts(Op0), FixedVectorType::get(Op0->getType(), 1));
      MaskedLoadStore = Builder.CreateMaskedStore(
          StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
    }
    // For non-debug metadata, only !annotation, !range, !nonnull and !align are
    // kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
    //
    // !nonnull, !align : Not support pointer type, no need to keep.
    // !range: Load type is changed from scalar to vector, but the metadata on
    //         vector specifies a per-element range, so the semantics stay the
    //         same. Keep it.
    // !annotation: Not impact semantics. Keep it.
    if (const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range))
      MaskedLoadStore->addRangeRetAttr(getConstantRangeFromMetadata(*Ranges));
    I->dropUBImplyingAttrsAndUnknownMetadata({LLVMContext::MD_annotation});
    // FIXME: DIAssignID is not supported for masked store yet.
    // (Verifier::visitDIAssignIDMetadata)
    I->eraseMetadataIf([](unsigned MDKind, MDNode *Node) {
      return Node->getMetadataID() == Metadata::DIAssignIDKind;
    });
    MaskedLoadStore->copyMetadata(*I);
    I->eraseFromParent();
  }
}
1826
1828 const TargetTransformInfo &TTI) {
1829 // Not handle volatile or atomic.
1830 bool IsStore = false;
1831 if (auto *L = dyn_cast<LoadInst>(I)) {
1832 if (!L->isSimple() || !HoistLoadsWithCondFaulting)
1833 return false;
1834 } else if (auto *S = dyn_cast<StoreInst>(I)) {
1835 if (!S->isSimple() || !HoistStoresWithCondFaulting)
1836 return false;
1837 IsStore = true;
1838 } else
1839 return false;
1840
1841 // llvm.masked.load/store use i32 for alignment while load/store use i64.
1842 // That's why we have the alignment limitation.
1843 // FIXME: Update the prototype of the intrinsics?
1844 return TTI.hasConditionalLoadStoreForType(getLoadStoreType(I), IsStore) &&
1846}
1847
1848/// Hoist any common code in the successor blocks up into the block. This
1849/// function guarantees that BB dominates all successors. If AllInstsEqOnly is
1850/// given, only perform hoisting in case all successors blocks contain matching
1851/// instructions only. In that case, all instructions can be hoisted and the
1852/// original branch will be replaced and selects for PHIs are added.
1853bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
1854 bool AllInstsEqOnly) {
1855 // This does very trivial matching, with limited scanning, to find identical
1856 // instructions in the two blocks. In particular, we don't want to get into
1857 // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
1858 // such, we currently just scan for obviously identical instructions in an
1859 // identical order, possibly separated by the same number of non-identical
1860 // instructions.
1861 BasicBlock *BB = TI->getParent();
1862 unsigned int SuccSize = succ_size(BB);
1863 if (SuccSize < 2)
1864 return false;
1865
1866 // If either of the blocks has it's address taken, then we can't do this fold,
1867 // because the code we'd hoist would no longer run when we jump into the block
1868 // by it's address.
1869 SmallSetVector<BasicBlock *, 4> UniqueSuccessors(from_range, successors(BB));
1870 for (auto *Succ : UniqueSuccessors) {
1871 if (Succ->hasAddressTaken())
1872 return false;
1873 // Use getUniquePredecessor instead of getSinglePredecessor to support
1874 // multi-cases successors in switch.
1875 if (Succ->getUniquePredecessor())
1876 continue;
1877 // If Succ has >1 predecessors, continue to check if the Succ contains only
1878 // one `unreachable` inst. Since executing `unreachable` inst is an UB, we
1879 // can relax the condition based on the assumptiom that the program would
1880 // never enter Succ and trigger such an UB.
1881 if (isa<UnreachableInst>(*Succ->begin()))
1882 continue;
1883 return false;
1884 }
1885 // The second of pair is a SkipFlags bitmask.
1886 using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
1887 SmallVector<SuccIterPair, 8> SuccIterPairs;
1888 for (auto *Succ : UniqueSuccessors) {
1889 BasicBlock::iterator SuccItr = Succ->begin();
1890 if (isa<PHINode>(*SuccItr))
1891 return false;
1892 SuccIterPairs.push_back(SuccIterPair(SuccItr, 0));
1893 }
1894
1895 if (AllInstsEqOnly) {
1896 // Check if all instructions in the successor blocks match. This allows
1897 // hoisting all instructions and removing the blocks we are hoisting from,
1898 // so does not add any new instructions.
1899
1900 // Check if sizes and terminators of all successors match.
1901 unsigned Size0 = UniqueSuccessors[0]->size();
1902 Instruction *Term0 = UniqueSuccessors[0]->getTerminator();
1903 bool AllSame =
1904 all_of(drop_begin(UniqueSuccessors), [Term0, Size0](BasicBlock *Succ) {
1905 return Succ->getTerminator()->isIdenticalTo(Term0) &&
1906 Succ->size() == Size0;
1907 });
1908 if (!AllSame)
1909 return false;
1910 LockstepReverseIterator<true> LRI(UniqueSuccessors.getArrayRef());
1911 while (LRI.isValid()) {
1912 Instruction *I0 = (*LRI)[0];
1913 if (any_of(*LRI, [I0](Instruction *I) {
1914 return !areIdenticalUpToCommutativity(I0, I);
1915 })) {
1916 return false;
1917 }
1918 --LRI;
1919 }
1920 // Now we know that all instructions in all successors can be hoisted. Let
1921 // the loop below handle the hoisting.
1922 }
1923
1924 // Count how many instructions were not hoisted so far. There's a limit on how
1925 // many instructions we skip, serving as a compilation time control as well as
1926 // preventing excessive increase of life ranges.
1927 unsigned NumSkipped = 0;
1928 // If we find an unreachable instruction at the beginning of a basic block, we
1929 // can still hoist instructions from the rest of the basic blocks.
1930 if (SuccIterPairs.size() > 2) {
1931 erase_if(SuccIterPairs,
1932 [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
1933 if (SuccIterPairs.size() < 2)
1934 return false;
1935 }
1936
1937 bool Changed = false;
1938
1939 for (;;) {
1940 auto *SuccIterPairBegin = SuccIterPairs.begin();
1941 auto &BB1ItrPair = *SuccIterPairBegin++;
1942 auto OtherSuccIterPairRange =
1943 iterator_range(SuccIterPairBegin, SuccIterPairs.end());
1944 auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);
1945
1946 Instruction *I1 = &*BB1ItrPair.first;
1947
1948 bool AllInstsAreIdentical = true;
1949 bool HasTerminator = I1->isTerminator();
1950 for (auto &SuccIter : OtherSuccIterRange) {
1951 Instruction *I2 = &*SuccIter;
1952 HasTerminator |= I2->isTerminator();
1953 if (AllInstsAreIdentical && (!areIdenticalUpToCommutativity(I1, I2) ||
1954 MMRAMetadata(*I1) != MMRAMetadata(*I2)))
1955 AllInstsAreIdentical = false;
1956 }
1957
1958 SmallVector<Instruction *, 8> OtherInsts;
1959 for (auto &SuccIter : OtherSuccIterRange)
1960 OtherInsts.push_back(&*SuccIter);
1961
1962 // If we are hoisting the terminator instruction, don't move one (making a
1963 // broken BB), instead clone it, and remove BI.
1964 if (HasTerminator) {
1965 // Even if BB, which contains only one unreachable instruction, is ignored
1966 // at the beginning of the loop, we can hoist the terminator instruction.
1967 // If any instructions remain in the block, we cannot hoist terminators.
1968 if (NumSkipped || !AllInstsAreIdentical) {
1969 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1970 return Changed;
1971 }
1972
1973 return hoistSuccIdenticalTerminatorToSwitchOrIf(
1974 TI, I1, OtherInsts, UniqueSuccessors.getArrayRef()) ||
1975 Changed;
1976 }
1977
1978 if (AllInstsAreIdentical) {
1979 unsigned SkipFlagsBB1 = BB1ItrPair.second;
1980 AllInstsAreIdentical =
1981 isSafeToHoistInstr(I1, SkipFlagsBB1) &&
1982 all_of(OtherSuccIterPairRange, [=](const auto &Pair) {
1983 Instruction *I2 = &*Pair.first;
1984 unsigned SkipFlagsBB2 = Pair.second;
1985 // Even if the instructions are identical, it may not
1986 // be safe to hoist them if we have skipped over
1987 // instructions with side effects or their operands
1988 // weren't hoisted.
1989 return isSafeToHoistInstr(I2, SkipFlagsBB2) &&
1991 });
1992 }
1993
1994 if (AllInstsAreIdentical) {
1995 BB1ItrPair.first++;
1996 // For a normal instruction, we just move one to right before the
1997 // branch, then replace all uses of the other with the first. Finally,
1998 // we remove the now redundant second instruction.
1999 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
2000 // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
2001 // and leave any that were not hoisted behind (by calling moveBefore
2002 // rather than moveBeforePreserving).
2003 I1->moveBefore(TI->getIterator());
2004 for (auto &SuccIter : OtherSuccIterRange) {
2005 Instruction *I2 = &*SuccIter++;
2006 assert(I2 != I1);
2007 if (!I2->use_empty())
2008 I2->replaceAllUsesWith(I1);
2009 I1->andIRFlags(I2);
2010 if (auto *CB = dyn_cast<CallBase>(I1)) {
2011 bool Success = CB->tryIntersectAttributes(cast<CallBase>(I2));
2012 assert(Success && "We should not be trying to hoist callbases "
2013 "with non-intersectable attributes");
2014 // For NDEBUG Compile.
2015 (void)Success;
2016 }
2017
2018 combineMetadataForCSE(I1, I2, true);
2019 // I1 and I2 are being combined into a single instruction. Its debug
2020 // location is the merged locations of the original instructions.
2021 I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
2022 I2->eraseFromParent();
2023 }
2024 if (!Changed)
2025 NumHoistCommonCode += SuccIterPairs.size();
2026 Changed = true;
2027 NumHoistCommonInstrs += SuccIterPairs.size();
2028 } else {
2029 if (NumSkipped >= HoistCommonSkipLimit) {
2030 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
2031 return Changed;
2032 }
2033 // We are about to skip over a pair of non-identical instructions. Record
2034 // if any have characteristics that would prevent reordering instructions
2035 // across them.
2036 for (auto &SuccIterPair : SuccIterPairs) {
2037 Instruction *I = &*SuccIterPair.first++;
2038 SuccIterPair.second |= skippedInstrFlags(I);
2039 }
2040 ++NumSkipped;
2041 }
2042 }
2043}
2044
2045bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
2046 Instruction *TI, Instruction *I1,
2047 SmallVectorImpl<Instruction *> &OtherSuccTIs,
2048 ArrayRef<BasicBlock *> UniqueSuccessors) {
2049
2050 auto *BI = dyn_cast<CondBrInst>(TI);
2051
2052 bool Changed = false;
2053 BasicBlock *TIParent = TI->getParent();
2054 BasicBlock *BB1 = I1->getParent();
2055
2056 // Use only for an if statement.
2057 auto *I2 = *OtherSuccTIs.begin();
2058 auto *BB2 = I2->getParent();
2059 if (BI) {
2060 assert(OtherSuccTIs.size() == 1);
2061 assert(BI->getSuccessor(0) == I1->getParent());
2062 assert(BI->getSuccessor(1) == I2->getParent());
2063 }
2064
2065 // In the case of an if statement, we try to hoist an invoke.
2066 // FIXME: Can we define a safety predicate for CallBr?
2067 // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
2068 // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
2069 if (isa<InvokeInst>(I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
2070 return false;
2071
2072 // TODO: callbr hoisting currently disabled pending further study.
2073 if (isa<CallBrInst>(I1))
2074 return false;
2075
2076 for (BasicBlock *Succ : successors(BB1)) {
2077 for (PHINode &PN : Succ->phis()) {
2078 Value *BB1V = PN.getIncomingValueForBlock(BB1);
2079 for (Instruction *OtherSuccTI : OtherSuccTIs) {
2080 Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent());
2081 if (BB1V == BB2V)
2082 continue;
2083
2084 // In the case of an if statement, check for
2085 // passingValueIsAlwaysUndefined here because we would rather eliminate
2086 // undefined control flow then converting it to a select.
2087 if (!BI || passingValueIsAlwaysUndefined(BB1V, &PN) ||
2089 return false;
2090 }
2091 }
2092 }
2093
2094 // Hoist DbgVariableRecords attached to the terminator to match dbg.*
2095 // intrinsic hoisting behaviour in hoistCommonCodeFromSuccessors.
2096 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherSuccTIs);
2097 // Clone the terminator and hoist it into the pred, without any debug info.
2098 Instruction *NT = I1->clone();
2099 NT->insertInto(TIParent, TI->getIterator());
2100 if (!NT->getType()->isVoidTy()) {
2101 I1->replaceAllUsesWith(NT);
2102 for (Instruction *OtherSuccTI : OtherSuccTIs)
2103 OtherSuccTI->replaceAllUsesWith(NT);
2104 NT->takeName(I1);
2105 }
2106 Changed = true;
2107 NumHoistCommonInstrs += OtherSuccTIs.size() + 1;
2108
2109 // Ensure terminator gets a debug location, even an unknown one, in case
2110 // it involves inlinable calls.
2112 Locs.push_back(I1->getDebugLoc());
2113 for (auto *OtherSuccTI : OtherSuccTIs)
2114 Locs.push_back(OtherSuccTI->getDebugLoc());
2115 NT->setDebugLoc(DebugLoc::getMergedLocations(Locs));
2116
2117 // PHIs created below will adopt NT's merged DebugLoc.
2118 IRBuilder<NoFolder> Builder(NT);
2119
2120 // In the case of an if statement, hoisting one of the terminators from our
2121 // successor is a great thing. Unfortunately, the successors of the if/else
2122 // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2
2123 // must agree for all PHI nodes, so we insert select instruction to compute
2124 // the final result.
2125 if (BI) {
2126 std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
2127 for (BasicBlock *Succ : successors(BB1)) {
2128 for (PHINode &PN : Succ->phis()) {
2129 Value *BB1V = PN.getIncomingValueForBlock(BB1);
2130 Value *BB2V = PN.getIncomingValueForBlock(BB2);
2131 if (BB1V == BB2V)
2132 continue;
2133
2134 // These values do not agree. Insert a select instruction before NT
2135 // that determines the right value.
2136 SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
2137 if (!SI) {
2138 // Propagate fast-math-flags from phi node to its replacement select.
2140 BI->getCondition(), BB1V, BB2V,
2141 isa<FPMathOperator>(PN) ? &PN : nullptr,
2142 BB1V->getName() + "." + BB2V->getName(), BI));
2143 }
2144
2145 // Make the PHI node use the select for all incoming values for BB1/BB2
2146 for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
2147 if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
2148 PN.setIncomingValue(i, SI);
2149 }
2150 }
2151 }
2152
2154
2155 // Update any PHI nodes in our new successors.
2156 for (BasicBlock *Succ : successors(BB1)) {
2157 addPredecessorToBlock(Succ, TIParent, BB1);
2158 if (DTU)
2159 Updates.push_back({DominatorTree::Insert, TIParent, Succ});
2160 }
2161
2162 if (DTU) {
2163 // TI might be a switch with multi-cases destination, so we need to care for
2164 // the duplication of successors.
2165 for (BasicBlock *Succ : UniqueSuccessors)
2166 Updates.push_back({DominatorTree::Delete, TIParent, Succ});
2167 }
2168
2170 if (DTU)
2171 DTU->applyUpdates(Updates);
2172 return Changed;
2173}
2174
2175// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
2176// into variables.
2178 int OpIdx) {
2179 // Divide/Remainder by constant is typically much cheaper than by variable.
2180 if (I->isIntDivRem())
2181 return OpIdx != 1;
2182 return !isa<IntrinsicInst>(I);
2183}
2184
2185// All instructions in Insts belong to different blocks that all unconditionally
2186// branch to a common successor. Analyze each instruction and return true if it
2187// would be possible to sink them into their successor, creating one common
2188// instruction instead. For every value that would be required to be provided by
2189// PHI node (because an operand varies in each input block), add to PHIOperands.
2192 DenseMap<const Use *, SmallVector<Value *, 4>> &PHIOperands) {
2193 // Prune out obviously bad instructions to move. Each instruction must have
2194 // the same number of uses, and we check later that the uses are consistent.
2195 std::optional<unsigned> NumUses;
2196 for (auto *I : Insts) {
2197 // These instructions may change or break semantics if moved.
2198 if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
2199 I->getType()->isTokenTy())
2200 return false;
2201
2202 // Do not try to sink an instruction in an infinite loop - it can cause
2203 // this algorithm to infinite loop.
2204 if (I->getParent()->getSingleSuccessor() == I->getParent())
2205 return false;
2206
2207 // Conservatively return false if I is an inline-asm instruction. Sinking
2208 // and merging inline-asm instructions can potentially create arguments
2209 // that cannot satisfy the inline-asm constraints.
2210 // If the instruction has nomerge or convergent attribute, return false.
2211 if (const auto *C = dyn_cast<CallBase>(I))
2212 if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
2213 return false;
2214
2215 if (!NumUses)
2216 NumUses = I->getNumUses();
2217 else if (NumUses != I->getNumUses())
2218 return false;
2219 }
2220
2221 const Instruction *I0 = Insts.front();
2222 const auto I0MMRA = MMRAMetadata(*I0);
2223 for (auto *I : Insts) {
2224 if (!I->isSameOperationAs(I0, Instruction::CompareUsingIntersectedAttrs))
2225 return false;
2226
2227 // Treat MMRAs conservatively. This pass can be quite aggressive and
2228 // could drop a lot of MMRAs otherwise.
2229 if (MMRAMetadata(*I) != I0MMRA)
2230 return false;
2231 }
2232
2233 // Uses must be consistent: If I0 is used in a phi node in the sink target,
2234 // then the other phi operands must match the instructions from Insts. This
2235 // also has to hold true for any phi nodes that would be created as a result
2236 // of sinking. Both of these cases are represented by PhiOperands.
2237 for (const Use &U : I0->uses()) {
2238 auto It = PHIOperands.find(&U);
2239 if (It == PHIOperands.end())
2240 // There may be uses in other blocks when sinking into a loop header.
2241 return false;
2242 if (!equal(Insts, It->second))
2243 return false;
2244 }
2245
2246 // For calls to be sinkable, they must all be indirect, or have same callee.
2247 // I.e. if we have two direct calls to different callees, we don't want to
2248 // turn that into an indirect call. Likewise, if we have an indirect call,
2249 // and a direct call, we don't actually want to have a single indirect call.
2250 if (isa<CallBase>(I0)) {
2251 auto IsIndirectCall = [](const Instruction *I) {
2252 return cast<CallBase>(I)->isIndirectCall();
2253 };
2254 bool HaveIndirectCalls = any_of(Insts, IsIndirectCall);
2255 bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall);
2256 if (HaveIndirectCalls) {
2257 if (!AllCallsAreIndirect)
2258 return false;
2259 } else {
2260 // All callees must be identical.
2261 Value *Callee = nullptr;
2262 for (const Instruction *I : Insts) {
2263 Value *CurrCallee = cast<CallBase>(I)->getCalledOperand();
2264 if (!Callee)
2265 Callee = CurrCallee;
2266 else if (Callee != CurrCallee)
2267 return false;
2268 }
2269 }
2270 }
2271
2272 for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
2273 Value *Op = I0->getOperand(OI);
2274 auto SameAsI0 = [&I0, OI](const Instruction *I) {
2275 assert(I->getNumOperands() == I0->getNumOperands());
2276 return I->getOperand(OI) == I0->getOperand(OI);
2277 };
2278 if (!all_of(Insts, SameAsI0)) {
2281 // We can't create a PHI from this GEP.
2282 return false;
2283 auto &Ops = PHIOperands[&I0->getOperandUse(OI)];
2284 for (auto *I : Insts)
2285 Ops.push_back(I->getOperand(OI));
2286 }
2287 }
2288 return true;
2289}
2290
2291// Assuming canSinkInstructions(Blocks) has returned true, sink the last
2292// instruction of every block in Blocks to their common successor, commoning
2293// into one instruction.
2295 auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
2296
2297 // canSinkInstructions returning true guarantees that every block has at
2298 // least one non-terminator instruction.
2300 for (auto *BB : Blocks) {
2301 Instruction *I = BB->getTerminator();
2302 I = I->getPrevNode();
2303 Insts.push_back(I);
2304 }
2305
2306 // We don't need to do any more checking here; canSinkInstructions should
2307 // have done it all for us.
2308 SmallVector<Value*, 4> NewOperands;
2309 Instruction *I0 = Insts.front();
2310 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
2311 // This check is different to that in canSinkInstructions. There, we
2312 // cared about the global view once simplifycfg (and instcombine) have
2313 // completed - it takes into account PHIs that become trivially
2314 // simplifiable. However here we need a more local view; if an operand
2315 // differs we create a PHI and rely on instcombine to clean up the very
2316 // small mess we may make.
2317 bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
2318 return I->getOperand(O) != I0->getOperand(O);
2319 });
2320 if (!NeedPHI) {
2321 NewOperands.push_back(I0->getOperand(O));
2322 continue;
2323 }
2324
2325 // Create a new PHI in the successor block and populate it.
2326 auto *Op = I0->getOperand(O);
2327 assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
2328 auto *PN =
2329 PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
2330 PN->insertBefore(BBEnd->begin());
2331 for (auto *I : Insts)
2332 PN->addIncoming(I->getOperand(O), I->getParent());
2333 NewOperands.push_back(PN);
2334 }
2335
2336 // Arbitrarily use I0 as the new "common" instruction; remap its operands
2337 // and move it to the start of the successor block.
2338 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
2339 I0->getOperandUse(O).set(NewOperands[O]);
2340
2341 I0->moveBefore(*BBEnd, BBEnd->getFirstInsertionPt());
2342
2343 // Update metadata and IR flags, and merge debug locations.
2344 for (auto *I : Insts)
2345 if (I != I0) {
2346 // The debug location for the "common" instruction is the merged locations
2347 // of all the commoned instructions. We start with the original location
2348 // of the "common" instruction and iteratively merge each location in the
2349 // loop below.
2350 // This is an N-way merge, which will be inefficient if I0 is a CallInst.
2351 // However, as N-way merge for CallInst is rare, so we use simplified API
2352 // instead of using complex API for N-way merge.
2353 I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
2354 combineMetadataForCSE(I0, I, true);
2355 I0->andIRFlags(I);
2356 if (auto *CB = dyn_cast<CallBase>(I0)) {
2357 bool Success = CB->tryIntersectAttributes(cast<CallBase>(I));
2358 assert(Success && "We should not be trying to sink callbases "
2359 "with non-intersectable attributes");
2360 // For NDEBUG Compile.
2361 (void)Success;
2362 }
2363 }
2364
2365 for (User *U : make_early_inc_range(I0->users())) {
2366 // canSinkLastInstruction checked that all instructions are only used by
2367 // phi nodes in a way that allows replacing the phi node with the common
2368 // instruction.
2369 auto *PN = cast<PHINode>(U);
2370 PN->replaceAllUsesWith(I0);
2371 PN->eraseFromParent();
2372 }
2373
2374 // Finally nuke all instructions apart from the common instruction.
2375 for (auto *I : Insts) {
2376 if (I == I0)
2377 continue;
2378 // The remaining uses are debug users, replace those with the common inst.
2379 // In most (all?) cases this just introduces a use-before-def.
2380 assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
2381 I->replaceAllUsesWith(I0);
2382 I->eraseFromParent();
2383 }
2384}
2385
2386/// Check whether BB's predecessors end with unconditional branches. If it is
2387/// true, sink any common code from the predecessors to BB.
2389 DomTreeUpdater *DTU) {
2390 // We support two situations:
2391 // (1) all incoming arcs are unconditional
2392 // (2) there are non-unconditional incoming arcs
2393 //
2394 // (2) is very common in switch defaults and
2395 // else-if patterns;
2396 //
2397 // if (a) f(1);
2398 // else if (b) f(2);
2399 //
2400 // produces:
2401 //
2402 // [if]
2403 // / \
2404 // [f(1)] [if]
2405 // | | \
2406 // | | |
2407 // | [f(2)]|
2408 // \ | /
2409 // [ end ]
2410 //
2411 // [end] has two unconditional predecessor arcs and one conditional. The
2412 // conditional refers to the implicit empty 'else' arc. This conditional
2413 // arc can also be caused by an empty default block in a switch.
2414 //
2415 // In this case, we attempt to sink code from all *unconditional* arcs.
2416 // If we can sink instructions from these arcs (determined during the scan
2417 // phase below) we insert a common successor for all unconditional arcs and
2418 // connect that to [end], to enable sinking:
2419 //
2420 // [if]
2421 // / \
2422 // [x(1)] [if]
2423 // | | \
2424 // | | \
2425 // | [x(2)] |
2426 // \ / |
2427 // [sink.split] |
2428 // \ /
2429 // [ end ]
2430 //
2431 SmallVector<BasicBlock*,4> UnconditionalPreds;
2432 bool HaveNonUnconditionalPredecessors = false;
2433 for (auto *PredBB : predecessors(BB)) {
2434 auto *PredBr = dyn_cast<UncondBrInst>(PredBB->getTerminator());
2435 if (PredBr)
2436 UnconditionalPreds.push_back(PredBB);
2437 else
2438 HaveNonUnconditionalPredecessors = true;
2439 }
2440 if (UnconditionalPreds.size() < 2)
2441 return false;
2442
2443 // We take a two-step approach to tail sinking. First we scan from the end of
2444 // each block upwards in lockstep. If the n'th instruction from the end of each
2445 // block can be sunk, those instructions are added to ValuesToSink and we
2446 // carry on. If we can sink an instruction but need to PHI-merge some operands
2447 // (because they're not identical in each instruction) we add these to
2448 // PHIOperands.
2449 // We prepopulate PHIOperands with the phis that already exist in BB.
2451 for (PHINode &PN : BB->phis()) {
2453 for (const Use &U : PN.incoming_values())
2454 IncomingVals.insert({PN.getIncomingBlock(U), &U});
2455 auto &Ops = PHIOperands[IncomingVals[UnconditionalPreds[0]]];
2456 for (BasicBlock *Pred : UnconditionalPreds)
2457 Ops.push_back(*IncomingVals[Pred]);
2458 }
2459
2460 int ScanIdx = 0;
2461 SmallPtrSet<Value*,4> InstructionsToSink;
2462 LockstepReverseIterator<true> LRI(UnconditionalPreds);
2463 while (LRI.isValid() &&
2464 canSinkInstructions(*LRI, PHIOperands)) {
2465 LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
2466 << "\n");
2467 InstructionsToSink.insert_range(*LRI);
2468 ++ScanIdx;
2469 --LRI;
2470 }
2471
2472 // If no instructions can be sunk, early-return.
2473 if (ScanIdx == 0)
2474 return false;
2475
2476 bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);
2477
2478 if (!followedByDeoptOrUnreachable) {
2479 // Check whether this is the pointer operand of a load/store.
2480 auto IsMemOperand = [](Use &U) {
2481 auto *I = cast<Instruction>(U.getUser());
2482 if (isa<LoadInst>(I))
2483 return U.getOperandNo() == LoadInst::getPointerOperandIndex();
2484 if (isa<StoreInst>(I))
2485 return U.getOperandNo() == StoreInst::getPointerOperandIndex();
2486 return false;
2487 };
2488
2489 // Okay, we *could* sink last ScanIdx instructions. But how many can we
2490 // actually sink before encountering instruction that is unprofitable to
2491 // sink?
2492 auto ProfitableToSinkInstruction = [&](LockstepReverseIterator<true> &LRI) {
2493 unsigned NumPHIInsts = 0;
2494 for (Use &U : (*LRI)[0]->operands()) {
2495 auto It = PHIOperands.find(&U);
2496 if (It != PHIOperands.end() && !all_of(It->second, [&](Value *V) {
2497 return InstructionsToSink.contains(V);
2498 })) {
2499 ++NumPHIInsts;
2500 // Do not separate a load/store from the gep producing the address.
2501 // The gep can likely be folded into the load/store as an addressing
2502 // mode. Additionally, a load of a gep is easier to analyze than a
2503 // load of a phi.
2504 if (IsMemOperand(U) &&
2505 any_of(It->second, [](Value *V) { return isa<GEPOperator>(V); }))
2506 return false;
2507 // FIXME: this check is overly optimistic. We may end up not sinking
2508 // said instruction, due to the very same profitability check.
2509 // See @creating_too_many_phis in sink-common-code.ll.
2510 }
2511 }
2512 LLVM_DEBUG(dbgs() << "SINK: #phi insts: " << NumPHIInsts << "\n");
2513 return NumPHIInsts <= 1;
2514 };
2515
2516 // We've determined that we are going to sink last ScanIdx instructions,
2517 // and recorded them in InstructionsToSink. Now, some instructions may be
2518 // unprofitable to sink. But that determination depends on the instructions
2519 // that we are going to sink.
2520
2521 // First, forward scan: find the first instruction unprofitable to sink,
2522 // recording all the ones that are profitable to sink.
2523 // FIXME: would it be better, after we detect that not all are profitable.
2524 // to either record the profitable ones, or erase the unprofitable ones?
2525 // Maybe we need to choose (at runtime) the one that will touch least
2526 // instrs?
2527 LRI.reset();
2528 int Idx = 0;
2529 SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
2530 while (Idx < ScanIdx) {
2531 if (!ProfitableToSinkInstruction(LRI)) {
2532 // Too many PHIs would be created.
2533 LLVM_DEBUG(
2534 dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
2535 break;
2536 }
2537 InstructionsProfitableToSink.insert_range(*LRI);
2538 --LRI;
2539 ++Idx;
2540 }
2541
2542 // If no instructions can be sunk, early-return.
2543 if (Idx == 0)
2544 return false;
2545
2546 // Did we determine that (only) some instructions are unprofitable to sink?
2547 if (Idx < ScanIdx) {
2548 // Okay, some instructions are unprofitable.
2549 ScanIdx = Idx;
2550 InstructionsToSink = InstructionsProfitableToSink;
2551
2552 // But, that may make other instructions unprofitable, too.
2553 // So, do a backward scan, do any earlier instructions become
2554 // unprofitable?
2555 assert(
2556 !ProfitableToSinkInstruction(LRI) &&
2557 "We already know that the last instruction is unprofitable to sink");
2558 ++LRI;
2559 --Idx;
2560 while (Idx >= 0) {
2561 // If we detect that an instruction becomes unprofitable to sink,
2562 // all earlier instructions won't be sunk either,
2563 // so preemptively keep InstructionsProfitableToSink in sync.
2564 // FIXME: is this the most performant approach?
2565 for (auto *I : *LRI)
2566 InstructionsProfitableToSink.erase(I);
2567 if (!ProfitableToSinkInstruction(LRI)) {
2568 // Everything starting with this instruction won't be sunk.
2569 ScanIdx = Idx;
2570 InstructionsToSink = InstructionsProfitableToSink;
2571 }
2572 ++LRI;
2573 --Idx;
2574 }
2575 }
2576
2577 // If no instructions can be sunk, early-return.
2578 if (ScanIdx == 0)
2579 return false;
2580 }
2581
2582 bool Changed = false;
2583
2584 if (HaveNonUnconditionalPredecessors) {
2585 if (!followedByDeoptOrUnreachable) {
2586 // It is always legal to sink common instructions from unconditional
2587 // predecessors. However, if not all predecessors are unconditional,
2588 // this transformation might be pessimizing. So as a rule of thumb,
2589 // don't do it unless we'd sink at least one non-speculatable instruction.
2590 // See https://bugs.llvm.org/show_bug.cgi?id=30244
2591 LRI.reset();
2592 int Idx = 0;
2593 bool Profitable = false;
2594 while (Idx < ScanIdx) {
2595 if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
2596 Profitable = true;
2597 break;
2598 }
2599 --LRI;
2600 ++Idx;
2601 }
2602 if (!Profitable)
2603 return false;
2604 }
2605
2606 LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
2607 // We have a conditional edge and we're going to sink some instructions.
2608 // Insert a new block postdominating all blocks we're going to sink from.
2609 if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
2610 // Edges couldn't be split.
2611 return false;
2612 Changed = true;
2613 }
2614
2615 // Now that we've analyzed all potential sinking candidates, perform the
2616 // actual sink. We iteratively sink the last non-terminator of the source
2617 // blocks into their common successor unless doing so would require too
2618 // many PHI instructions to be generated (currently only one PHI is allowed
2619 // per sunk instruction).
2620 //
2621 // We can use InstructionsToSink to discount values needing PHI-merging that will
2622 // actually be sunk in a later iteration. This allows us to be more
2623 // aggressive in what we sink. This does allow a false positive where we
2624 // sink presuming a later value will also be sunk, but stop half way through
2625 // and never actually sink it which means we produce more PHIs than intended.
2626 // This is unlikely in practice though.
2627 int SinkIdx = 0;
2628 for (; SinkIdx != ScanIdx; ++SinkIdx) {
2629 LLVM_DEBUG(dbgs() << "SINK: Sink: "
2630 << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
2631 << "\n");
2632
2633 // Because we've sunk every instruction in turn, the current instruction to
2634 // sink is always at index 0.
2635 LRI.reset();
2636
2637 sinkLastInstruction(UnconditionalPreds);
2638 NumSinkCommonInstrs++;
2639 Changed = true;
2640 }
2641 if (SinkIdx != 0)
2642 ++NumSinkCommonCode;
2643 return Changed;
2644}
2645
2646namespace {
2647
2648struct CompatibleSets {
2649 using SetTy = SmallVector<InvokeInst *, 2>;
2650
2652
2653 static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);
2654
2655 SetTy &getCompatibleSet(InvokeInst *II);
2656
2657 void insert(InvokeInst *II);
2658};
2659
2660CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
2661 // Perform a linear scan over all the existing sets, see if the new `invoke`
2662 // is compatible with any particular set. Since we know that all the `invokes`
2663 // within a set are compatible, only check the first `invoke` in each set.
2664 // WARNING: at worst, this has quadratic complexity.
2665 for (CompatibleSets::SetTy &Set : Sets) {
2666 if (CompatibleSets::shouldBelongToSameSet({Set.front(), II}))
2667 return Set;
2668 }
2669
2670 // Otherwise, we either had no sets yet, or this invoke forms a new set.
2671 return Sets.emplace_back();
2672}
2673
2674void CompatibleSets::insert(InvokeInst *II) {
2675 getCompatibleSet(II).emplace_back(II);
2676}
2677
2678bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
2679 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2680
2681 // Can we theoretically merge these `invoke`s?
2682 auto IsIllegalToMerge = [](InvokeInst *II) {
2683 return II->cannotMerge() || II->isInlineAsm();
2684 };
2685 if (any_of(Invokes, IsIllegalToMerge))
2686 return false;
2687
2688 // Either both `invoke`s must be direct,
2689 // or both `invoke`s must be indirect.
2690 auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
2691 bool HaveIndirectCalls = any_of(Invokes, IsIndirectCall);
2692 bool AllCallsAreIndirect = all_of(Invokes, IsIndirectCall);
2693 if (HaveIndirectCalls) {
2694 if (!AllCallsAreIndirect)
2695 return false;
2696 } else {
2697 // All callees must be identical.
2698 Value *Callee = nullptr;
2699 for (InvokeInst *II : Invokes) {
2700 Value *CurrCallee = II->getCalledOperand();
2701 assert(CurrCallee && "There is always a called operand.");
2702 if (!Callee)
2703 Callee = CurrCallee;
2704 else if (Callee != CurrCallee)
2705 return false;
2706 }
2707 }
2708
2709 // Either both `invoke`s must not have a normal destination,
2710 // or both `invoke`s must have a normal destination,
2711 auto HasNormalDest = [](InvokeInst *II) {
2712 return !isa<UnreachableInst>(II->getNormalDest()->getFirstNonPHIOrDbg());
2713 };
2714 if (any_of(Invokes, HasNormalDest)) {
2715 // Do not merge `invoke` that does not have a normal destination with one
2716 // that does have a normal destination, even though doing so would be legal.
2717 if (!all_of(Invokes, HasNormalDest))
2718 return false;
2719
2720 // All normal destinations must be identical.
2721 BasicBlock *NormalBB = nullptr;
2722 for (InvokeInst *II : Invokes) {
2723 BasicBlock *CurrNormalBB = II->getNormalDest();
2724 assert(CurrNormalBB && "There is always a 'continue to' basic block.");
2725 if (!NormalBB)
2726 NormalBB = CurrNormalBB;
2727 else if (NormalBB != CurrNormalBB)
2728 return false;
2729 }
2730
2731 // In the normal destination, the incoming values for these two `invoke`s
2732 // must be compatible.
2733 SmallPtrSet<Value *, 16> EquivalenceSet(llvm::from_range, Invokes);
2735 NormalBB, {Invokes[0]->getParent(), Invokes[1]->getParent()},
2736 &EquivalenceSet))
2737 return false;
2738 }
2739
2740#ifndef NDEBUG
2741 // All unwind destinations must be identical.
2742 // We know that because we have started from said unwind destination.
2743 BasicBlock *UnwindBB = nullptr;
2744 for (InvokeInst *II : Invokes) {
2745 BasicBlock *CurrUnwindBB = II->getUnwindDest();
2746 assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
2747 if (!UnwindBB)
2748 UnwindBB = CurrUnwindBB;
2749 else
2750 assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
2751 }
2752#endif
2753
2754 // In the unwind destination, the incoming values for these two `invoke`s
2755 // must be compatible.
2757 Invokes.front()->getUnwindDest(),
2758 {Invokes[0]->getParent(), Invokes[1]->getParent()}))
2759 return false;
2760
2761 // Ignoring arguments, these `invoke`s must be identical,
2762 // including operand bundles.
2763 const InvokeInst *II0 = Invokes.front();
2764 for (auto *II : Invokes.drop_front())
2765 if (!II->isSameOperationAs(II0, Instruction::CompareUsingIntersectedAttrs))
2766 return false;
2767
2768 // Can we theoretically form the data operands for the merged `invoke`?
2769 auto IsIllegalToMergeArguments = [](auto Ops) {
2770 Use &U0 = std::get<0>(Ops);
2771 Use &U1 = std::get<1>(Ops);
2772 if (U0 == U1)
2773 return false;
2775 U0.getOperandNo());
2776 };
2777 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2778 if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()),
2779 IsIllegalToMergeArguments))
2780 return false;
2781
2782 return true;
2783}
2784
2785} // namespace
2786
2787// Merge all invokes in the provided set, all of which are compatible
2788// as per the `CompatibleSets::shouldBelongToSameSet()`.
2790 DomTreeUpdater *DTU) {
2791 assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");
2792
2794 if (DTU)
2795 Updates.reserve(2 + 3 * Invokes.size());
2796
2797 bool HasNormalDest =
2798 !isa<UnreachableInst>(Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());
2799
2800 // Clone one of the invokes into a new basic block.
2801 // Since they are all compatible, it doesn't matter which invoke is cloned.
2802 InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
2803 InvokeInst *II0 = Invokes.front();
2804 BasicBlock *II0BB = II0->getParent();
2805 BasicBlock *InsertBeforeBlock =
2806 II0->getParent()->getIterator()->getNextNode();
2807 Function *Func = II0BB->getParent();
2808 LLVMContext &Ctx = II0->getContext();
2809
2810 BasicBlock *MergedInvokeBB = BasicBlock::Create(
2811 Ctx, II0BB->getName() + ".invoke", Func, InsertBeforeBlock);
2812
2813 auto *MergedInvoke = cast<InvokeInst>(II0->clone());
2814 // NOTE: all invokes have the same attributes, so no handling needed.
2815 MergedInvoke->insertInto(MergedInvokeBB, MergedInvokeBB->end());
2816
2817 if (!HasNormalDest) {
2818 // This set does not have a normal destination,
2819 // so just form a new block with unreachable terminator.
2820 BasicBlock *MergedNormalDest = BasicBlock::Create(
2821 Ctx, II0BB->getName() + ".cont", Func, InsertBeforeBlock);
2822 auto *UI = new UnreachableInst(Ctx, MergedNormalDest);
2823 UI->setDebugLoc(DebugLoc::getTemporary());
2824 MergedInvoke->setNormalDest(MergedNormalDest);
2825 }
2826
2827 // The unwind destination, however, remainds identical for all invokes here.
2828
2829 return MergedInvoke;
2830 }();
2831
2832 if (DTU) {
2833 // Predecessor blocks that contained these invokes will now branch to
2834 // the new block that contains the merged invoke, ...
2835 for (InvokeInst *II : Invokes)
2836 Updates.push_back(
2837 {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});
2838
2839 // ... which has the new `unreachable` block as normal destination,
2840 // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
2841 for (BasicBlock *SuccBBOfMergedInvoke : successors(MergedInvoke))
2842 Updates.push_back({DominatorTree::Insert, MergedInvoke->getParent(),
2843 SuccBBOfMergedInvoke});
2844
2845 // Since predecessor blocks now unconditionally branch to a new block,
2846 // they no longer branch to their original successors.
2847 for (InvokeInst *II : Invokes)
2848 for (BasicBlock *SuccOfPredBB : successors(II->getParent()))
2849 Updates.push_back(
2850 {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
2851 }
2852
2853 bool IsIndirectCall = Invokes[0]->isIndirectCall();
2854
2855 // Form the merged operands for the merged invoke.
2856 for (Use &U : MergedInvoke->operands()) {
2857 // Only PHI together the indirect callees and data operands.
2858 if (MergedInvoke->isCallee(&U)) {
2859 if (!IsIndirectCall)
2860 continue;
2861 } else if (!MergedInvoke->isDataOperand(&U))
2862 continue;
2863
2864 // Don't create trivial PHI's with all-identical incoming values.
2865 bool NeedPHI = any_of(Invokes, [&U](InvokeInst *II) {
2866 return II->getOperand(U.getOperandNo()) != U.get();
2867 });
2868 if (!NeedPHI)
2869 continue;
2870
2871 // Form a PHI out of all the data ops under this index.
2873 U->getType(), /*NumReservedValues=*/Invokes.size(), "", MergedInvoke->getIterator());
2874 for (InvokeInst *II : Invokes)
2875 PN->addIncoming(II->getOperand(U.getOperandNo()), II->getParent());
2876
2877 U.set(PN);
2878 }
2879
2880 // We've ensured that each PHI node has compatible (identical) incoming values
2881 // when coming from each of the `invoke`s in the current merge set,
2882 // so update the PHI nodes accordingly.
2883 for (BasicBlock *Succ : successors(MergedInvoke))
2884 addPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
2885 /*ExistPred=*/Invokes.front()->getParent());
2886
2887 // And finally, replace the original `invoke`s with an unconditional branch
2888 // to the block with the merged `invoke`. Also, give that merged `invoke`
2889 // the merged debugloc of all the original `invoke`s.
2890 DILocation *MergedDebugLoc = nullptr;
2891 for (InvokeInst *II : Invokes) {
2892 // Compute the debug location common to all the original `invoke`s.
2893 if (!MergedDebugLoc)
2894 MergedDebugLoc = II->getDebugLoc();
2895 else
2896 MergedDebugLoc =
2897 DebugLoc::getMergedLocation(MergedDebugLoc, II->getDebugLoc());
2898
2899 // And replace the old `invoke` with an unconditionally branch
2900 // to the block with the merged `invoke`.
2901 for (BasicBlock *OrigSuccBB : successors(II->getParent()))
2902 OrigSuccBB->removePredecessor(II->getParent());
2903 auto *BI = UncondBrInst::Create(MergedInvoke->getParent(), II->getParent());
2904 // The unconditional branch is part of the replacement for the original
2905 // invoke, so should use its DebugLoc.
2906 BI->setDebugLoc(II->getDebugLoc());
2907 bool Success = MergedInvoke->tryIntersectAttributes(II);
2908 assert(Success && "Merged invokes with incompatible attributes");
2909 // For NDEBUG Compile
2910 (void)Success;
2911 II->replaceAllUsesWith(MergedInvoke);
2912 II->eraseFromParent();
2913 ++NumInvokesMerged;
2914 }
2915 MergedInvoke->setDebugLoc(MergedDebugLoc);
2916 ++NumInvokeSetsFormed;
2917
2918 if (DTU)
2919 DTU->applyUpdates(Updates);
2920}
2921
2922/// If this block is a `landingpad` exception handling block, categorize all
2923/// the predecessor `invoke`s into sets, with all `invoke`s in each set
2924/// being "mergeable" together, and then merge invokes in each set together.
2925///
2926/// This is a weird mix of hoisting and sinking. Visually, it goes from:
2927/// [...] [...]
2928/// | |
2929/// [invoke0] [invoke1]
2930/// / \ / \
2931/// [cont0] [landingpad] [cont1]
2932/// to:
2933/// [...] [...]
2934/// \ /
2935/// [invoke]
2936/// / \
2937/// [cont] [landingpad]
2938///
2939/// But of course we can only do that if the invokes share the `landingpad`,
2940/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
2941/// and the invoked functions are "compatible".
2944 return false;
2945
2946 bool Changed = false;
2947
2948 // FIXME: generalize to all exception handling blocks?
2949 if (!BB->isLandingPad())
2950 return Changed;
2951
2952 CompatibleSets Grouper;
2953
2954 // Record all the predecessors of this `landingpad`. As per verifier,
2955 // the only allowed predecessor is the unwind edge of an `invoke`.
2956 // We want to group "compatible" `invokes` into the same set to be merged.
2957 for (BasicBlock *PredBB : predecessors(BB))
2958 Grouper.insert(cast<InvokeInst>(PredBB->getTerminator()));
2959
2960 // And now, merge `invoke`s that were grouped togeter.
2961 for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
2962 if (Invokes.size() < 2)
2963 continue;
2964 Changed = true;
2965 mergeCompatibleInvokesImpl(Invokes, DTU);
2966 }
2967
2968 return Changed;
2969}
2970
2971namespace {
2972/// Track ephemeral values, which should be ignored for cost-modelling
2973/// purposes. Requires walking instructions in reverse order.
2974class EphemeralValueTracker {
2975 SmallPtrSet<const Instruction *, 32> EphValues;
2976
2977 bool isEphemeral(const Instruction *I) {
2978 if (isa<AssumeInst>(I))
2979 return true;
2980 return !I->mayHaveSideEffects() && !I->isTerminator() &&
2981 all_of(I->users(), [&](const User *U) {
2982 return EphValues.count(cast<Instruction>(U));
2983 });
2984 }
2985
2986public:
2987 bool track(const Instruction *I) {
2988 if (isEphemeral(I)) {
2989 EphValues.insert(I);
2990 return true;
2991 }
2992 return false;
2993 }
2994
2995 bool contains(const Instruction *I) const { return EphValues.contains(I); }
2996};
2997} // namespace
2998
2999/// Determine if we can hoist sink a sole store instruction out of a
3000/// conditional block.
3001///
3002/// We are looking for code like the following:
3003/// BrBB:
3004/// store i32 %add, i32* %arrayidx2
3005/// ... // No other stores or function calls (we could be calling a memory
3006/// ... // function).
3007/// %cmp = icmp ult %x, %y
3008/// br i1 %cmp, label %EndBB, label %ThenBB
3009/// ThenBB:
3010/// store i32 %add5, i32* %arrayidx2
3011/// br label EndBB
3012/// EndBB:
3013/// ...
3014/// We are going to transform this into:
3015/// BrBB:
3016/// store i32 %add, i32* %arrayidx2
3017/// ... //
3018/// %cmp = icmp ult %x, %y
3019/// %add.add5 = select i1 %cmp, i32 %add, %add5
3020/// store i32 %add.add5, i32* %arrayidx2
3021/// ...
3022///
3023/// \return The pointer to the value of the previous store if the store can be
3024/// hoisted into the predecessor block. 0 otherwise.
// NOTE(review): the head of this function lost tokens in extraction; the line
// below is the continuation of the parameter list. Upstream declares it as
//   static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
//                                        BasicBlock *StoreBB, BasicBlock *EndBB)
// -- confirm against the original file.
3026 BasicBlock *StoreBB, BasicBlock *EndBB) {
3027 StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
3028 if (!StoreToHoist)
3029 return nullptr;
3030
3031 // Volatile or atomic.
3032 if (!StoreToHoist->isSimple())
3033 return nullptr;
3034
3035 Value *StorePtr = StoreToHoist->getPointerOperand();
3036 Type *StoreTy = StoreToHoist->getValueOperand()->getType();
3037
3038 // Look for a store to the same pointer in BrBB.
3039 unsigned MaxNumInstToLookAt = 9;
3040 // Skip pseudo probe intrinsic calls which are not really killing any memory
3041 // accesses.
3042 for (Instruction &CurI : reverse(*BrBB)) {
3043 if (!MaxNumInstToLookAt)
3044 break;
3045 --MaxNumInstToLookAt;
3046
3047 if (isa<PseudoProbeInst>(CurI))
3048 continue;
3049
3050 // Could be calling an instruction that affects memory like free().
3051 if (CurI.mayWriteToMemory() && !isa<StoreInst>(CurI))
3052 return nullptr;
3053
3054 if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
3055 // Found the previous store to same location and type. Make sure it is
3056 // simple, to avoid introducing a spurious non-atomic write after an
3057 // atomic write.
3058 if (SI->getPointerOperand() == StorePtr &&
3059 SI->getValueOperand()->getType() == StoreTy && SI->isSimple() &&
3060 SI->getAlign() >= StoreToHoist->getAlign())
3061 // Found the previous store, return its value operand.
3062 return SI->getValueOperand();
3063 return nullptr; // Unknown store.
3064 }
3065
3066 if (auto *LI = dyn_cast<LoadInst>(&CurI)) {
3067 if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
3068 LI->isSimple() && LI->getAlign() >= StoreToHoist->getAlign()) {
3069 Value *Obj = getUnderlyingObject(StorePtr);
3070 bool ExplicitlyDereferenceableOnly;
// NOTE(review): the condition below lost one or two lines in extraction.
// Based on the surviving `.WithoutRet) &&`, upstream additionally requires
// that `Obj` does not escape (a PointerMayBeCaptured-based capture check)
// between the writability test and the dereferenceability test. Verify the
// exact call against the original file before editing this condition.
3071 if (isWritableObject(Obj, ExplicitlyDereferenceableOnly) &&
3074 .WithoutRet) &&
3075 (!ExplicitlyDereferenceableOnly ||
3076 isDereferenceablePointer(StorePtr, StoreTy,
3077 LI->getDataLayout()))) {
3078 // Found a previous load, return it.
3079 return LI;
3080 }
3081 }
3082 // The load didn't work out, but we may still find a store.
3083 }
3084 }
3085
3086 return nullptr;
3087}
3088
3089/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
3090/// converted to selects.
3092 BasicBlock *EndBB,
3093 unsigned &SpeculatedInstructions,
3094 InstructionCost &Cost,
3095 const TargetTransformInfo &TTI) {
3097 BB->getParent()->hasMinSize()
3100
3101 bool HaveRewritablePHIs = false;
3102 for (PHINode &PN : EndBB->phis()) {
3103 Value *OrigV = PN.getIncomingValueForBlock(BB);
3104 Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
3105
3106 // FIXME: Try to remove some of the duplication with
3107 // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial.
3108 if (ThenV == OrigV)
3109 continue;
3110
3111 Cost += TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(),
3112 CmpInst::makeCmpResultType(PN.getType()),
3114
3115 // Don't convert to selects if we could remove undefined behavior instead.
3116 if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
3118 return false;
3119
3120 HaveRewritablePHIs = true;
3121 ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
3122 ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
3123 if (!OrigCE && !ThenCE)
3124 continue; // Known cheap (FIXME: Maybe not true for aggregates).
3125
3126 InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0;
3127 InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0;
3128 InstructionCost MaxCost =
3130 if (OrigCost + ThenCost > MaxCost)
3131 return false;
3132
3133 // Account for the cost of an unfolded ConstantExpr which could end up
3134 // getting expanded into Instructions.
3135 // FIXME: This doesn't account for how many operations are combined in the
3136 // constant expression.
3137 ++SpeculatedInstructions;
3138 if (SpeculatedInstructions > 1)
3139 return false;
3140 }
3141
3142 return HaveRewritablePHIs;
3143}
3144
3146 std::optional<bool> Invert,
3147 const TargetTransformInfo &TTI) {
3148 // If the branch is non-unpredictable, and is predicted to *not* branch to
3149 // the `then` block, then avoid speculating it.
3150 if (BI->getMetadata(LLVMContext::MD_unpredictable))
3151 return true;
3152
3153 uint64_t TWeight, FWeight;
3154 if (!extractBranchWeights(*BI, TWeight, FWeight) || (TWeight + FWeight) == 0)
3155 return true;
3156
3157 if (!Invert.has_value())
3158 return false;
3159
3160 uint64_t EndWeight = *Invert ? TWeight : FWeight;
3161 BranchProbability BIEndProb =
3162 BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight);
3163 BranchProbability Likely = TTI.getPredictableBranchThreshold();
3164 return BIEndProb < Likely;
3165}
3166
3167/// Speculate a conditional basic block flattening the CFG.
3168///
3169/// Note that this is a very risky transform currently. Speculating
3170/// instructions like this is most often not desirable. Instead, there is an MI
3171/// pass which can do it with full awareness of the resource constraints.
3172/// However, some cases are "obvious" and we should do directly. An example of
3173/// this is speculating a single, reasonably cheap instruction.
3174///
3175/// There is only one distinct advantage to flattening the CFG at the IR level:
3176/// it makes very common but simplistic optimizations such as are common in
3177/// instcombine and the DAG combiner more powerful by removing CFG edges and
3178/// modeling their effects with easier to reason about SSA value graphs.
3179///
3180///
3181/// An illustration of this transform is turning this IR:
3182/// \code
3183/// BB:
3184/// %cmp = icmp ult %x, %y
3185/// br i1 %cmp, label %EndBB, label %ThenBB
3186/// ThenBB:
3187/// %sub = sub %x, %y
3188/// br label BB2
3189/// EndBB:
3190/// %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
3191/// ...
3192/// \endcode
3193///
3194/// Into this IR:
3195/// \code
3196/// BB:
3197/// %cmp = icmp ult %x, %y
3198/// %sub = sub %x, %y
3199/// %cond = select i1 %cmp, 0, %sub
3200/// ...
3201/// \endcode
3202///
3203/// \returns true if the conditional block is removed.
// NOTE(review): `CondBrInst` is not an LLVM IR class; upstream declares this
// method as taking `BranchInst *BI` (this file's other code, e.g. the
// IRBuilder/CreateSelect calls below, is consistent with BranchInst). Confirm
// against the class declaration of SimplifyCFGOpt.
3204bool SimplifyCFGOpt::speculativelyExecuteBB(CondBrInst *BI,
3205 BasicBlock *ThenBB) {
3206 if (!Options.SpeculateBlocks)
3207 return false;
3208
3209 // Be conservative for now. FP select instruction can often be expensive.
3210 Value *BrCond = BI->getCondition();
3211 if (isa<FCmpInst>(BrCond))
3212 return false;
3213
3214 BasicBlock *BB = BI->getParent();
3215 BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
3216 InstructionCost Budget =
// NOTE(review): the right-hand side of `Budget` was lost in extraction;
// upstream computes PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic.
3218
3219 // If ThenBB is actually on the false edge of the conditional branch, remember
3220 // to swap the select operands later.
3221 bool Invert = false;
3222 if (ThenBB != BI->getSuccessor(0)) {
3223 assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
3224 Invert = true;
3225 }
3226 assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
3227
3228 if (!isProfitableToSpeculate(BI, Invert, TTI))
3229 return false;
3230
3231 // Keep a count of how many times instructions are used within ThenBB when
3232 // they are candidates for sinking into ThenBB. Specifically:
3233 // - They are defined in BB, and
3234 // - They have no side effects, and
3235 // - All of their uses are in ThenBB.
3236 SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
3237
3238 SmallVector<Instruction *, 4> SpeculatedPseudoProbes;
3239
3240 unsigned SpeculatedInstructions = 0;
3241 bool HoistLoadsStores = Options.HoistLoadsStoresWithCondFaulting;
3242 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
3243 Value *SpeculatedStoreValue = nullptr;
3244 StoreInst *SpeculatedStore = nullptr;
3245 EphemeralValueTracker EphTracker;
3246 for (Instruction &I : reverse(drop_end(*ThenBB))) {
3247 // Skip pseudo probes. The consequence is we lose track of the branch
3248 // probability for ThenBB, which is fine since the optimization here takes
3249 // place regardless of the branch probability.
3250 if (isa<PseudoProbeInst>(I)) {
3251 // The probe should be deleted so that it will not be over-counted when
3252 // the samples collected on the non-conditional path are counted towards
3253 // the conditional path. We leave it for the counts inference algorithm to
3254 // figure out a proper count for an unknown probe.
3255 SpeculatedPseudoProbes.push_back(&I);
3256 continue;
3257 }
3258
3259 // Ignore ephemeral values, they will be dropped by the transform.
3260 if (EphTracker.track(&I))
3261 continue;
3262
3263 // Only speculatively execute a single instruction (not counting the
3264 // terminator) for now.
// NOTE(review): two lines of this initializer were lost in extraction;
// upstream also requires isSafeCheapLoadStore(&I, TTI) and bounds the list by
// HoistLoadsStoresWithCondFaultingThreshold. Verify before editing.
3265 bool IsSafeCheapLoadStore = HoistLoadsStores &&
3267 SpeculatedConditionalLoadsStores.size() <
3269 // Not count load/store into cost if target supports conditional faulting
3270 // b/c it's cheap to speculate it.
3271 if (IsSafeCheapLoadStore)
3272 SpeculatedConditionalLoadsStores.push_back(&I);
3273 else
3274 ++SpeculatedInstructions;
3275
3276 if (SpeculatedInstructions > 1)
3277 return false;
3278
3279 // Don't hoist the instruction if it's unsafe or expensive.
// NOTE(review): this condition lost its isSafeToSpeculativelyExecute(...)
// conjunct in extraction, and the next one lost its
// computeSpeculationCost(...) > threshold comparison. Verify against the
// original file.
3280 if (!IsSafeCheapLoadStore &&
3282 !(HoistCondStores && !SpeculatedStoreValue &&
3283 (SpeculatedStoreValue =
3284 isSafeToSpeculateStore(&I, BB, ThenBB, EndBB))))
3285 return false;
3286 if (!IsSafeCheapLoadStore && !SpeculatedStoreValue &&
3289 return false;
3290
3291 // Store the store speculation candidate.
3292 if (!SpeculatedStore && SpeculatedStoreValue)
3293 SpeculatedStore = cast<StoreInst>(&I);
3294
3295 // Do not hoist the instruction if any of its operands are defined but not
3296 // used in BB. The transformation will prevent the operand from
3297 // being sunk into the use block.
3298 for (Use &Op : I.operands()) {
// NOTE(review): the declaration of OpI (an `Instruction *OpI =
// dyn_cast<Instruction>(Op);` in upstream) was dropped here in extraction.
3300 if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
3301 continue; // Not a candidate for sinking.
3302
3303 ++SinkCandidateUseCounts[OpI];
3304 }
3305 }
3306
3307 // Consider any sink candidates which are only used in ThenBB as costs for
3308 // speculation. Note, while we iterate over a DenseMap here, we are summing
3309 // and so iteration order isn't significant.
3310 for (const auto &[Inst, Count] : SinkCandidateUseCounts)
3311 if (Inst->hasNUses(Count)) {
3312 ++SpeculatedInstructions;
3313 if (SpeculatedInstructions > 1)
3314 return false;
3315 }
3316
3317 // Check that we can insert the selects and that it's not too expensive to do
3318 // so.
3319 bool Convert =
3320 SpeculatedStore != nullptr || !SpeculatedConditionalLoadsStores.empty();
// NOTE(review): the declaration of Cost (upstream: `InstructionCost Cost =
// 0;`) was dropped here in extraction.
3322 Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
3323 SpeculatedInstructions, Cost, TTI);
3324 if (!Convert || Cost > Budget)
3325 return false;
3326
3327 // If we get here, we can hoist the instruction and if-convert.
3328 LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
3329
3330 Instruction *Sel = nullptr;
3331 // Insert a select of the value of the speculated store.
3332 if (SpeculatedStoreValue) {
3333 IRBuilder<NoFolder> Builder(BI);
3334 Value *OrigV = SpeculatedStore->getValueOperand();
3335 Value *TrueV = SpeculatedStore->getValueOperand();
3336 Value *FalseV = SpeculatedStoreValue;
3337 if (Invert)
3338 std::swap(TrueV, FalseV);
3339 Value *S = Builder.CreateSelect(
3340 BrCond, TrueV, FalseV, "spec.store.select", BI);
3341 Sel = cast<Instruction>(S);
3342 SpeculatedStore->setOperand(0, S);
3343 SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
3344 SpeculatedStore->getDebugLoc());
3345 // The value stored is still conditional, but the store itself is now
3346 // unconditionally executed, so we must be sure that any linked dbg.assign
3347 // intrinsics are tracking the new stored value (the result of the
3348 // select). If we don't, and the store were to be removed by another pass
3349 // (e.g. DSE), then we'd eventually end up emitting a location describing
3350 // the conditional value, unconditionally.
3351 //
3352 // === Before this transformation ===
3353 // pred:
3354 // store %one, %x.dest, !DIAssignID !1
3355 // dbg.assign %one, "x", ..., !1, ...
3356 // br %cond if.then
3357 //
3358 // if.then:
3359 // store %two, %x.dest, !DIAssignID !2
3360 // dbg.assign %two, "x", ..., !2, ...
3361 //
3362 // === After this transformation ===
3363 // pred:
3364 // store %one, %x.dest, !DIAssignID !1
3365 // dbg.assign %one, "x", ..., !1
3366 /// ...
3367 // %merge = select %cond, %two, %one
3368 // store %merge, %x.dest, !DIAssignID !2
3369 // dbg.assign %merge, "x", ..., !2
3370 for (DbgVariableRecord *DbgAssign :
3371 at::getDVRAssignmentMarkers(SpeculatedStore))
3372 if (llvm::is_contained(DbgAssign->location_ops(), OrigV))
3373 DbgAssign->replaceVariableLocationOp(OrigV, S);
3374 }
3375
3376 // Metadata can be dependent on the condition we are hoisting above.
3377 // Strip all UB-implying metadata on the instruction. Drop the debug loc
3378 // to avoid making it appear as if the condition is a constant, which would
3379 // be misleading while debugging.
3380 // Similarly strip attributes that maybe dependent on condition we are
3381 // hoisting above.
3382 for (auto &I : make_early_inc_range(*ThenBB)) {
3383 if (!SpeculatedStoreValue || &I != SpeculatedStore) {
3384 I.dropLocation();
3385 }
3386 I.dropUBImplyingAttrsAndMetadata();
3387
3388 // Drop ephemeral values.
3389 if (EphTracker.contains(&I)) {
3390 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
3391 I.eraseFromParent();
3392 }
3393 }
3394
3395 // Hoist the instructions.
3396 // Drop DbgVariableRecords attached to these instructions.
3397 for (auto &It : *ThenBB)
3398 for (DbgRecord &DR : make_early_inc_range(It.getDbgRecordRange()))
3399 // Drop all records except assign-kind DbgVariableRecords (dbg.assign
3400 // equivalent).
3401 if (DbgVariableRecord *DVR = dyn_cast<DbgVariableRecord>(&DR);
3402 !DVR || !DVR->isDbgAssign())
3403 It.dropOneDbgRecord(&DR);
3404 BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
3405 std::prev(ThenBB->end()));
3406
3407 if (!SpeculatedConditionalLoadsStores.empty())
3408 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert,
3409 Sel);
3410
3411 // Insert selects and rewrite the PHI operands.
3412 IRBuilder<NoFolder> Builder(BI);
3413 for (PHINode &PN : EndBB->phis()) {
3414 unsigned OrigI = PN.getBasicBlockIndex(BB);
3415 unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
3416 Value *OrigV = PN.getIncomingValue(OrigI);
3417 Value *ThenV = PN.getIncomingValue(ThenI);
3418
3419 // Skip PHIs which are trivial.
3420 if (OrigV == ThenV)
3421 continue;
3422
3423 // Create a select whose true value is the speculatively executed value and
3424 // false value is the pre-existing value. Swap them if the branch
3425 // destinations were inverted.
3426 Value *TrueV = ThenV, *FalseV = OrigV;
3427 if (Invert)
3428 std::swap(TrueV, FalseV);
3429 Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
3430 PN.setIncomingValue(OrigI, V);
3431 PN.setIncomingValue(ThenI, V);
3432 }
3433
3434 // Remove speculated pseudo probes.
3435 for (Instruction *I : SpeculatedPseudoProbes)
3436 I->eraseFromParent();
3437
3438 ++NumSpeculations;
3439 return true;
3440}
3441
3442 /// Return true if we can thread a branch across this block.
// Threading clones the block's non-PHI instructions into predecessors, so the
// block must be cheap to duplicate and must not define values used elsewhere.
 3444 int Size = 0;
 3445 EphemeralValueTracker EphTracker;
 3446
 3447 // Walk the loop in reverse so that we can identify ephemeral values properly
 3448 // (values only feeding assumes).
 3449 for (Instruction &I : reverse(*BB)) {
 3450 // Can't fold blocks that contain noduplicate or convergent calls.
 3451 if (CallInst *CI = dyn_cast<CallInst>(&I))
 3452 if (CI->cannotDuplicate() || CI->isConvergent())
 3453 return false;
 3454
 3455 // Ignore ephemeral values which are deleted during codegen.
 3456 // We will delete Phis while threading, so Phis should not be accounted in
 3457 // block's size.
 3458 if (!EphTracker.track(&I) && !isa<PHINode>(I)) {
// Only real (non-ephemeral, non-PHI) instructions count toward the size cap.
 3459 if (Size++ > MaxSmallBlockSize)
 3460 return false; // Don't clone large BB's.
 3461 }
 3462
 3463 // We can only support instructions that do not define values that are
 3464 // live outside of the current basic block.
 3465 for (User *U : I.users()) {
// NOTE(review): the user-instruction extraction line is not visible in this
// rendering; the check below rejects any use outside BB or by a PHI.
 3467 if (UI->getParent() != BB || isa<PHINode>(UI))
 3468 return false;
 3469 }
 3470
 3471 // Looks ok, continue checking.
 3472 }
 3473
 3474 return true;
 3475}
3476
// Determine the constant boolean value of V on the edge From->To, if any.
// Returns null when the value cannot be deduced from the edge alone.
 3478 BasicBlock *To) {
 3479 // Don't look past the block defining the value, we might get the value from
 3480 // a previous loop iteration.
 3481 auto *I = dyn_cast<Instruction>(V);
 3482 if (I && I->getParent() == To)
 3483 return nullptr;
 3484
 3485 // We know the value if the From block branches on it.
 3486 auto *BI = dyn_cast<CondBrInst>(From->getTerminator());
// Require distinct successors: if both arms go to To, the edge tells us
// nothing about the condition's value.
 3487 if (BI && BI->getCondition() == V &&
 3488 BI->getSuccessor(0) != BI->getSuccessor(1))
 3489 return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext())
 3490 : ConstantInt::getFalse(BI->getContext());
 3491
 3492 return nullptr;
 3493}
3494
3495 /// If we have a conditional branch on something for which we know the constant
3496 /// value in predecessors (e.g. a phi node in the current block), thread edges
3497 /// from the predecessor to their ultimate destination.
// Return values: true = folded something and done; false = nothing to do;
// std::nullopt = changed the CFG and the caller should re-run this analysis.
3498 static std::optional<bool>
 3500 const DataLayout &DL,
 3501 AssumptionCache *AC) {
 3503 BasicBlock *BB = BI->getParent();
 3504 Value *Cond = BI->getCondition();
// Collect, per known constant condition value, the set of predecessors on
// whose incoming edge that value holds.
 3506 if (PN && PN->getParent() == BB) {
 3507 // Degenerate case of a single entry PHI.
 3508 if (PN->getNumIncomingValues() == 1) {
 3510 return true;
 3511 }
 3512
 3513 for (Use &U : PN->incoming_values())
 3514 if (auto *CB = dyn_cast<ConstantInt>(U))
 3515 KnownValues[CB].insert(PN->getIncomingBlock(U));
 3516 } else {
 3517 for (BasicBlock *Pred : predecessors(BB)) {
 3518 if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB))
 3519 KnownValues[CB].insert(Pred);
 3520 }
 3521 }
 3522
 3523 if (KnownValues.empty())
 3524 return false;
 3525
 3526 // Now we know that this block has multiple preds and two succs.
 3527 // Check that the block is small enough and values defined in the block are
 3528 // not used outside of it.
 3530 return false;
 3531
 3532 for (const auto &Pair : KnownValues) {
 3533 // Okay, we now know that all edges from PredBB should be revectored to
 3534 // branch to RealDest.
 3535 ConstantInt *CB = Pair.first;
 3536 ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
// The branch successor index is the logical negation of the known condition
// value: a true condition selects successor 0.
 3537 BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
 3538
 3539 if (RealDest == BB)
 3540 continue; // Skip self loops.
 3541
 3542 // Skip if the predecessor's terminator is an indirect branch.
 3543 if (any_of(PredBBs, [](BasicBlock *PredBB) {
 3544 return isa<IndirectBrInst>(PredBB->getTerminator());
 3545 }))
 3546 continue;
 3547
 3548 LLVM_DEBUG({
 3549 dbgs() << "Condition " << *Cond << " in " << BB->getName()
 3550 << " has value " << *Pair.first << " in predecessors:\n";
 3551 for (const BasicBlock *PredBB : Pair.second)
 3552 dbgs() << " " << PredBB->getName() << "\n";
 3553 dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
 3554 });
 3555
 3556 // Split the predecessors we are threading into a new edge block. We'll
 3557 // clone the instructions into this block, and then redirect it to RealDest.
 3558 BasicBlock *EdgeBB = SplitBlockPredecessors(BB, PredBBs, ".critedge", DTU);
 3559 if (!EdgeBB)
 3560 continue;
 3561
 3562 // TODO: These just exist to reduce test diff, we can drop them if we like.
 3563 EdgeBB->setName(RealDest->getName() + ".critedge");
 3564 EdgeBB->moveBefore(RealDest);
 3565
 3566 // Update PHI nodes.
 3567 addPredecessorToBlock(RealDest, EdgeBB, BB);
 3568
 3569 // BB may have instructions that are being threaded over. Clone these
 3570 // instructions into EdgeBB. We know that there will be no uses of the
 3571 // cloned instructions outside of EdgeBB.
 3572 BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
 3573 ValueToValueMapTy TranslateMap; // Track translated values.
// Seed the map so the branch condition folds to the known constant while
// cloning/simplifying.
 3574 TranslateMap[Cond] = CB;
 3575
 3576 // RemoveDIs: track instructions that we optimise away while folding, so
 3577 // that we can copy DbgVariableRecords from them later.
 3578 BasicBlock::iterator SrcDbgCursor = BB->begin();
 3579 for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
// PHIs are not cloned: they are resolved to the value incoming from EdgeBB.
 3580 if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
 3581 TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB);
 3582 continue;
 3583 }
 3584 // Clone the instruction.
 3585 Instruction *N = BBI->clone();
 3586 // Insert the new instruction into its new home.
 3587 N->insertInto(EdgeBB, InsertPt);
 3588
 3589 if (BBI->hasName())
 3590 N->setName(BBI->getName() + ".c");
 3591
 3592 // Update operands due to translation.
 3593 // Key Instructions: Remap all the atom groups.
 3594 if (const DebugLoc &DL = BBI->getDebugLoc())
 3595 mapAtomInstance(DL, TranslateMap)
 3596 RemapInstruction(N, TranslateMap,
 3598
 3599 // Check for trivial simplification.
 3600 if (Value *V = simplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
 3601 if (!BBI->use_empty())
 3602 TranslateMap[&*BBI] = V;
// Side-effecting clones must stay even if their value folded away.
 3603 if (!N->mayHaveSideEffects()) {
 3604 N->eraseFromParent(); // Instruction folded away, don't need actual
 3605 // inst
 3606 N = nullptr;
 3607 }
 3608 } else {
 3609 if (!BBI->use_empty())
 3610 TranslateMap[&*BBI] = N;
 3611 }
 3612 if (N) {
 3613 // Copy all debug-info attached to instructions from the last we
 3614 // successfully clone, up to this instruction (they might have been
 3615 // folded away).
 3616 for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
 3617 N->cloneDebugInfoFrom(&*SrcDbgCursor);
 3618 SrcDbgCursor = std::next(BBI);
 3619 // Clone debug-info on this instruction too.
 3620 N->cloneDebugInfoFrom(&*BBI);
 3621
 3622 // Register the new instruction with the assumption cache if necessary.
 3623 if (auto *Assume = dyn_cast<AssumeInst>(N))
 3624 if (AC)
 3625 AC->registerAssumption(Assume);
 3626 }
 3627 }
 3628
// Carry over any remaining debug records up to and including the branch.
 3629 for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
 3630 InsertPt->cloneDebugInfoFrom(&*SrcDbgCursor);
 3631 InsertPt->cloneDebugInfoFrom(BI);
 3632
// Redirect EdgeBB's unconditional branch straight to RealDest, bypassing BB.
 3633 BB->removePredecessor(EdgeBB);
 3634 UncondBrInst *EdgeBI = cast<UncondBrInst>(EdgeBB->getTerminator());
 3635 EdgeBI->setSuccessor(0, RealDest);
 3636 EdgeBI->setDebugLoc(BI->getDebugLoc());
 3637
 3638 if (DTU) {
 3640 Updates.push_back({DominatorTree::Delete, EdgeBB, BB});
 3641 Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
 3642 DTU->applyUpdates(Updates);
 3643 }
 3644
 3645 // For simplicity, we created a separate basic block for the edge. Merge
 3646 // it back into the predecessor if possible. This not only avoids
 3647 // unnecessary SimplifyCFG iterations, but also makes sure that we don't
 3648 // bypass the check for trivial cycles above.
 3649 MergeBlockIntoPredecessor(EdgeBB, DTU);
 3650
 3651 // Signal repeat, simplifying any other constants.
 3652 return std::nullopt;
 3653 }
 3654
 3655 return false;
 3656}
3657
// Driver: repeatedly run the threading transform until it reports a final
// true/false result (std::nullopt means "changed, try again").
3658bool SimplifyCFGOpt::foldCondBranchOnValueKnownInPredecessor(CondBrInst *BI) {
 3659 // Note: If BB is a loop header then there is a risk that threading introduces
 3660 // a non-canonical loop by moving a back edge. So we avoid this optimization
 3661 // for loop headers if NeedCanonicalLoop is set.
 3662 if (Options.NeedCanonicalLoop && is_contained(LoopHeaders, BI->getParent()))
 3663 return false;
 3664
 3665 std::optional<bool> Result;
 3666 bool EverChanged = false;
 3667 do {
 3668 // Note that None means "we changed things, but recurse further."
 3669 Result =
 3671 EverChanged |= Result == std::nullopt || *Result;
 3672 } while (Result == std::nullopt);
 3673 return EverChanged;
3674}
3675
3676/// Given a BB that starts with the specified two-entry PHI node,
3677/// see if we can eliminate it.
// Converts a simple if/then/else diamond (or triangle) merging at BB into
// select instructions, when speculation of the conditional blocks is cheap.
 3680 const DataLayout &DL,
 3681 bool SpeculateUnpredictables) {
 3682 // Ok, this is a two entry PHI node. Check to see if this is a simple "if
 3683 // statement", which has a very simple dominance structure. Basically, we
 3684 // are trying to find the condition that is being branched on, which
 3685 // subsequently causes this merge to happen. We really want control
 3686 // dependence information for this check, but simplifycfg can't keep it up
 3687 // to date, and this catches most of the cases we care about anyway.
 3688 BasicBlock *BB = PN->getParent();
 3689
 3690 BasicBlock *IfTrue, *IfFalse;
 3691 CondBrInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
 3692 if (!DomBI)
 3693 return false;
 3694 Value *IfCond = DomBI->getCondition();
 3695 // Don't bother if the branch will be constant folded trivially.
 3696 if (isa<ConstantInt>(IfCond))
 3697 return false;
 3698
 3699 BasicBlock *DomBlock = DomBI->getParent();
// Collect the conditional ("then"/"else") blocks: the PHI's predecessors
// that end in an unconditional branch into BB.
 3701 llvm::copy_if(PN->blocks(), std::back_inserter(IfBlocks),
 3702 [](BasicBlock *IfBlock) {
 3703 return isa<UncondBrInst>(IfBlock->getTerminator());
 3704 });
 3705 assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
 3706 "Will have either one or two blocks to speculate.");
 3707
 3708 // If the branch is non-unpredictable, see if we either predictably jump to
 3709 // the merge bb (if we have only a single 'then' block), or if we predictably
 3710 // jump to one specific 'then' block (if we have two of them).
 3711 // It isn't beneficial to speculatively execute the code
 3712 // from the block that we know is predictably not entered.
 3713 bool IsUnpredictable = DomBI->getMetadata(LLVMContext::MD_unpredictable);
 3714 if (!IsUnpredictable) {
 3715 uint64_t TWeight, FWeight;
 3716 if (extractBranchWeights(*DomBI, TWeight, FWeight) &&
 3717 (TWeight + FWeight) != 0) {
 3718 BranchProbability BITrueProb =
 3719 BranchProbability::getBranchProbability(TWeight, TWeight + FWeight);
 3720 BranchProbability Likely = TTI.getPredictableBranchThreshold();
 3721 BranchProbability BIFalseProb = BITrueProb.getCompl();
 3722 if (IfBlocks.size() == 1) {
 3723 BranchProbability BIBBProb =
 3724 DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb;
 3725 if (BIBBProb >= Likely)
 3726 return false;
 3727 } else {
 3728 if (BITrueProb >= Likely || BIFalseProb >= Likely)
 3729 return false;
 3730 }
 3731 }
 3732 }
 3733
 3734 // Don't try to fold an unreachable block. For example, the phi node itself
 3735 // can't be the candidate if-condition for a select that we want to form.
 3736 if (auto *IfCondPhiInst = dyn_cast<PHINode>(IfCond))
 3737 if (IfCondPhiInst->getParent() == BB)
 3738 return false;
 3739
 3740 // Okay, we found that we can merge this two-entry phi node into a select.
 3741 // Doing so would require us to fold *all* two entry phi nodes in this block.
 3742 // At some point this becomes non-profitable (particularly if the target
 3743 // doesn't support cmov's). Only do this transformation if there are two or
 3744 // fewer PHI nodes in this block.
 3745 unsigned NumPhis = 0;
 3746 for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
 3747 if (NumPhis > 2)
 3748 return false;
 3749
 3750 // Loop over the PHI's seeing if we can promote them all to select
 3751 // instructions. While we are at it, keep track of the instructions
 3752 // that need to be moved to the dominating block.
 3753 SmallPtrSet<Instruction *, 4> AggressiveInsts;
 3754 SmallPtrSet<Instruction *, 2> ZeroCostInstructions;
 3755 InstructionCost Cost = 0;
 3756 InstructionCost Budget =
// Unpredictable branches get extra budget: removing them saves the
// mispredict penalty.
 3758 if (SpeculateUnpredictables && IsUnpredictable)
 3759 Budget += TTI.getBranchMispredictPenalty();
 3760
 3761 bool Changed = false;
 3762 for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
 3763 PHINode *PN = cast<PHINode>(II++);
 3764 if (Value *V = simplifyInstruction(PN, {DL, PN})) {
 3765 PN->replaceAllUsesWith(V);
 3766 PN->eraseFromParent();
 3767 Changed = true;
 3768 continue;
 3769 }
 3770
// Both incoming values must be computable above the branch within budget.
 3771 if (!dominatesMergePoint(PN->getIncomingValue(0), BB, DomBI,
 3772 AggressiveInsts, Cost, Budget, TTI, AC,
 3773 ZeroCostInstructions) ||
 3774 !dominatesMergePoint(PN->getIncomingValue(1), BB, DomBI,
 3775 AggressiveInsts, Cost, Budget, TTI, AC,
 3776 ZeroCostInstructions))
 3777 return Changed;
 3778 }
 3779
 3780 // If we folded the first phi, PN dangles at this point. Refresh it. If
 3781 // we ran out of PHIs then we simplified them all.
 3782 PN = dyn_cast<PHINode>(BB->begin());
 3783 if (!PN)
 3784 return true;
 3785
 3786 // Don't fold i1 branches on PHIs which contain binary operators or
 3787 // (possibly inverted) select form of or/ands if their parameters are
 3788 // an equality test.
 3789 auto IsBinOpOrAndEq = [](Value *V) {
 3790 CmpPredicate Pred;
 3791 if (match(V, m_CombineOr(
 3793 m_BinOp(m_Cmp(Pred, m_Value(), m_Value()), m_Value()),
 3794 m_BinOp(m_Value(), m_Cmp(Pred, m_Value(), m_Value()))),
 3796 m_Cmp(Pred, m_Value(), m_Value()))))) {
 3797 return CmpInst::isEquality(Pred);
 3798 }
 3799 return false;
 3800 };
 3801 if (PN->getType()->isIntegerTy(1) &&
 3802 (IsBinOpOrAndEq(PN->getIncomingValue(0)) ||
 3803 IsBinOpOrAndEq(PN->getIncomingValue(1)) || IsBinOpOrAndEq(IfCond)))
 3804 return Changed;
 3805
 3806 // If all PHI nodes are promotable, check to make sure that all instructions
 3807 // in the predecessor blocks can be promoted as well. If not, we won't be able
 3808 // to get rid of the control flow, so it's not worth promoting to select
 3809 // instructions.
 3810 for (BasicBlock *IfBlock : IfBlocks)
 3811 for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
 3812 if (!AggressiveInsts.count(&*I) && !I->isDebugOrPseudoInst()) {
 3813 // This is not an aggressive instruction that we can promote.
 3814 // Because of this, we won't be able to get rid of the control flow, so
 3815 // the xform is not worth it.
 3816 return Changed;
 3817 }
 3818
 3819 // If either of the blocks has it's address taken, we can't do this fold.
 3820 if (any_of(IfBlocks,
 3821 [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
 3822 return Changed;
 3823
 3824 LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond;
 3825 if (IsUnpredictable) dbgs() << " (unpredictable)";
 3826 dbgs() << " T: " << IfTrue->getName()
 3827 << " F: " << IfFalse->getName() << "\n");
 3828
 3829 // If we can still promote the PHI nodes after this gauntlet of tests,
 3830 // do all of the PHI's now.
 3831
 3832 // Move all 'aggressive' instructions, which are defined in the
 3833 // conditional parts of the if's up to the dominating block.
 3834 for (BasicBlock *IfBlock : IfBlocks)
 3835 hoistAllInstructionsInto(DomBlock, DomBI, IfBlock);
 3836
 3837 IRBuilder<NoFolder> Builder(DomBI);
 3838 // Propagate fast-math-flags from phi nodes to replacement selects.
 3839 while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
 3840 // Change the PHI node into a select instruction.
 3841 Value *TrueVal = PN->getIncomingValueForBlock(IfTrue);
 3842 Value *FalseVal = PN->getIncomingValueForBlock(IfFalse);
 3843
 3844 Value *Sel = Builder.CreateSelectFMF(IfCond, TrueVal, FalseVal,
 3845 isa<FPMathOperator>(PN) ? PN : nullptr,
 3846 "", DomBI);
 3847 PN->replaceAllUsesWith(Sel);
 3848 Sel->takeName(PN);
 3849 PN->eraseFromParent();
 3850 }
 3851
 3852 // At this point, all IfBlocks are empty, so our if statement
 3853 // has been flattened. Change DomBlock to jump directly to our new block to
 3854 // avoid other simplifycfg's kicking in on the diamond.
 3855 Builder.CreateBr(BB);
 3856
 3858 if (DTU) {
 3859 Updates.push_back({DominatorTree::Insert, DomBlock, BB});
 3860 for (auto *Successor : successors(DomBlock))
 3861 Updates.push_back({DominatorTree::Delete, DomBlock, Successor});
 3862 }
 3863
 3864 DomBI->eraseFromParent();
 3865 if (DTU)
 3866 DTU->applyUpdates(Updates);
 3867
 3868 return true;
3869}
3870
// Build LHS op RHS, preferring the plain binary op but falling back to the
// poison-safe logical (select) form when RHS might not imply poison of LHS.
 3873 Value *RHS, const Twine &Name = "") {
 3874 // Try to relax logical op to binary op.
 3875 if (impliesPoison(RHS, LHS))
 3876 return Builder.CreateBinOp(Opc, LHS, RHS, Name);
 3877 if (Opc == Instruction::And)
 3878 return Builder.CreateLogicalAnd(LHS, RHS, Name);
 3879 if (Opc == Instruction::Or)
 3880 return Builder.CreateLogicalOr(LHS, RHS, Name);
 3881 llvm_unreachable("Invalid logical opcode");
 3882}
3883
3884/// Return true if either PBI or BI has branch weight available, and store
3885/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
3886/// not have branch weight, use 1:1 as its weight.
 3888 uint64_t &PredTrueWeight,
 3889 uint64_t &PredFalseWeight,
 3890 uint64_t &SuccTrueWeight,
 3891 uint64_t &SuccFalseWeight) {
 3892 bool PredHasWeights =
 3893 extractBranchWeights(*PBI, PredTrueWeight, PredFalseWeight);
 3894 bool SuccHasWeights =
 3895 extractBranchWeights(*BI, SuccTrueWeight, SuccFalseWeight);
 3896 if (PredHasWeights || SuccHasWeights) {
// Fill in a neutral 1:1 weight for whichever branch lacks profile data so
// callers can combine the two unconditionally.
 3897 if (!PredHasWeights)
 3898 PredTrueWeight = PredFalseWeight = 1;
 3899 if (!SuccHasWeights)
 3900 SuccTrueWeight = SuccFalseWeight = 1;
 3901 return true;
 3902 } else {
 3903 return false;
 3904 }
 3905}
3906
3907/// Determine if the two branches share a common destination and deduce a glue
3908/// that joins the branches' conditions to arrive at the common destination if
3909/// that would be profitable.
// Returns {common successor, And/Or opcode, whether PBI's condition must be
// inverted}, or std::nullopt if folding is unprofitable or impossible.
3910static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
 3912 const TargetTransformInfo *TTI) {
 3913 assert(BI && PBI && "Both blocks must end with a conditional branches.");
 3914 assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) &&
 3915 "PredBB must be a predecessor of BB.");
 3916
 3917 // We have the potential to fold the conditions together, but if the
 3918 // predecessor branch is predictable, we may not want to merge them.
 3919 uint64_t PTWeight, PFWeight;
// PBITrueProb/Likely stay "unknown" (default-constructed) unless we have
// usable profile data; the checks below treat unknown as "go ahead".
 3920 BranchProbability PBITrueProb, Likely;
 3921 if (TTI && !PBI->getMetadata(LLVMContext::MD_unpredictable) &&
 3922 extractBranchWeights(*PBI, PTWeight, PFWeight) &&
 3923 (PTWeight + PFWeight) != 0) {
 3924 PBITrueProb =
 3925 BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
 3926 Likely = TTI->getPredictableBranchThreshold();
 3927 }
 3928
// Four cases depending on which successors coincide; the bool in each tuple
// records whether PBI's condition needs inversion before combining.
 3929 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
 3930 // Speculate the 2nd condition unless the 1st is probably true.
 3931 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
 3932 return {{BI->getSuccessor(0), Instruction::Or, false}};
 3933 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
 3934 // Speculate the 2nd condition unless the 1st is probably false.
 3935 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
 3936 return {{BI->getSuccessor(1), Instruction::And, false}};
 3937 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
 3938 // Speculate the 2nd condition unless the 1st is probably true.
 3939 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
 3940 return {{BI->getSuccessor(1), Instruction::And, true}};
 3941 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
 3942 // Speculate the 2nd condition unless the 1st is probably false.
 3943 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
 3944 return {{BI->getSuccessor(0), Instruction::Or, true}};
 3945 }
 3946 return std::nullopt;
3947}
3948
// Fold BI's block into its predecessor PBI's block by combining the two
// branch conditions with and/or, rewiring PBI around BB entirely.
// Precondition: shouldFoldCondBranchesToCommonDestination approved the pair.
 3950 DomTreeUpdater *DTU,
 3951 MemorySSAUpdater *MSSAU,
 3952 const TargetTransformInfo *TTI) {
 3953 BasicBlock *BB = BI->getParent();
 3954 BasicBlock *PredBlock = PBI->getParent();
 3955
 3956 // Determine if the two branches share a common destination.
 3957 BasicBlock *CommonSucc;
 3959 bool InvertPredCond;
 3960 std::tie(CommonSucc, Opc, InvertPredCond) =
 3963 LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
 3964
 3965 IRBuilder<> Builder(PBI);
 3966 // The builder is used to create instructions to eliminate the branch in BB.
 3967 // If BB's terminator has !annotation metadata, add it to the new
 3968 // instructions.
 3969 Builder.CollectMetadataToCopy(BB->getTerminator(),
 3970 {LLVMContext::MD_annotation});
 3971
 3972 // If we need to invert the condition in the pred block to match, do so now.
 3973 if (InvertPredCond) {
 3974 InvertBranch(PBI, Builder);
 3975 }
 3976
// The successor BI has that is NOT the common one; PBI will branch there
// when the combined condition selects it.
 3977 BasicBlock *UniqueSucc =
 3978 PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1);
 3979
 3980 // Before cloning instructions, notify the successor basic block that it
 3981 // is about to have a new predecessor. This will update PHI nodes,
 3982 // which will allow us to update live-out uses of bonus instructions.
 3983 addPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU);
 3984
 3985 // Try to update branch weights.
 3986 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
 3987 SmallVector<uint64_t, 2> MDWeights;
 3988 if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
 3989 SuccTrueWeight, SuccFalseWeight)) {
 3990
 3991 if (PBI->getSuccessor(0) == BB) {
 3992 // PBI: br i1 %x, BB, FalseDest
 3993 // BI: br i1 %y, UniqueSucc, FalseDest
 3994 // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
 3995 MDWeights.push_back(PredTrueWeight * SuccTrueWeight);
 3996 // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
 3997 // TrueWeight for PBI * FalseWeight for BI.
 3998 // We assume that total weights of a CondBrInst can fit into 32 bits.
 3999 // Therefore, we will not have overflow using 64-bit arithmetic.
 4000 MDWeights.push_back(PredFalseWeight * (SuccFalseWeight + SuccTrueWeight) +
 4001 PredTrueWeight * SuccFalseWeight);
 4002 } else {
 4003 // PBI: br i1 %x, TrueDest, BB
 4004 // BI: br i1 %y, TrueDest, UniqueSucc
 4005 // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
 4006 // FalseWeight for PBI * TrueWeight for BI.
 4007 MDWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
 4008 PredFalseWeight * SuccTrueWeight);
 4009 // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
 4010 MDWeights.push_back(PredFalseWeight * SuccFalseWeight);
 4011 }
 4012
 4013 setFittedBranchWeights(*PBI, MDWeights, /*IsExpected=*/false,
 4014 /*ElideAllZero=*/true);
 4015
 4016 // TODO: If BB is reachable from all paths through PredBlock, then we
 4017 // could replace PBI's branch probabilities with BI's.
 4018 } else
// No usable weights on either branch: drop stale profile metadata on PBI.
 4019 PBI->setMetadata(LLVMContext::MD_prof, nullptr);
 4020
 4021 // Now, update the CFG.
 4022 PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc);
 4023
 4024 if (DTU)
 4025 DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
 4026 {DominatorTree::Delete, PredBlock, BB}});
 4027
 4028 // If BI was a loop latch, it may have had associated loop metadata.
 4029 // We need to copy it to the new latch, that is, PBI.
 4030 if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
 4031 PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
 4032
 4033 ValueToValueMapTy VMap; // maps original values to cloned values
 4036 Module *M = BB->getModule();
 4037
 4038 PredBlock->getTerminator()->cloneDebugInfoFrom(BB->getTerminator());
 4039 for (DbgVariableRecord &DVR :
 4041 RemapDbgRecord(M, &DVR, VMap,
 4043 }
 4044
 4045 // Now that the Cond was cloned into the predecessor basic block,
 4046 // or/and the two conditions together.
 4047 Value *BICond = VMap[BI->getCondition()];
 4048 PBI->setCondition(
 4049 createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond"));
// If the combined condition became a select (poison-safe logical form),
// give the select the same branch weights we computed for PBI.
 4051 if (auto *SI = dyn_cast<SelectInst>(PBI->getCondition()))
 4052 if (!MDWeights.empty()) {
 4053 assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
 4054 setFittedBranchWeights(*SI, {MDWeights[0], MDWeights[1]},
 4055 /*IsExpected=*/false, /*ElideAllZero=*/true);
 4056 }
 4057
 4058 ++NumFoldBranchToCommonDest;
 4059 return true;
4060}
4061
4062/// Return if an instruction's type or any of its operands' types are a vector
4063/// type.
4064static bool isVectorOp(Instruction &I) {
4065 return I.getType()->isVectorTy() || any_of(I.operands(), [](Use &U) {
4066 return U->getType()->isVectorTy();
4067 });
4068}
4069
4070/// If this basic block is simple enough, and if a predecessor branches to us
4071/// and one of our successors, fold the block into the predecessor and use
4072/// logical operations to pick the right destination.
 4074 MemorySSAUpdater *MSSAU,
 4075 const TargetTransformInfo *TTI,
 4076 unsigned BonusInstThreshold) {
 4077 BasicBlock *BB = BI->getParent();
// The condition must be a single-use instruction defined in BB so it can be
// cloned into the predecessor without leaving live uses behind.
 4085 Cond->getParent() != BB || !Cond->hasOneUse())
 4086 return false;
 4087
 4088 // Finally, don't infinitely unroll conditional loops.
 4089 if (is_contained(successors(BB), BB))
 4090 return false;
 4091
 4092 // With which predecessors will we want to deal with?
 4094 for (BasicBlock *PredBlock : predecessors(BB)) {
 4095 CondBrInst *PBI = dyn_cast<CondBrInst>(PredBlock->getTerminator());
 4096
 4097 // Check that we have two conditional branches. If there is a PHI node in
 4098 // the common successor, verify that the same value flows in from both
 4099 // blocks.
 4100 if (!PBI || !safeToMergeTerminators(BI, PBI))
 4101 continue;
 4102
 4103 // Determine if the two branches share a common destination.
 4104 BasicBlock *CommonSucc;
 4106 bool InvertPredCond;
 4107 if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
 4108 std::tie(CommonSucc, Opc, InvertPredCond) = *Recipe;
 4109 else
 4110 continue;
 4111
 4112 // Check the cost of inserting the necessary logic before performing the
 4113 // transformation.
 4114 if (TTI) {
 4115 Type *Ty = BI->getCondition()->getType();
 4116 InstructionCost Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind);
// Inverting a multi-use or non-compare condition needs an extra xor.
 4117 if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
 4118 !isa<CmpInst>(PBI->getCondition())))
 4119 Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
 4120
 4122 continue;
 4123 }
 4124
 4125 // Ok, we do want to deal with this predecessor. Record it.
 4126 Preds.emplace_back(PredBlock);
 4127 }
 4128
 4129 // If there aren't any predecessors into which we can fold,
 4130 // don't bother checking the cost.
 4131 if (Preds.empty())
 4132 return false;
 4133
 4134 // Only allow this transformation if computing the condition doesn't involve
 4135 // too many instructions and these involved instructions can be executed
 4136 // unconditionally. We denote all involved instructions except the condition
 4137 // as "bonus instructions", and only allow this transformation when the
 4138 // number of the bonus instructions we'll need to create when cloning into
 4139 // each predecessor does not exceed a certain threshold.
 4140 unsigned NumBonusInsts = 0;
 4141 bool SawVectorOp = false;
 4142 const unsigned PredCount = Preds.size();
 4143 for (Instruction &I : *BB) {
 4144 // Don't check the branch condition comparison itself.
 4145 if (&I == Cond)
 4146 continue;
 4147 // Ignore the terminator.
 4149 continue;
 4150 // I must be safe to execute unconditionally.
 4152 return false;
 4153 SawVectorOp |= isVectorOp(I);
 4154
 4155 // Account for the cost of duplicating this instruction into each
 4156 // predecessor. Ignore free instructions.
 4157 if (!TTI || TTI->getInstructionCost(&I, CostKind) !=
 4159 NumBonusInsts += PredCount;
 4160
 4161 // Early exits once we reach the limit.
 4162 if (NumBonusInsts >
 4163 BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
 4164 return false;
 4165 }
 4166
// A use is in "block-closed SSA" form if it is either a PHI incoming from BB
// or a later instruction within BB itself; only such uses can be rewritten.
 4167 auto IsBCSSAUse = [BB, &I](Use &U) {
 4168 auto *UI = cast<Instruction>(U.getUser());
 4169 if (auto *PN = dyn_cast<PHINode>(UI))
 4170 return PN->getIncomingBlock(U) == BB;
 4171 return UI->getParent() == BB && I.comesBefore(UI);
 4172 };
 4173
 4174 // Does this instruction require rewriting of uses?
 4175 if (!all_of(I.uses(), IsBCSSAUse))
 4176 return false;
 4177 }
 4178 if (NumBonusInsts >
 4179 BonusInstThreshold *
 4180 (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
 4181 return false;
 4182
 4183 // Ok, we have the budget. Perform the transformation.
// Note: only the first recorded predecessor is folded per invocation; the
// pass re-runs to pick up the rest.
 4184 for (BasicBlock *PredBlock : Preds) {
 4185 auto *PBI = cast<CondBrInst>(PredBlock->getTerminator());
 4186 return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
 4187 }
 4188 return false;
4189}
4190
4191// If there is only one store in BB1 and BB2, return it, otherwise return
4192// nullptr.
// Either block pointer may be null (e.g. a triangle with no 'then' block);
// null blocks are simply skipped.
 4194 StoreInst *S = nullptr;
 4195 for (auto *BB : {BB1, BB2}) {
 4196 if (!BB)
 4197 continue;
 4198 for (auto &I : *BB)
 4199 if (auto *SI = dyn_cast<StoreInst>(&I)) {
 4200 if (S)
 4201 // Multiple stores seen.
 4202 return nullptr;
 4203 else
 4204 S = SI;
 4205 }
 4206 }
 4207 return S;
4208}
4209
// Make V usable in BB's single successor by finding or creating a PHI there.
// See the long comment below for the AlternativeV contract.
 4211 Value *AlternativeV = nullptr) {
 4212 // PHI is going to be a PHI node that allows the value V that is defined in
 4213 // BB to be referenced in BB's only successor.
 4214 //
 4215 // If AlternativeV is nullptr, the only value we care about in PHI is V. It
 4216 // doesn't matter to us what the other operand is (it'll never get used). We
 4217 // could just create a new PHI with an undef incoming value, but that could
 4218 // increase register pressure if EarlyCSE/InstCombine can't fold it with some
 4219 // other PHI. So here we directly look for some PHI in BB's successor with V
 4220 // as an incoming operand. If we find one, we use it, else we create a new
 4221 // one.
 4222 //
 4223 // If AlternativeV is not nullptr, we care about both incoming values in PHI.
 4224 // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV]
 4225 // where OtherBB is the single other predecessor of BB's only successor.
 4226 PHINode *PHI = nullptr;
 4227 BasicBlock *Succ = BB->getSingleSuccessor();
 4228
// First try to reuse an existing PHI matching the required shape.
 4229 for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
 4230 if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
 4231 PHI = cast<PHINode>(I);
 4232 if (!AlternativeV)
 4233 break;
 4234
 4235 assert(Succ->hasNPredecessors(2));
 4236 auto PredI = pred_begin(Succ);
 4237 BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
 4238 if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
 4239 break;
// Candidate's other incoming value didn't match; keep searching.
 4240 PHI = nullptr;
 4241 }
 4242 if (PHI)
 4243 return PHI;
 4244
 4245 // If V is not an instruction defined in BB, just return it.
 4246 if (!AlternativeV &&
 4247 (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
 4248 return V;
 4249
// Otherwise materialize a fresh PHI; unmatched predecessors get poison when
// no AlternativeV was requested (that operand is never read).
 4250 PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge");
 4251 PHI->insertBefore(Succ->begin());
 4252 PHI->addIncoming(V, BB);
 4253 for (BasicBlock *PredBB : predecessors(Succ))
 4254 if (PredBB != BB)
 4255 PHI->addIncoming(
 4256 AlternativeV ? AlternativeV : PoisonValue::get(V->getType()), PredBB);
 4257 return PHI;
4258}
4259
4261 BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
4262 BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
4263 DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
4264 // For every pointer, there must be exactly two stores, one coming from
4265 // PTB or PFB, and the other from QTB or QFB. We don't support more than one
4266 // store (to any address) in PTB,PFB or QTB,QFB.
4267 // FIXME: We could relax this restriction with a bit more work and performance
4268 // testing.
4269 StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
4270 StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
4271 if (!PStore || !QStore)
4272 return false;
4273
4274 // Now check the stores are compatible.
4275 if (!QStore->isUnordered() || !PStore->isUnordered() ||
4276 PStore->getValueOperand()->getType() !=
4277 QStore->getValueOperand()->getType())
4278 return false;
4279
4280 // Check that sinking the store won't cause program behavior changes. Sinking
4281 // the store out of the Q blocks won't change any behavior as we're sinking
4282 // from a block to its unconditional successor. But we're moving a store from
4283 // the P blocks down through the middle block (QBI) and past both QFB and QTB.
4284 // So we need to check that there are no aliasing loads or stores in
4285 // QBI, QTB and QFB. We also need to check there are no conflicting memory
4286 // operations between PStore and the end of its parent block.
4287 //
4288 // The ideal way to do this is to query AliasAnalysis, but we don't
4289 // preserve AA currently so that is dangerous. Be super safe and just
4290 // check there are no other memory operations at all.
4291 for (auto &I : *QFB->getSinglePredecessor())
4292 if (I.mayReadOrWriteMemory())
4293 return false;
4294 for (auto &I : *QFB)
4295 if (&I != QStore && I.mayReadOrWriteMemory())
4296 return false;
4297 if (QTB)
4298 for (auto &I : *QTB)
4299 if (&I != QStore && I.mayReadOrWriteMemory())
4300 return false;
4301 for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
4302 I != E; ++I)
4303 if (&*I != PStore && I->mayReadOrWriteMemory())
4304 return false;
4305
4306 // If we're not in aggressive mode, we only optimize if we have some
4307 // confidence that by optimizing we'll allow P and/or Q to be if-converted.
4308 auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
4309 if (!BB)
4310 return true;
4311 // Heuristic: if the block can be if-converted/phi-folded and the
4312 // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
4313 // thread this store.
4314 InstructionCost Cost = 0;
4315 InstructionCost Budget =
4317 for (auto &I : *BB) {
4318 // Consider terminator instruction to be free.
4319 if (I.isTerminator())
4320 continue;
4321 // If this is one the stores that we want to speculate out of this BB,
4322 // then don't count it's cost, consider it to be free.
4323 if (auto *S = dyn_cast<StoreInst>(&I))
4324 if (llvm::find(FreeStores, S))
4325 continue;
4326 // Else, we have a white-list of instructions that we are ak speculating.
4328 return false; // Not in white-list - not worthwhile folding.
4329 // And finally, if this is a non-free instruction that we are okay
4330 // speculating, ensure that we consider the speculation budget.
4331 Cost +=
4332 TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
4333 if (Cost > Budget)
4334 return false; // Eagerly refuse to fold as soon as we're out of budget.
4335 }
4336 assert(Cost <= Budget &&
4337 "When we run out of budget we will eagerly return from within the "
4338 "per-instruction loop.");
4339 return true;
4340 };
4341
4342 const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
4344 (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
4345 !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
4346 return false;
4347
4348 // If PostBB has more than two predecessors, we need to split it so we can
4349 // sink the store.
4350 if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
4351 // We know that QFB's only successor is PostBB. And QFB has a single
4352 // predecessor. If QTB exists, then its only successor is also PostBB.
4353 // If QTB does not exist, then QFB's only predecessor has a conditional
4354 // branch to QFB and PostBB.
4355 BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
4356 BasicBlock *NewBB =
4357 SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU);
4358 if (!NewBB)
4359 return false;
4360 PostBB = NewBB;
4361 }
4362
4363 // OK, we're going to sink the stores to PostBB. The store has to be
4364 // conditional though, so first create the predicate.
4365 CondBrInst *PBranch =
4367 CondBrInst *QBranch =
4369 Value *PCond = PBranch->getCondition();
4370 Value *QCond = QBranch->getCondition();
4371
4373 PStore->getParent());
4375 QStore->getParent(), PPHI);
4376
4377 BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt();
4378 IRBuilder<> QB(PostBB, PostBBFirst);
4379 QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc());
4380
4381 InvertPCond ^= (PStore->getParent() != PTB);
4382 InvertQCond ^= (QStore->getParent() != QTB);
4383 Value *PPred = InvertPCond ? QB.CreateNot(PCond) : PCond;
4384 Value *QPred = InvertQCond ? QB.CreateNot(QCond) : QCond;
4385
4386 Value *CombinedPred = QB.CreateOr(PPred, QPred);
4387
4388 BasicBlock::iterator InsertPt = QB.GetInsertPoint();
4389 auto *T = SplitBlockAndInsertIfThen(CombinedPred, InsertPt,
4390 /*Unreachable=*/false,
4391 /*BranchWeights=*/nullptr, DTU);
4392 if (hasBranchWeightMD(*PBranch) && hasBranchWeightMD(*QBranch) &&
4394 SmallVector<uint32_t, 2> PWeights, QWeights;
4395 extractBranchWeights(*PBranch, PWeights);
4396 extractBranchWeights(*QBranch, QWeights);
4397 if (InvertPCond)
4398 std::swap(PWeights[0], PWeights[1]);
4399 if (InvertQCond)
4400 std::swap(QWeights[0], QWeights[1]);
4401 auto CombinedWeights = getDisjunctionWeights(PWeights, QWeights);
4403 {CombinedWeights[0], CombinedWeights[1]},
4404 /*IsExpected=*/false, /*ElideAllZero=*/true);
4405 }
4406
4407 QB.SetInsertPoint(T);
4408 StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
4409 combineMetadataForCSE(QStore, PStore, true);
4410 SI->copyMetadata(*QStore);
4411 // Update any dbg.assign intrinsics to track the merged value (QPHI) instead
4412 // of the original constant values, likely making these identical.
4413 for (auto *DbgAssign : at::getDVRAssignmentMarkers(SI)) {
4414 if (llvm::is_contained(DbgAssign->location_ops(),
4415 PStore->getValueOperand()))
4416 DbgAssign->replaceVariableLocationOp(PStore->getValueOperand(), QPHI);
4417 if (llvm::is_contained(DbgAssign->location_ops(),
4418 QStore->getValueOperand()))
4419 DbgAssign->replaceVariableLocationOp(QStore->getValueOperand(), QPHI);
4420 }
4421
4422 // Choose the minimum alignment. If we could prove both stores execute, we
4423 // could use biggest one. In this case, though, we only know that one of the
4424 // stores executes. And we don't know it's safe to take the alignment from a
4425 // store that doesn't execute.
4426 SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign()));
4427
4428 QStore->eraseFromParent();
4429 PStore->eraseFromParent();
4430
4431 return true;
4432}
4433
// Scan two stacked diamonds/triangles (PBI over QBI) for stores to a common
// address and try to merge each such pair into one predicated store in
// PostBB. Returns true if at least one store pair was merged.
                                    DomTreeUpdater *DTU, const DataLayout &DL,
                                    const TargetTransformInfo &TTI) {
  // The intention here is to find diamonds or triangles (see below) where each
  // conditional block contains a store to the same address. Both of these
  // stores are conditional, so they can't be unconditionally sunk. But it may
  // be profitable to speculatively sink the stores into one merged store at the
  // end, and predicate the merged store on the union of the two conditions of
  // PBI and QBI.
  //
  // This can reduce the number of stores executed if both of the conditions are
  // true, and can allow the blocks to become small enough to be if-converted.
  // This optimization will also chain, so that ladders of test-and-set
  // sequences can be if-converted away.
  //
  // We only deal with simple diamonds or triangles:
  //
  //     PBI       or      PBI        or a combination of the two
  //    /   \               | \
  //   PTB  PFB             |  PFB
  //    \   /               | /
  //     QBI                QBI
  //    /  \                | \
  //   QTB  QFB             |  QFB
  //    \  /                | /
  //    PostBB            PostBB
  //
  // We model triangles as a type of diamond with a nullptr "true" block.
  // Triangles are canonicalized so that the fallthrough edge is represented by
  // a true condition, as in the diagram above.
  BasicBlock *PTB = PBI->getSuccessor(0);
  BasicBlock *PFB = PBI->getSuccessor(1);
  BasicBlock *QTB = QBI->getSuccessor(0);
  BasicBlock *QFB = QBI->getSuccessor(1);
  BasicBlock *PostBB = QFB->getSingleSuccessor();

  // Make sure we have a good guess for PostBB. If QTB's only successor is
  // QFB, then QFB is a better PostBB.
  if (QTB->getSingleSuccessor() == QFB)
    PostBB = QFB;

  // If we couldn't find a good PostBB, stop.
  if (!PostBB)
    return false;

  bool InvertPCond = false, InvertQCond = false;
  // Canonicalize fallthroughs to the true branches. After this, the "true"
  // successor of each branch is the conditional block (or the fallthrough),
  // and InvertPCond/InvertQCond record whether the branch condition must be
  // negated to match that orientation.
  if (PFB == QBI->getParent()) {
    std::swap(PFB, PTB);
    InvertPCond = true;
  }
  if (QFB == PostBB) {
    std::swap(QFB, QTB);
    InvertQCond = true;
  }

  // From this point on we can assume PTB or QTB may be fallthroughs but PFB
  // and QFB may not. Model fallthroughs as a nullptr block.
  if (PTB == QBI->getParent())
    PTB = nullptr;
  if (QTB == PostBB)
    QTB = nullptr;

  // Legality bailouts. We must have at least the non-fallthrough blocks and
  // the post-dominating block, and the non-fallthroughs must only have one
  // predecessor.
  auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
    return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
  };
  if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
      !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
    return false;
  if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
      (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
    return false;
  // The middle block must be referenced exactly twice (once per incoming edge
  // of the P diamond); otherwise some other predecessor could reach QBI.
  if (!QBI->getParent()->hasNUses(2))
    return false;

  // OK, this is a sequence of two diamonds or triangles.
  // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
  SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
  for (auto *BB : {PTB, PFB}) {
    if (!BB)
      continue;
    for (auto &I : *BB)
        PStoreAddresses.insert(SI->getPointerOperand());
  }
  for (auto *BB : {QTB, QFB}) {
    if (!BB)
      continue;
    for (auto &I : *BB)
        QStoreAddresses.insert(SI->getPointerOperand());
  }

  set_intersect(PStoreAddresses, QStoreAddresses);
  // set_intersect mutates PStoreAddresses in place. Rename it here to make it
  // clear what it contains.
  auto &CommonAddresses = PStoreAddresses;

  // Attempt the merge once per address stored to by both diamonds.
  bool Changed = false;
  for (auto *Address : CommonAddresses)
    Changed |=
        mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
                                       InvertPCond, InvertQCond, DTU, DL, TTI);
  return Changed;
}
4542
/// If the previous block ended with a widenable branch, determine if reusing
/// the target block is profitable and legal. This will have the effect of
/// "widening" PBI, but doesn't require us to reason about hosting safety.
///
/// Returns true if one of BI's deoptimizing successors was retargeted to
/// PBI's false successor (the widenable-branch exit), false otherwise.
                                           DomTreeUpdater *DTU) {
  // TODO: This can be generalized in two important ways:
  // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
  //    values from the PBI edge.
  // 2) We can sink side effecting instructions into BI's fallthrough
  //    successor provided they doesn't contribute to computation of
  //    BI's condition.
  BasicBlock *IfTrueBB = PBI->getSuccessor(0);
  BasicBlock *IfFalseBB = PBI->getSuccessor(1);
  // Only handle the canonical form: PBI is a widenable branch whose true edge
  // falls through into BI's block, and that block has no other predecessors.
  if (!isWidenableBranch(PBI) || IfTrueBB != BI->getParent() ||
      !BI->getParent()->getSinglePredecessor())
    return false;
  if (!IfFalseBB->phis().empty())
    return false; // TODO
  // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which
  // may undo the transform done here.
  // TODO: There might be a more fine-grained solution to this.
  if (!llvm::succ_empty(IfFalseBB))
    return false;
  // Use lambda to lazily compute expensive condition after cheap ones.
  auto NoSideEffects = [](BasicBlock &BB) {
    return llvm::none_of(BB, [](const Instruction &I) {
      return I.mayWriteToMemory() || I.mayHaveSideEffects();
    });
  };
  // Case 1: BI's false successor is a deoptimizing exit. Redirecting it into
  // IfFalseBB is profitable (it replaces a deopt path) and legal only when
  // BI's block has no side effects that the deopt path could have observed.
  if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
      BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
      NoSideEffects(*BI->getParent())) {
    auto *OldSuccessor = BI->getSuccessor(1);
    OldSuccessor->removePredecessor(BI->getParent());
    BI->setSuccessor(1, IfFalseBB);
    if (DTU)
      DTU->applyUpdates(
          {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
           {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
    return true;
  }
  // Case 2: the mirror image — the deoptimizing exit hangs off BI's true
  // successor instead.
  if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
      BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
      NoSideEffects(*BI->getParent())) {
    auto *OldSuccessor = BI->getSuccessor(0);
    OldSuccessor->removePredecessor(BI->getParent());
    BI->setSuccessor(0, IfFalseBB);
    if (DTU)
      DTU->applyUpdates(
          {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
           {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
    return true;
  }
  return false;
}
4598
/// If we have a conditional branch as a predecessor of another block,
/// this function tries to simplify it. We know
/// that PBI and BI are both conditional branches, and BI is in one of the
/// successor blocks of PBI - PBI branches to BI.
///
/// Returns true if any simplification was performed (either turning BI into a
/// branch on constant, widening PBI, merging conditional stores, or folding
/// the two branches into one logical-op branch).
                                           DomTreeUpdater *DTU,
                                           const DataLayout &DL,
                                           const TargetTransformInfo &TTI) {
  BasicBlock *BB = BI->getParent();

  // If this block ends with a branch instruction, and if there is a
  // predecessor that ends on a branch of the same condition, make
  // this conditional branch redundant.
  if (PBI->getCondition() == BI->getCondition() &&
      PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
    // Okay, the outcome of this conditional branch is statically
    // knowable. If this block had a single pred, handle specially, otherwise
    // foldCondBranchOnValueKnownInPredecessor() will handle it.
    if (BB->getSinglePredecessor()) {
      // Turn this into a branch on constant.
      bool CondIsTrue = PBI->getSuccessor(0) == BB;
      BI->setCondition(
          ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
      return true; // Nuke the branch on constant.
    }
  }

  // If the previous block ended with a widenable branch, determine if reusing
  // the target block is profitable and legal. This will have the effect of
  // "widening" PBI, but doesn't require us to reason about hosting safety.
  if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
    return true;

  // If both branches are conditional and both contain stores to the same
  // address, remove the stores from the conditionals and create a conditional
  // merged store at the end.
  if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI))
    return true;

  // If this is a conditional branch in an empty block, and if any
  // predecessors are a conditional branch to one of our destinations,
  // fold the conditions into logical ops and one cond br.

  // Ignore dbg intrinsics.
  if (&*BB->begin() != BI)
    return false;

  // Figure out which successor of each branch leads to the shared destination:
  // PBIOp/BIOp are the successor indices (0 = true edge, 1 = false edge) that
  // reach the common block.
  int PBIOp, BIOp;
  if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
    PBIOp = 0;
    BIOp = 0;
  } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
    PBIOp = 0;
    BIOp = 1;
  } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
    PBIOp = 1;
    BIOp = 0;
  } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
    PBIOp = 1;
    BIOp = 1;
  } else {
    return false;
  }

  // Check to make sure that the other destination of this branch
  // isn't BB itself. If so, this is an infinite loop that will
  // keep getting unwound.
  if (PBI->getSuccessor(PBIOp) == BB)
    return false;

  // If predecessor's branch probability to BB is too low don't merge branches.
  SmallVector<uint32_t, 2> PredWeights;
  if (!PBI->getMetadata(LLVMContext::MD_unpredictable) &&
      extractBranchWeights(*PBI, PredWeights) &&
      (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {

        PredWeights[PBIOp],
        static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);

    // If PBI is already strongly biased toward its common destination, BI is
    // rarely reached and folding would just lengthen the hot path.
    BranchProbability Likely = TTI.getPredictableBranchThreshold();
    if (CommonDestProb >= Likely)
      return false;
  }

  // Do not perform this transformation if it would require
  // insertion of a large number of select instructions. For targets
  // without predication/cmovs, this is a big pessimization.

  BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
  BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
  // Each PHI in CommonDest may need a select (see the loop near the end);
  // cap the count to bound the number of selects inserted.
  unsigned NumPhis = 0;
  for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
       ++II, ++NumPhis) {
    if (NumPhis > 2) // Disable this xform.
      return false;
  }

  // Finally, if everything is ok, fold the branches to logical ops.
  BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);

  LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
                    << "AND: " << *BI->getParent());


  // If OtherDest *is* BB, then BB is a basic block with a single conditional
  // branch in it, where one edge (OtherDest) goes back to itself but the other
  // exits. We don't *know* that the program avoids the infinite loop
  // (even though that seems likely). If we do this xform naively, we'll end up
  // recursively unpeeling the loop. Since we know that (after the xform is
  // done) that the block *is* infinite if reached, we just make it an obviously
  // infinite loop with no cond branch.
  if (OtherDest == BB) {
    // Insert it at the end of the function, because it's either code,
    // or it won't matter if it's hot. :)
    BasicBlock *InfLoopBlock =
        BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
    UncondBrInst::Create(InfLoopBlock, InfLoopBlock);
    if (DTU)
      Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
    OtherDest = InfLoopBlock;
  }

  LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());

  // BI may have other predecessors. Because of this, we leave
  // it alone, but modify PBI.

  // Make sure we get to CommonDest on True&True directions.
  Value *PBICond = PBI->getCondition();
  IRBuilder<NoFolder> Builder(PBI);
  if (PBIOp)
    PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");

  Value *BICond = BI->getCondition();
  if (BIOp)
    BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");

  // Merge the conditions.
  Value *Cond =
      createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge");

  // Modify PBI to branch on the new condition to the new dests.
  PBI->setCondition(Cond);
  PBI->setSuccessor(0, CommonDest);
  PBI->setSuccessor(1, OtherDest);

  if (DTU) {
    Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
    Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});

    DTU->applyUpdates(Updates);
  }

  // Update branch weight for PBI.
  uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
  uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
  bool HasWeights =
      extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
                             SuccTrueWeight, SuccFalseWeight);
  if (HasWeights) {
    // "Common" is the weight of the edge toward CommonDest, "Other" the edge
    // away from it, for each of the two original branches.
    PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
    PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
    SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
    SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
    // The weight to CommonDest should be PredCommon * SuccTotal +
    //                                    PredOther * SuccCommon.
    // The weight to OtherDest should be PredOther * SuccOther.
    uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
                                  PredOther * SuccCommon,
                              PredOther * SuccOther};

    setFittedBranchWeights(*PBI, NewWeights, /*IsExpected=*/false,
                           /*ElideAllZero=*/true);
    // Cond may be a select instruction with the first operand set to "true", or
    // the second to "false" (see how createLogicalOp works for `and` and `or`)

    if (auto *SI = dyn_cast<SelectInst>(Cond)) {
      assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
      // The select is predicated on PBICond
      assert(SI->getCondition() == PBICond);
      // The corresponding probabilities are what was referred to above as
      // PredCommon and PredOther.
      setFittedBranchWeights(*SI, {PredCommon, PredOther},
                             /*IsExpected=*/false, /*ElideAllZero=*/true);
    }
  }

  // OtherDest may have phi nodes. If so, add an entry from PBI's
  // block that are identical to the entries for BI's block.
  addPredecessorToBlock(OtherDest, PBI->getParent(), BB);

  // We know that the CommonDest already had an edge from PBI to
  // it. If it has PHIs though, the PHIs may have different
  // entries for BB and PBI's BB. If so, insert a select to make
  // them agree.
  for (PHINode &PN : CommonDest->phis()) {
    Value *BIV = PN.getIncomingValueForBlock(BB);
    unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
    Value *PBIV = PN.getIncomingValue(PBBIdx);
    if (BIV != PBIV) {
      // Insert a select in PBI to pick the right value.
          Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
      PN.setIncomingValue(PBBIdx, NV);
      // The select has the same condition as PBI, in the same BB. The
      // probabilities don't change.
      if (HasWeights) {
        uint64_t TrueWeight = PBIOp ? PredFalseWeight : PredTrueWeight;
        uint64_t FalseWeight = PBIOp ? PredTrueWeight : PredFalseWeight;
        setFittedBranchWeights(*NV, {TrueWeight, FalseWeight},
                               /*IsExpected=*/false, /*ElideAllZero=*/true);
      }
    }
  }

  LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
  LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());

  // This basic block is probably dead. We know it has at least
  // one fewer predecessor.
  return true;
}
4823
// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
// true or to FalseBB if Cond is false.
// Takes care of updating the successors and removing the old terminator.
// Also makes sure not to introduce new successors by assuming that edges to
// non-successor TrueBBs and FalseBBs aren't reachable.
//
// \param OldTerm     the terminator to replace (a switch or indirectbr).
// \param Cond        the i1 condition for the new branch.
// \param TrueWeight / \param FalseWeight  branch weights for the new
//        conditional branch (all-zero weights are elided).
// Always returns true — the terminator is unconditionally rewritten.
bool SimplifyCFGOpt::simplifyTerminatorOnSelect(Instruction *OldTerm,
                                                Value *Cond, BasicBlock *TrueBB,
                                                BasicBlock *FalseBB,
                                                uint32_t TrueWeight,
                                                uint32_t FalseWeight) {
  auto *BB = OldTerm->getParent();
  // Remove any superfluous successor edges from the CFG.
  // First, figure out which successors to preserve.
  // If TrueBB and FalseBB are equal, only try to preserve one copy of that
  // successor.
  BasicBlock *KeepEdge1 = TrueBB;
  BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;

  // Successors that the new terminator no longer reaches; recorded so the
  // DomTree can be updated at the end.
  SmallSetVector<BasicBlock *, 2> RemovedSuccessors;

  // Then remove the rest.
  for (BasicBlock *Succ : successors(OldTerm)) {
    // Make sure only to keep exactly one copy of each edge.
    if (Succ == KeepEdge1)
      KeepEdge1 = nullptr;
    else if (Succ == KeepEdge2)
      KeepEdge2 = nullptr;
    else {
      Succ->removePredecessor(BB,
                              /*KeepOneInputPHIs=*/true);

      if (Succ != TrueBB && Succ != FalseBB)
        RemovedSuccessors.insert(Succ);
    }
  }

  IRBuilder<> Builder(OldTerm);
  Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());

  // Insert an appropriate new terminator. After the loop above, a null
  // KeepEdgeN means that block was found among OldTerm's successors; a
  // non-null one means it never was a successor.
  if (!KeepEdge1 && !KeepEdge2) {
    if (TrueBB == FalseBB) {
      // We were only looking for one successor, and it was present.
      // Create an unconditional branch to it.
      Builder.CreateBr(TrueBB);
    } else {
      // We found both of the successors we were looking for.
      // Create a conditional branch sharing the condition of the select.
      CondBrInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
      setBranchWeights(*NewBI, {TrueWeight, FalseWeight},
                       /*IsExpected=*/false, /*ElideAllZero=*/true);
    }
  } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
    // Neither of the selected blocks were successors, so this
    // terminator must be unreachable.
    new UnreachableInst(OldTerm->getContext(), OldTerm->getIterator());
  } else {
    // One of the selected values was a successor, but the other wasn't.
    // Insert an unconditional branch to the one that was found;
    // the edge to the one that wasn't must be unreachable.
    if (!KeepEdge1) {
      // Only TrueBB was found.
      Builder.CreateBr(TrueBB);
    } else {
      // Only FalseBB was found.
      Builder.CreateBr(FalseBB);
    }
  }


  if (DTU) {
    SmallVector<DominatorTree::UpdateType, 2> Updates;
    Updates.reserve(RemovedSuccessors.size());
    for (auto *RemovedSuccessor : RemovedSuccessors)
      Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
    DTU->applyUpdates(Updates);
  }

  return true;
}
4905
4906// Replaces
4907// (switch (select cond, X, Y)) on constant X, Y
4908// with a branch - conditional if X and Y lead to distinct BBs,
4909// unconditional otherwise.
4910bool SimplifyCFGOpt::simplifySwitchOnSelect(SwitchInst *SI,
4911 SelectInst *Select) {
4912 // Check for constant integer values in the select.
4913 ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
4914 ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
4915 if (!TrueVal || !FalseVal)
4916 return false;
4917
4918 // Find the relevant condition and destinations.
4919 Value *Condition = Select->getCondition();
4920 BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
4921 BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();
4922
4923 // Get weight for TrueBB and FalseBB.
4924 uint32_t TrueWeight = 0, FalseWeight = 0;
4925 SmallVector<uint64_t, 8> Weights;
4926 bool HasWeights = hasBranchWeightMD(*SI);
4927 if (HasWeights) {
4928 getBranchWeights(SI, Weights);
4929 if (Weights.size() == 1 + SI->getNumCases()) {
4930 TrueWeight =
4931 (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
4932 FalseWeight =
4933 (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
4934 }
4935 }
4936
4937 // Perform the actual simplification.
4938 return simplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
4939 FalseWeight);
4940}
4941
// Replaces
//   (indirectbr (select cond, blockaddress(@fn, BlockA),
//                             blockaddress(@fn, BlockB)))
// with
//   (br cond, BlockA, BlockB).
//
// Returns true iff the indirectbr was rewritten.
bool SimplifyCFGOpt::simplifyIndirectBrOnSelect(IndirectBrInst *IBI,
                                                SelectInst *SI) {
  // Check that both operands of the select are block addresses.
  BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
  BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
  if (!TBA || !FBA)
    return false;

  // Extract the actual blocks.
  BasicBlock *TrueBB = TBA->getBasicBlock();
  BasicBlock *FalseBB = FBA->getBasicBlock();

  // The select's profile becomes the profile of the conditional branch that
  // replaces the indirect branch.
  // (Weights start at zero, so if nothing is extracted below the all-zero
  // weights are elided by simplifyTerminatorOnSelect.)
  SmallVector<uint32_t> SelectBranchWeights(2);

  extractBranchWeights(*SI, SelectBranchWeights);
  // Perform the actual simplification.
  return simplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB,
                                    SelectBranchWeights[0],
                                    SelectBranchWeights[1]);
}
4969
4970/// This is called when we find an icmp instruction
4971/// (a seteq/setne with a constant) as the only instruction in a
4972/// block that ends with an uncond branch. We are looking for a very specific
4973/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
4974/// this case, we merge the first two "or's of icmp" into a switch, but then the
4975/// default value goes to an uncond block with a seteq in it, we get something
4976/// like:
4977///
4978/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
4979/// DEFAULT:
4980/// %tmp = icmp eq i8 %A, 92
4981/// br label %end
4982/// end:
4983/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
4984///
4985/// We prefer to split the edge to 'end' so that there is a true/false entry to
4986/// the PHI, merging the third icmp into the switch.
4987bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
4988 ICmpInst *ICI, IRBuilder<> &Builder) {
4989 // Select == nullptr means we assume that there is a hidden no-op select
4990 // instruction of `_ = select %icmp, true, false` after `%icmp = icmp ...`
4991 return tryToSimplifyUncondBranchWithICmpSelectInIt(ICI, nullptr, Builder);
4992}
4993
/// Similar to tryToSimplifyUncondBranchWithICmpInIt, but handle a more generic
/// case. This is called when we find an icmp instruction (a seteq/setne with a
/// constant) and its following select instruction as the only TWO instructions
/// in a block that ends with an uncond branch. We are looking for a very
/// specific pattern that occurs when "
///   if (A == 1) return C1;
///   if (A == 2) return C2;
///   if (A < 3) return C3;
///   return C4;
/// " gets simplified. In this case, we merge the first two "branches of icmp"
/// into a switch, but then the default value goes to an uncond block with a lt
/// icmp and select in it, as InstCombine can not simplify "A < 3" as "A == 2".
/// After SimplifyCFG and other subsequent optimizations (e.g., SCCP), we might
/// get something like:
///
/// case1:
///   switch i8 %A, label %DEFAULT [ i8 0, label %end i8 1, label %case2 ]
/// case2:
///   br label %end
/// DEFAULT:
///   %tmp = icmp eq i8 %A, 2
///   %val = select i1 %tmp, i8 C3, i8 C4
///   br label %end
/// end:
///   _ = phi i8 [ C1, %case1 ], [ C2, %case2 ], [ %val, %DEFAULT ]
///
/// We prefer to split the edge to 'end' so that there are TWO entries of V3/V4
/// to the PHI, merging the icmp & select into the switch, as follows:
///
/// case1:
///   switch i8 %A, label %DEFAULT [
///     i8 0, label %end
///     i8 1, label %case2
///     i8 2, label %case3
///   ]
/// case2:
///   br label %end
/// case3:
///   br label %end
/// DEFAULT:
///   br label %end
/// end:
///   _ = phi i8 [ C1, %case1 ], [ C2, %case2 ], [ C3, %case2 ], [ C4, %DEFAULT]
bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpSelectInIt(
    ICmpInst *ICI, SelectInst *Select, IRBuilder<> &Builder) {
  BasicBlock *BB = ICI->getParent();

  // If the block has any PHIs in it or the icmp/select has multiple uses, it is
  // too complex.
  /// TODO: support multi-phis in succ BB of select's BB.
  if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse() ||
      (Select && !Select->hasOneUse()))
    return false;

  // The pattern we're looking for is where our only predecessor is a switch on
  // 'V' and this block is the default case for the switch. In this case we can
  // fold the compared value into the switch to simplify things.
  BasicBlock *Pred = BB->getSinglePredecessor();
  if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
    return false;

  Value *IcmpCond;
  ConstantInt *NewCaseVal;
  CmpPredicate Predicate;

  // Match icmp X, C
  if (!match(ICI,
             m_ICmp(Predicate, m_Value(IcmpCond), m_ConstantInt(NewCaseVal))))
    return false;

  // The select's condition/arms, plus the single user of the select (or of
  // the icmp itself when the select is implicit).
  Value *SelectCond, *SelectTrueVal, *SelectFalseVal;

  if (!Select) {
    // If Select == nullptr, we can assume that there is a hidden no-op select
    // just after icmp
    SelectCond = ICI;
    SelectTrueVal = Builder.getTrue();
    SelectFalseVal = Builder.getFalse();
    User = ICI->user_back();
  } else {
    SelectCond = Select->getCondition();
    // Check if the select condition is the same as the icmp condition.
    if (SelectCond != ICI)
      return false;
    SelectTrueVal = Select->getTrueValue();
    SelectFalseVal = Select->getFalseValue();
    User = Select->user_back();
  }

  // The switch must be keyed on the same value the icmp compares.
  SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
  if (SI->getCondition() != IcmpCond)
    return false;

  // If BB is reachable on a non-default case, then we simply know the value of
  // V in this block. Substitute it and constant fold the icmp instruction
  // away.
  if (SI->getDefaultDest() != BB) {
    ConstantInt *VVal = SI->findCaseDest(BB);
    assert(VVal && "Should have a unique destination value");
    ICI->setOperand(0, VVal);

    if (Value *V = simplifyInstruction(ICI, {DL, ICI})) {
      ICI->replaceAllUsesWith(V);
      ICI->eraseFromParent();
    }
    // BB is now empty, so it is likely to simplify away.
    return requestResimplify();
  }

  // Ok, the block is reachable from the default dest. If the constant we're
  // comparing exists in one of the other edges, then we can constant fold ICI
  // and zap it.
  if (SI->findCaseValue(NewCaseVal) != SI->case_default()) {
    Value *V;
    if (Predicate == ICmpInst::ICMP_EQ)

    else

    ICI->replaceAllUsesWith(V);
    ICI->eraseFromParent();
    // BB is now empty, so it is likely to simplify away.
    return requestResimplify();
  }

  // The use of the select has to be in the 'end' block, by the only PHI node in
  // the block.
  BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
  PHINode *PHIUse = dyn_cast<PHINode>(User);
  if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
    return false;

  // If the icmp is a SETEQ, then the default dest gets SelectFalseVal, the new
  // edge gets SelectTrueVal in the PHI.
  Value *DefaultCst = SelectFalseVal;
  Value *NewCst = SelectTrueVal;

  if (ICI->getPredicate() == ICmpInst::ICMP_NE)
    std::swap(DefaultCst, NewCst);

  // Replace Select (which is used by the PHI for the default value) with
  // SelectFalseVal or SelectTrueVal depending on if ICI is EQ or NE.
  if (Select) {
    Select->replaceAllUsesWith(DefaultCst);
    Select->eraseFromParent();
  } else {
    ICI->replaceAllUsesWith(DefaultCst);
  }
  ICI->eraseFromParent();

  SmallVector<DominatorTree::UpdateType, 2> Updates;

  // Okay, the switch goes to this block on a default value. Add an edge from
  // the switch to the merge point on the compared value.
  BasicBlock *NewBB =
      BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
  {
    // Split the default edge's weight roughly in half: half stays with the
    // default destination, half goes to the newly added case.
    SwitchInstProfUpdateWrapper SIW(*SI);
    auto W0 = SIW.getSuccessorWeight(0);
    if (W0) {
      NewW = ((uint64_t(*W0) + 1) >> 1);
      SIW.setSuccessorWeight(0, *NewW);
    }
    SIW.addCase(NewCaseVal, NewBB, NewW);
    if (DTU)
      Updates.push_back({DominatorTree::Insert, Pred, NewBB});
  }

  // NewBB branches to the phi block, add the uncond branch and the phi entry.
  Builder.SetInsertPoint(NewBB);
  Builder.SetCurrentDebugLocation(SI->getDebugLoc());
  Builder.CreateBr(SuccBlock);
  PHIUse->addIncoming(NewCst, NewBB);
  if (DTU) {
    Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
    DTU->applyUpdates(Updates);
  }
  return true;
}
5175
/// Check to see if it is branching on an or/and chain of icmp instructions, and
/// fold it into a switch instruction if so.
///
/// For example, `br (X == 0 | X == 1 | X == 7), T, F` becomes a switch on X
/// with cases 0, 1 and 7 branching to T and a default of F; an and-of-setne
/// chain produces the inverted arrangement. When the collected constants form
/// a contiguous range, a single range-check compare-and-branch is emitted
/// instead of a switch. Returns true if the terminator was replaced.
bool SimplifyCFGOpt::simplifyBranchOnICmpChain(CondBrInst *BI,
                                               IRBuilder<> &Builder,
                                               const DataLayout &DL) {
  if (!Cond)
    return false;

  // Change br (X == 0 | X == 1), T, F into a switch instruction.
  // If this is a bunch of seteq's or'd together, or if it's a bunch of
  // 'setne's and'ed together, collect them.

  // Try to gather values from a chain of and/or to be turned into a switch
  ConstantComparesGatherer ConstantCompare(Cond, DL);
  // Unpack the result
  SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
  Value *CompVal = ConstantCompare.CompValue;
  unsigned UsedICmps = ConstantCompare.UsedICmps;
  Value *ExtraCase = ConstantCompare.Extra;
  bool TrueWhenEqual = ConstantCompare.IsEq;

  // If we didn't have a multiply compared value, fail.
  if (!CompVal)
    return false;

  // Avoid turning single icmps into a switch.
  if (UsedICmps <= 1)
    return false;

  // There might be duplicate constants in the list, which the switch
  // instruction can't handle, remove them now.
  array_pod_sort(Values.begin(), Values.end(), constantIntSortPredicate);
  Values.erase(llvm::unique(Values), Values.end());

  // If Extra was used, we require at least two switch values to do the
  // transformation. A switch with one value is just a conditional branch.
  if (ExtraCase && Values.size() < 2)
    return false;

  SmallVector<uint32_t> BranchWeights;
  const bool HasProfile = !ProfcheckDisableMetadataFixes &&
                          extractBranchWeights(*BI, BranchWeights);

  // Figure out which block is which destination.
  BasicBlock *DefaultBB = BI->getSuccessor(1);
  BasicBlock *EdgeBB = BI->getSuccessor(0);
  if (!TrueWhenEqual) {
    // For an and-of-setne chain the equal values go to the false successor.
    std::swap(DefaultBB, EdgeBB);
    if (HasProfile)
      std::swap(BranchWeights[0], BranchWeights[1]);
  }

  BasicBlock *BB = BI->getParent();

  LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
                    << " cases into SWITCH. BB is:\n"
                    << *BB);

  SmallVector<DominatorTree::UpdateType, 2> Updates;

  // If there are any extra values that couldn't be folded into the switch
  // then we evaluate them with an explicit branch first. Split the block
  // right before the condbr to handle it.
  if (ExtraCase) {
    BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr,
                                   /*MSSAU=*/nullptr, "switch.early.test");

    // Remove the uncond branch added to the old block.
    Instruction *OldTI = BB->getTerminator();
    Builder.SetInsertPoint(OldTI);

    // There can be an unintended UB if extra values are Poison. Before the
    // transformation, extra values may not be evaluated according to the
    // condition, and it will not raise UB. But after transformation, we are
    // evaluating extra values before checking the condition, and it will raise
    // UB. It can be solved by adding freeze instruction to extra values.
    AssumptionCache *AC = Options.AC;

    if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
      ExtraCase = Builder.CreateFreeze(ExtraCase);

    // We don't have any info about this condition.
    auto *Br = TrueWhenEqual ? Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB)
                             : Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
    // NOTE(review): 'Br' is not otherwise referenced in the visible code;
    // presumably profile metadata is attached to it here — confirm upstream.

    OldTI->eraseFromParent();

    if (DTU)
      Updates.push_back({DominatorTree::Insert, BB, EdgeBB});

    // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
    // for the edge we just added.
    addPredecessorToBlock(EdgeBB, BB, NewBB);

    LLVM_DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
                      << "\nEXTRABB = " << *BB);
    BB = NewBB;
  }

  Builder.SetInsertPoint(BI);
  // Convert pointer to int before we switch.
  if (CompVal->getType()->isPointerTy()) {
    assert(!DL.hasUnstableRepresentation(CompVal->getType()) &&
           "Should not end up here with unstable pointers");
    CompVal = Builder.CreatePtrToInt(
        CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
  }

  // Check if we can represent the values as a contiguous range. If so, we use a
  // range check + conditional branch instead of a switch.
  // Values is sorted descending here, so front() is the max and back() the min.
  if (Values.front()->getValue() - Values.back()->getValue() ==
      Values.size() - 1) {
    ConstantRange RangeToCheck = ConstantRange::getNonEmpty(
        Values.back()->getValue(), Values.front()->getValue() + 1);
    APInt Offset, RHS;
    ICmpInst::Predicate Pred;
    RangeToCheck.getEquivalentICmp(Pred, RHS, Offset);
    Value *X = CompVal;
    if (!Offset.isZero())
      X = Builder.CreateAdd(X, ConstantInt::get(CompVal->getType(), Offset));
    Value *Cond =
        Builder.CreateICmp(Pred, X, ConstantInt::get(CompVal->getType(), RHS));
    CondBrInst *NewBI = Builder.CreateCondBr(Cond, EdgeBB, DefaultBB);
    if (HasProfile)
      setBranchWeights(*NewBI, BranchWeights, /*IsExpected=*/false);
    // We don't need to update PHI nodes since we don't add any new edges.
  } else {
    // Create the new switch instruction now.
    SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
    if (HasProfile) {
      // We know the weight of the default case. We don't know the weight of the
      // other cases, but rather than completely lose profiling info, we split
      // the remaining probability equally over them.
      SmallVector<uint32_t> NewWeights(Values.size() + 1);
      NewWeights[0] = BranchWeights[1]; // this is the default, and we swapped
                                        // if TrueWhenEqual.
      for (auto &V : drop_begin(NewWeights))
        V = BranchWeights[0] / Values.size();
      setBranchWeights(*New, NewWeights, /*IsExpected=*/false);
    }

    // Add all of the 'cases' to the switch instruction.
    for (ConstantInt *Val : Values)
      New->addCase(Val, EdgeBB);

    // We added edges from PI to the EdgeBB. As such, if there were any
    // PHI nodes in EdgeBB, they need entries to be added corresponding to
    // the number of edges added.
    for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
      PHINode *PN = cast<PHINode>(BBI);
      Value *InVal = PN->getIncomingValueForBlock(BB);
      // One incoming entry already exists for BB; add the remaining
      // Values.size() - 1 duplicates for the new case edges.
      for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
        PN->addIncoming(InVal, BB);
    }
  }

  // Erase the old branch instruction.
  if (DTU)
    DTU->applyUpdates(Updates);

  LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
  return true;
}
5342
5343bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
5344 if (isa<PHINode>(RI->getValue()))
5345 return simplifyCommonResume(RI);
5346 else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHIIt()) &&
5347 RI->getValue() == &*RI->getParent()->getFirstNonPHIIt())
5348 // The resume must unwind the exception that caused control to branch here.
5349 return simplifySingleResume(RI);
5350
5351 return false;
5352}
5353
// Check if cleanup block is empty: returns true iff every instruction in the
// range R is an intrinsic with no semantic effect here (debug-info records and
// lifetime.end markers), so the range can be treated as empty when simplifying
// the surrounding CFG.
  for (Instruction &I : R) {
    auto *II = dyn_cast<IntrinsicInst>(&I);
    if (!II)
      return false; // Any non-intrinsic instruction counts as real work.

    Intrinsic::ID IntrinsicID = II->getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::lifetime_end:
      break; // Benign: ignorable for the purposes of this emptiness check.
    default:
      return false; // Any other intrinsic disqualifies the range.
    }
  }
  return true;
}
5374
// Simplify resume that is shared by several landing pads (phi of landing pad).
// For each incoming block that is a "trivial" landing pad (its only successor
// is this resume block and it contains nothing but benign intrinsics), rewrite
// the invokes unwinding to it into plain calls and cut its edge to the resume
// block. Returns true if any such trivial unwind block was simplified.
bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
  BasicBlock *BB = RI->getParent();

  // Check that there are no other instructions except for debug and lifetime
  // intrinsics between the phi's and resume instruction.
  if (!isCleanupBlockEmpty(make_range(RI->getParent()->getFirstNonPHIIt(),
                                      BB->getTerminator()->getIterator())))
    return false;

  SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
  auto *PhiLPInst = cast<PHINode>(RI->getValue());

  // Check incoming blocks to see if any of them are trivial.
  for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
       Idx++) {
    auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
    auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);

    // If the block has other successors, we can not delete it because
    // it has other dependents.
    if (IncomingBB->getUniqueSuccessor() != BB)
      continue;

    auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHIIt());
    // Not the landing pad that caused the control to branch here.
    if (IncomingValue != LandingPad)
      continue;

        make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
      TrivialUnwindBlocks.insert(IncomingBB);
  }

  // If no trivial unwind blocks, don't do any simplifications.
  if (TrivialUnwindBlocks.empty())
    return false;

  // Turn all invokes that unwind here into calls.
  for (auto *TrivialBB : TrivialUnwindBlocks) {
    // Blocks that will be simplified should be removed from the phi node.
    // Note there could be multiple edges to the resume block, and we need
    // to remove them all.
    while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
      BB->removePredecessor(TrivialBB, true);

    for (BasicBlock *Pred :
      removeUnwindEdge(Pred, DTU);
      ++NumInvokes;
    }

    // In each SimplifyCFG run, only the current processed block can be erased.
    // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
    // of erasing TrivialBB, we only remove the branch to the common resume
    // block so that we can later erase the resume block since it has no
    // predecessors.
    TrivialBB->getTerminator()->eraseFromParent();
    new UnreachableInst(RI->getContext(), TrivialBB);
    if (DTU)
      DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}});
  }

  // Delete the resume block if all its predecessors have been removed.
  if (pred_empty(BB))
    DeleteDeadBlock(BB, DTU);

  // Non-empty set implies we changed something (checked above), so report true.
  return !TrivialUnwindBlocks.empty();
}
5444
// Simplify resume that is only used by a single (non-phi) landing pad.
// If the block contains nothing but the landingpad, benign intrinsics and the
// resume itself, every invoke unwinding here is converted into a call and the
// now-unreachable block is deleted. Returns true on success.
bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
  BasicBlock *BB = RI->getParent();
  auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHIIt());
  assert(RI->getValue() == LPInst &&
         "Resume must unwind the exception that caused control to here");

  // Check that there are no other instructions except for debug intrinsics.
      make_range<Instruction *>(LPInst->getNextNode(), RI)))
    return false;

  // Turn all invokes that unwind here into calls and delete the basic block.
  // Early-inc iteration is required because removeUnwindEdge mutates the
  // predecessor list as we go.
  for (BasicBlock *Pred : llvm::make_early_inc_range(predecessors(BB))) {
    removeUnwindEdge(Pred, DTU);
    ++NumInvokes;
  }

  // The landingpad is now unreachable. Zap it.
  DeleteDeadBlock(BB, DTU);
  return true;
}
5467
  // If this is a trivial cleanup pad that executes no instructions, it can be
  // eliminated. If the cleanup pad continues to the caller, any predecessor
  // that is an EH pad will be updated to continue to the caller and any
  // predecessor that terminates with an invoke instruction will have its invoke
  // instruction converted to a call instruction. If the cleanup pad being
  // simplified does not continue to the caller, each predecessor will be
  // updated to continue to the unwind destination of the cleanup pad being
  // simplified.
  BasicBlock *BB = RI->getParent();
  CleanupPadInst *CPInst = RI->getCleanupPad();
  if (CPInst->getParent() != BB)
    // This isn't an empty cleanup.
    return false;

  // We cannot kill the pad if it has multiple uses. This typically arises
  // from unreachable basic blocks.
  if (!CPInst->hasOneUse())
    return false;

  // Check that there are no other instructions except for benign intrinsics.
      make_range<Instruction *>(CPInst->getNextNode(), RI)))
    return false;

  // If the cleanup return we are simplifying unwinds to the caller, this will
  // set UnwindDest to nullptr.
  BasicBlock *UnwindDest = RI->getUnwindDest();

  // We're about to remove BB from the control flow. Before we do, sink any
  // PHINodes into the unwind destination. Doing this before changing the
  // control flow avoids some potentially slow checks, since we can currently
  // be certain that UnwindDest and BB have no common predecessors (since they
  // are both EH pads).
  if (UnwindDest) {
    // First, go through the PHI nodes in UnwindDest and update any nodes that
    // reference the block we are removing
    for (PHINode &DestPN : UnwindDest->phis()) {
      int Idx = DestPN.getBasicBlockIndex(BB);
      // Since BB unwinds to UnwindDest, it has to be in the PHI node.
      assert(Idx != -1);
      // This PHI node has an incoming value that corresponds to a control
      // path through the cleanup pad we are removing. If the incoming
      // value is in the cleanup pad, it must be a PHINode (because we
      // verified above that the block is otherwise empty). Otherwise, the
      // value is either a constant or a value that dominates the cleanup
      // pad being removed.
      //
      // Because BB and UnwindDest are both EH pads, all of their
      // predecessors must unwind to these blocks, and since no instruction
      // can have multiple unwind destinations, there will be no overlap in
      // incoming blocks between SrcPN and DestPN.
      Value *SrcVal = DestPN.getIncomingValue(Idx);
      PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);

      // If the incoming value is itself a PHI in BB, translate it through to
      // the value flowing in from each of BB's predecessors.
      bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
      for (auto *Pred : predecessors(BB)) {
        Value *Incoming =
            NeedPHITranslation ? SrcPN->getIncomingValueForBlock(Pred) : SrcVal;
        DestPN.addIncoming(Incoming, Pred);
      }
    }

    // Sink any remaining PHI nodes directly into UnwindDest.
    BasicBlock::iterator InsertPt = UnwindDest->getFirstNonPHIIt();
    for (PHINode &PN : make_early_inc_range(BB->phis())) {
      if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
        // If the PHI node has no uses or all of its uses are in this basic
        // block (meaning they are debug or lifetime intrinsics), just leave
        // it. It will be erased when we erase BB below.
        continue;

      // Otherwise, sink this PHI node into UnwindDest.
      // Any predecessors to UnwindDest which are not already represented
      // must be back edges which inherit the value from the path through
      // BB. In this case, the PHI value must reference itself.
      for (auto *pred : predecessors(UnwindDest))
        if (pred != BB)
          PN.addIncoming(&PN, pred);
      PN.moveBefore(InsertPt);
      // Also, add a dummy incoming value for the original BB itself,
      // so that the PHI is well-formed until we drop said predecessor.
      PN.addIncoming(PoisonValue::get(PN.getType()), BB);
    }
  }

  std::vector<DominatorTree::UpdateType> Updates;

  // We use make_early_inc_range here because we will remove all predecessors.
    if (UnwindDest == nullptr) {
      // Unwinds to caller: flush pending DT updates first, since
      // removeUnwindEdge may itself consult/update the tree via DTU.
      if (DTU) {
        DTU->applyUpdates(Updates);
        Updates.clear();
      }
      removeUnwindEdge(PredBB, DTU);
      ++NumInvokes;
    } else {
      // Retarget the predecessor's terminator straight at UnwindDest,
      // bypassing the empty cleanup block.
      BB->removePredecessor(PredBB);
      Instruction *TI = PredBB->getTerminator();
      TI->replaceUsesOfWith(BB, UnwindDest);
      if (DTU) {
        Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
        Updates.push_back({DominatorTree::Delete, PredBB, BB});
      }
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  DeleteDeadBlock(BB, DTU);

  return true;
}
5583
// Try to merge two cleanuppads together.
// If this cleanupret's unwind destination is another cleanuppad whose only
// predecessor is this cleanupret, the successor pad is folded into the
// predecessor pad and the cleanupret becomes an unconditional branch.
// Returns true if the merge happened.
  // Skip any cleanuprets which unwind to caller, there is nothing to merge
  // with.
  BasicBlock *UnwindDest = RI->getUnwindDest();
  if (!UnwindDest)
    return false;

  // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't
  // be safe to merge without code duplication.
  if (UnwindDest->getSinglePredecessor() != RI->getParent())
    return false;

  // Verify that our cleanuppad's unwind destination is another cleanuppad.
  auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
  if (!SuccessorCleanupPad)
    return false;

  CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
  // Replace any uses of the successor cleanupad with the predecessor pad.
  // The only cleanuppad uses should be this cleanupret, its cleanupret and
  // funclet bundle operands.
  SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
  // Remove the old cleanuppad.
  SuccessorCleanupPad->eraseFromParent();
  // Now, we simply replace the cleanupret with a branch to the unwind
  // destination.
  UncondBrInst::Create(UnwindDest, RI->getParent());
  RI->eraseFromParent();

  return true;
}
5616
5617bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
5618 // It is possible to transiantly have an undef cleanuppad operand because we
5619 // have deleted some, but not all, dead blocks.
5620 // Eventually, this block will be deleted.
5621 if (isa<UndefValue>(RI->getOperand(0)))
5622 return false;
5623
5624 if (mergeCleanupPad(RI))
5625 return true;
5626
5627 if (removeEmptyCleanup(RI, DTU))
5628 return true;
5629
5630 return false;
5631}
5632
// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
// Simplify a block ending in 'unreachable': delete dead instructions before
// it, and if the block becomes empty, rewrite every predecessor terminator so
// that it no longer targets this block (turning invokes into calls, pruning
// switch cases, converting branches into assumes, etc.). Returns true if the
// IR was changed.
bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
  BasicBlock *BB = UI->getParent();

  bool Changed = false;

  // Ensure that any debug-info records that used to occur after the Unreachable
  // are moved to in front of it -- otherwise they'll "dangle" at the end of
  // the block.

  // Debug-info records on the unreachable inst itself should be deleted, as
  // below we delete everything past the final executable instruction.
  UI->dropDbgRecords();

  // If there are any instructions immediately before the unreachable that can
  // be removed, do so.
  while (UI->getIterator() != BB->begin()) {
    --BBI;

      break; // Can not drop any more instructions. We're done here.
    // Otherwise, this instruction can be freely erased,
    // even if it is not side-effect free.

    // Note that deleting EH's here is in fact okay, although it involves a bit
    // of subtle reasoning. If this inst is an EH, all the predecessors of this
    // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
    // and we can therefore guarantee this block will be erased.

    // If we're deleting this, we're deleting any subsequent debug info, so
    // delete DbgRecords.
    BBI->dropDbgRecords();

    // Delete this instruction (any uses are guaranteed to be dead)
    BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
    BBI->eraseFromParent();
    Changed = true;
  }

  // If the unreachable instruction is the first in the block, take a gander
  // at all of the predecessors of this instruction, and simplify them.
  if (&BB->front() != UI)
    return Changed;

  std::vector<DominatorTree::UpdateType> Updates;

  // Snapshot the predecessors up front: the loop below mutates the CFG.
  SmallSetVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
  for (BasicBlock *Predecessor : Preds) {
    Instruction *TI = Predecessor->getTerminator();
    IRBuilder<> Builder(TI);
    if (isa<UncondBrInst>(TI)) {
      // An unconditional branch into 'unreachable' makes the predecessor
      // itself end in 'unreachable'.
      new UnreachableInst(TI->getContext(), TI->getIterator());
      TI->eraseFromParent();
      Changed = true;
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
    } else if (auto *BI = dyn_cast<CondBrInst>(TI)) {
      // We could either have a proper unconditional branch,
      // or a degenerate conditional branch with matching destinations.
      if (BI->getSuccessor(0) == BI->getSuccessor(1)) {
        new UnreachableInst(TI->getContext(), TI->getIterator());
        TI->eraseFromParent();
        Changed = true;
      } else {
        Value* Cond = BI->getCondition();
        assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
               "The destinations are guaranteed to be different here.");
        CallInst *Assumption;
        // Record the fact that the branch toward BB cannot be taken: assume
        // the condition value that picks the live successor.
        if (BI->getSuccessor(0) == BB) {
          Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
          Builder.CreateBr(BI->getSuccessor(1));
        } else {
          assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
          Assumption = Builder.CreateAssumption(Cond);
          Builder.CreateBr(BI->getSuccessor(0));
        }
        if (Options.AC)
          Options.AC->registerAssumption(cast<AssumeInst>(Assumption));

        Changed = true;
      }
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
    } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
      SwitchInstProfUpdateWrapper SU(*SI);
      for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
        if (i->getCaseSuccessor() != BB) {
          ++i;
          continue;
        }
        BB->removePredecessor(SU->getParent());
        i = SU.removeCase(i);
        e = SU->case_end();
        Changed = true;
      }
      // Note that the default destination can't be removed!
      if (DTU && SI->getDefaultDest() != BB)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
    } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
      if (II->getUnwindDest() == BB) {
        // Flush pending DT updates before removeUnwindEdge mutates the CFG.
        if (DTU) {
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        auto *CI = cast<CallInst>(removeUnwindEdge(TI->getParent(), DTU));
        if (!CI->doesNotThrow())
          CI->setDoesNotThrow();
        Changed = true;
      }
    } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
      if (CSI->getUnwindDest() == BB) {
        if (DTU) {
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        removeUnwindEdge(TI->getParent(), DTU);
        Changed = true;
        continue;
      }

      for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
                                             E = CSI->handler_end();
           I != E; ++I) {
        if (*I == BB) {
          CSI->removeHandler(I);
          --I;
          --E;
          Changed = true;
        }
      }
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
      if (CSI->getNumHandlers() == 0) {
        if (CSI->hasUnwindDest()) {
          // Redirect all predecessors of the block containing CatchSwitchInst
          // to instead branch to the CatchSwitchInst's unwind destination.
          if (DTU) {
            for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
              Updates.push_back({DominatorTree::Insert,
                                 PredecessorOfPredecessor,
                                 CSI->getUnwindDest()});
              Updates.push_back({DominatorTree::Delete,
                                 PredecessorOfPredecessor, Predecessor});
            }
          }
          Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
        } else {
          // Rewrite all preds to unwind to caller (or from invoke to call).
          if (DTU) {
            DTU->applyUpdates(Updates);
            Updates.clear();
          }
          SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor));
          for (BasicBlock *EHPred : EHPreds)
            removeUnwindEdge(EHPred, DTU);
        }
        // The catchswitch is no longer reachable.
        new UnreachableInst(CSI->getContext(), CSI->getIterator());
        CSI->eraseFromParent();
        Changed = true;
      }
    } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
      (void)CRI;
      assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
             "Expected to always have an unwind to BB.");
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
      new UnreachableInst(TI->getContext(), TI->getIterator());
      TI->eraseFromParent();
      Changed = true;
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  // If this block is now dead, remove it.
  if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
    DeleteDeadBlock(BB, DTU);
    return true;
  }

  return Changed;
}
5820
5829
// Determine whether Cases (sorted in descending order) forms a contiguous
// range of values, either directly or as the complement of a wrapping range
// over the full value range of Condition. On success returns the bounds,
// destination blocks and case lists describing the contiguous side; returns
// std::nullopt otherwise.
static std::optional<ContiguousCasesResult>
                    BasicBlock *Dest, BasicBlock *OtherDest) {
  assert(Cases.size() >= 1);

  const APInt &Min = Cases.back()->getValue();
  const APInt &Max = Cases.front()->getValue();
  APInt Offset = Max - Min;
  size_t ContiguousOffset = Cases.size() - 1;
  // N distinct values spanning exactly N-1 means they're contiguous.
  if (Offset == ContiguousOffset) {
    return ContiguousCasesResult{
        /*Min=*/Cases.back(),
        /*Max=*/Cases.front(),
        /*Dest=*/Dest,
        /*OtherDest=*/OtherDest,
        /*Cases=*/&Cases,
        /*OtherCases=*/&OtherCases,
    };
  }
  ConstantRange CR = computeConstantRange(Condition, /*ForSigned=*/false,
                                          SimplifyQuery(Dest->getDataLayout()));
  // If this is a wrapping contiguous range, that is, [Min, OtherMin] +
  // [OtherMax, Max] (also [OtherMax, OtherMin]), [OtherMin+1, OtherMax-1] is a
  // contiguous range for the other destination. N.B. If CR is not a full range,
  // Max+1 is not equal to Min. It's not continuous in arithmetic.
  if (Max == CR.getUnsignedMax() && Min == CR.getUnsignedMin()) {
    assert(Cases.size() >= 2);
    // Find the single gap in the (descending) sequence of case values.
    auto *It =
        std::adjacent_find(Cases.begin(), Cases.end(), [](auto L, auto R) {
          return L->getValue() != R->getValue() + 1;
        });
    if (It == Cases.end())
      return std::nullopt;
    auto [OtherMax, OtherMin] = std::make_pair(*It, *std::next(It));
    // Both halves around the gap must themselves be contiguous.
    if ((Max - OtherMax->getValue()) + (OtherMin->getValue() - Min) ==
        Cases.size() - 2) {
      return ContiguousCasesResult{
          /*Min=*/cast<ConstantInt>(
              ConstantInt::get(OtherMin->getType(), OtherMin->getValue() + 1)),
          /*Max=*/
              ConstantInt::get(OtherMax->getType(), OtherMax->getValue() - 1)),
          /*Dest=*/OtherDest,
          /*OtherDest=*/Dest,
          /*Cases=*/&OtherCases,
          /*OtherCases=*/&Cases,
      };
    }
  }
  return std::nullopt;
}
5883
                                        DomTreeUpdater *DTU,
                                        bool RemoveOrigDefaultBlock = true) {
  // The switch's default is known dead: point it at a fresh block containing
  // only 'unreachable', and (optionally) detach the original default block.
  LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
  auto *BB = Switch->getParent();
  auto *OrigDefaultBlock = Switch->getDefaultDest();
  if (RemoveOrigDefaultBlock)
    OrigDefaultBlock->removePredecessor(BB);
  BasicBlock *NewDefaultBlock = BasicBlock::Create(
      BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
      OrigDefaultBlock);
  auto *UI = new UnreachableInst(Switch->getContext(), NewDefaultBlock);
  Switch->setDefaultDest(&*NewDefaultBlock);
  if (DTU) {
    Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
    // Only delete the old edge if no remaining case still targets the
    // original default block.
    if (RemoveOrigDefaultBlock &&
        !is_contained(successors(BB), OrigDefaultBlock))
      Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
    DTU->applyUpdates(Updates);
  }
}
5907
/// Turn a switch into an integer range comparison and branch.
/// Switches with more than 2 destinations are ignored.
/// Switches with 1 destination are also ignored.
///
/// When one destination's case values form a contiguous range, the switch is
/// replaced with `(cond - Min) ult NumCases ? Dest : OtherDest` (or a single
/// equality compare / unconditional branch in the degenerate cases). Branch
/// weights and successor PHIs are updated accordingly. Returns true if the
/// switch was replaced.
bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
                                             IRBuilder<> &Builder) {
  assert(SI->getNumCases() > 1 && "Degenerate switch?");

  bool HasDefault = !SI->defaultDestUnreachable();

  auto *BB = SI->getParent();
  // Partition the cases into two sets with different destinations.
  BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
  BasicBlock *DestB = nullptr;

  for (auto Case : SI->cases()) {
    BasicBlock *Dest = Case.getCaseSuccessor();
    if (!DestA)
      DestA = Dest;
    if (Dest == DestA) {
      CasesA.push_back(Case.getCaseValue());
      continue;
    }
    if (!DestB)
      DestB = Dest;
    if (Dest == DestB) {
      CasesB.push_back(Case.getCaseValue());
      continue;
    }
    return false; // More than two destinations.
  }
  if (!DestB)
    return false; // All destinations are the same and the default is unreachable

  assert(DestA && DestB &&
         "Single-destination switch should have been folded.");
  assert(DestA != DestB);
  assert(DestB != SI->getDefaultDest());
  assert(!CasesB.empty() && "There must be non-default cases.");
  assert(!CasesA.empty() || HasDefault);

  // Figure out if one of the sets of cases form a contiguous range.
  std::optional<ContiguousCasesResult> ContiguousCases;

  // Only one icmp is needed when there is only one case.
  if (!HasDefault && CasesA.size() == 1)
    ContiguousCases = ContiguousCasesResult{
        /*Min=*/CasesA[0],
        /*Max=*/CasesA[0],
        /*Dest=*/DestA,
        /*OtherDest=*/DestB,
        /*Cases=*/&CasesA,
        /*OtherCases=*/&CasesB,
    };
  else if (CasesB.size() == 1)
    ContiguousCases = ContiguousCasesResult{
        /*Min=*/CasesB[0],
        /*Max=*/CasesB[0],
        /*Dest=*/DestB,
        /*OtherDest=*/DestA,
        /*Cases=*/&CasesB,
        /*OtherCases=*/&CasesA,
    };
  // Correctness: Cases to the default destination cannot be contiguous cases.
  else if (!HasDefault)
    ContiguousCases =
        findContiguousCases(SI->getCondition(), CasesA, CasesB, DestA, DestB);

  if (!ContiguousCases)
    ContiguousCases =
        findContiguousCases(SI->getCondition(), CasesB, CasesA, DestB, DestA);

  if (!ContiguousCases)
    return false;

  auto [Min, Max, Dest, OtherDest, Cases, OtherCases] = *ContiguousCases;

  // Start building the compare and branch.

  Constant *NumCases = ConstantInt::get(Offset->getType(),
                                        Max->getValue() - Min->getValue() + 1);
  Instruction *NewBI;
  if (NumCases->isOneValue()) {
    // A single case value only needs an equality test.
    assert(Max->getValue() == Min->getValue());
    Value *Cmp = Builder.CreateICmpEQ(SI->getCondition(), Min);
    NewBI = Builder.CreateCondBr(Cmp, Dest, OtherDest);
  }
  // If NumCases overflowed, then all possible values jump to the successor.
  else if (NumCases->isNullValue() && !Cases->empty()) {
    NewBI = Builder.CreateBr(Dest);
  } else {
    // General case: bias the condition so the range starts at zero, then
    // do one unsigned comparison against the range length.
    Value *Sub = SI->getCondition();
    if (!Offset->isNullValue())
      Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");
    Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
    NewBI = Builder.CreateCondBr(Cmp, Dest, OtherDest);
  }

  // Update weight for the newly-created conditional branch.
  if (hasBranchWeightMD(*SI) && isa<CondBrInst>(NewBI)) {
    SmallVector<uint64_t, 8> Weights;
    getBranchWeights(SI, Weights);
    if (Weights.size() == 1 + SI->getNumCases()) {
      uint64_t TrueWeight = 0;
      uint64_t FalseWeight = 0;
      // Sum the per-successor weights into the two sides of the new branch.
      for (size_t I = 0, E = Weights.size(); I != E; ++I) {
        if (SI->getSuccessor(I) == Dest)
          TrueWeight += Weights[I];
        else
          FalseWeight += Weights[I];
      }
      // Scale both weights down together until they fit in 32 bits.
      while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
        TrueWeight /= 2;
        FalseWeight /= 2;
      }
      setFittedBranchWeights(*NewBI, {TrueWeight, FalseWeight},
                             /*IsExpected=*/false, /*ElideAllZero=*/true);
    }
  }

  // Prune obsolete incoming values off the successors' PHI nodes.
  for (auto &PHI : make_early_inc_range(Dest->phis())) {
    unsigned PreviousEdges = Cases->size();
    if (Dest == SI->getDefaultDest())
      ++PreviousEdges;
    // Keep exactly one incoming entry for the single remaining edge.
    for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
      PHI.removeIncomingValue(SI->getParent());
  }
  for (auto &PHI : make_early_inc_range(OtherDest->phis())) {
    unsigned PreviousEdges = OtherCases->size();
    if (OtherDest == SI->getDefaultDest())
      ++PreviousEdges;
    unsigned E = PreviousEdges - 1;
    // Remove all incoming values from OtherDest if OtherDest is unreachable.
    if (isa<UncondBrInst>(NewBI))
      ++E;
    for (unsigned I = 0; I != E; ++I)
      PHI.removeIncomingValue(SI->getParent());
  }

  // Clean up the default block - it may have phis or other instructions before
  // the unreachable terminator.
  if (!HasDefault)

  auto *UnreachableDefault = SI->getDefaultDest();

  // Drop the switch.
  SI->eraseFromParent();

  if (!HasDefault && DTU)
    DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}});

  return true;
}
6065
6066/// Compute masked bits for the condition of a switch
6067/// and use it to remove dead cases.
// NOTE(review): the extraction that produced this text dropped several
// physical lines (the function signature at original line 6068; declarations
// at 6073, 6080, 6083, 6143 and 6157; and calls at 6118, 6125 and 6146). The
// code below is otherwise verbatim; restore the elided lines from upstream
// SimplifyCFG.cpp before compiling.
6069 AssumptionCache *AC,
6070 const DataLayout &DL) {
6071 Value *Cond = SI->getCondition();
// Known zero/one bits of the condition; used below to prove individual case
// values impossible.
6072 KnownBits Known = computeKnownBits(Cond, DL, AC, SI);
// Also try to enumerate the exact set of values Cond may take (bounded at 4);
// when that succeeds, any case value outside the set is dead.
6074 bool IsKnownValuesValid = collectPossibleValues(Cond, KnownValues, 4);
6075
6076 // We can also eliminate cases by determining that their values are outside of
6077 // the limited range of the condition based on how many significant (non-sign)
6078 // bits are in the condition value.
6079 unsigned MaxSignificantBitsInCond =
6081
6082 // Gather dead cases.
// When a DomTreeUpdater is present, also count how many cases target each
// successor so that edges whose count drops to zero can be deleted from the
// dominator tree afterwards.
6084 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
6085 SmallVector<BasicBlock *, 8> UniqueSuccessors;
6086 for (const auto &Case : SI->cases()) {
6087 auto *Successor = Case.getCaseSuccessor();
6088 if (DTU) {
6089 auto [It, Inserted] = NumPerSuccessorCases.try_emplace(Successor);
6090 if (Inserted)
6091 UniqueSuccessors.push_back(Successor);
6092 ++It->second;
6093 }
6094 ConstantInt *CaseC = Case.getCaseValue();
6095 const APInt &CaseVal = CaseC->getValue();
// A case is dead if it sets a bit known to be zero, misses a bit known to be
// one, needs more significant bits than the condition can have, or is not
// among the enumerated possible values.
6096 if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
6097 (CaseVal.getSignificantBits() > MaxSignificantBitsInCond) ||
6098 (IsKnownValuesValid && !KnownValues.contains(CaseC))) {
6099 DeadCases.push_back(CaseC);
6100 if (DTU)
6101 --NumPerSuccessorCases[Successor];
6102 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
6103 << " is dead.\n");
6104 } else if (IsKnownValuesValid)
// Live case: drop it from the set so that afterwards KnownValues holds only
// the possible values NOT covered by any case.
6105 KnownValues.erase(CaseC);
6106 }
6107
6108 // If we can prove that the cases must cover all possible values, the
6109 // default destination becomes dead and we can remove it. If we know some
6110 // of the bits in the value, we can use that to more precisely compute the
6111 // number of possible unique case values.
6112 bool HasDefault = !SI->defaultDestUnreachable();
6113 const unsigned NumUnknownBits =
6114 Known.getBitWidth() - (Known.Zero | Known.One).popcount();
6115 assert(NumUnknownBits <= Known.getBitWidth());
6116 if (HasDefault && DeadCases.empty()) {
// Every remaining possible value is undef-only, so the default cannot be
// taken on a meaningful value. (The elided line 6118 presumably marks the
// default unreachable — confirm against upstream.)
6117 if (IsKnownValuesValid && all_of(KnownValues, IsaPred<UndefValue>)) {
6119 return true;
6120 }
6121
6122 if (NumUnknownBits < 64 /* avoid overflow */) {
6123 uint64_t AllNumCases = 1ULL << NumUnknownBits;
// All 2^NumUnknownBits feasible values have explicit cases: the default is
// dead. (Elided line 6125 presumably rewrites it as unreachable — confirm.)
6124 if (SI->getNumCases() == AllNumCases) {
6126 return true;
6127 }
6128 // When only one case value is missing, replace default with that case.
6129 // Eliminating the default branch will provide more opportunities for
6130 // optimization, such as lookup tables.
6131 if (SI->getNumCases() == AllNumCases - 1) {
6132 assert(NumUnknownBits > 1 && "Should be canonicalized to a branch");
6133 IntegerType *CondTy = cast<IntegerType>(Cond->getType());
6134 if (CondTy->getIntegerBitWidth() > 64 ||
6135 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
6136 return false;
6137
// XOR of all k-bit values is 0 for k > 1 (hence the assert above), so
// XOR-ing the 2^k - 1 present case values yields exactly the missing one.
6138 uint64_t MissingCaseVal = 0;
6139 for (const auto &Case : SI->cases())
6140 MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue();
6141 auto *MissingCase = cast<ConstantInt>(
6142 ConstantInt::get(Cond->getType(), MissingCaseVal));
// Route the missing value to the old default block with the default's
// weight, then zero the (now unreachable) default's own weight.
6144 SIW.addCase(MissingCase, SI->getDefaultDest(),
6145 SIW.getSuccessorWeight(0));
6147 /*RemoveOrigDefaultBlock*/ false);
6148 SIW.setSuccessorWeight(0, 0);
6149 return true;
6150 }
6151 }
6152 }
6153
6154 if (DeadCases.empty())
6155 return false;
6156
// Remove each dead case, keeping PHI nodes and profile metadata in sync.
6158 for (ConstantInt *DeadCase : DeadCases) {
6159 SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
6160 assert(CaseI != SI->case_default() &&
6161 "Case was not found. Probably mistake in DeadCases forming.");
6162 // Prune unused values from PHI nodes.
6163 CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
6164 SIW.removeCase(CaseI);
6165 }
6166
// Delete CFG edges that no longer have any corresponding case.
6167 if (DTU) {
6168 std::vector<DominatorTree::UpdateType> Updates;
6169 for (auto *Successor : UniqueSuccessors)
6170 if (NumPerSuccessorCases[Successor] == 0)
6171 Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor});
6172 DTU->applyUpdates(Updates);
6173 }
6174
6175 return true;
6176}
6177
6178/// If BB would be eligible for simplification by
6179/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
6180/// by an unconditional branch), look at the phi node for BB in the successor
6181/// block and see if the incoming value is equal to CaseValue. If so, return
6182/// the phi node, and set PhiIndex to BB's index in the phi node.
6184 BasicBlock *BB, int *PhiIndex) {
6185 if (&*BB->getFirstNonPHIIt() != BB->getTerminator())
6186 return nullptr; // BB must be empty to be a candidate for simplification.
6187 if (!BB->getSinglePredecessor())
6188 return nullptr; // BB must be dominated by the switch.
6189
6191 if (!Branch)
6192 return nullptr; // Terminator must be unconditional branch.
6193
6194 BasicBlock *Succ = Branch->getSuccessor();
6195
6196 for (PHINode &PHI : Succ->phis()) {
6197 int Idx = PHI.getBasicBlockIndex(BB);
6198 assert(Idx >= 0 && "PHI has no entry for predecessor?");
6199
6200 Value *InValue = PHI.getIncomingValue(Idx);
6201 if (InValue != CaseValue)
6202 continue;
6203
6204 *PhiIndex = Idx;
6205 return &PHI;
6206 }
6207
6208 return nullptr;
6209}
6210
6211/// Try to forward the condition of a switch instruction to a phi node
6212/// dominated by the switch, if that would mean that some of the destination
6213/// blocks of the switch can be folded away. Return true if a change is made.
6215 using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;
6216
6217 ForwardingNodesMap ForwardingNodes;
6218 BasicBlock *SwitchBlock = SI->getParent();
6219 bool Changed = false;
6220 for (const auto &Case : SI->cases()) {
6221 ConstantInt *CaseValue = Case.getCaseValue();
6222 BasicBlock *CaseDest = Case.getCaseSuccessor();
6223
6224 // Replace phi operands in successor blocks that are using the constant case
6225 // value rather than the switch condition variable:
6226 // switchbb:
6227 // switch i32 %x, label %default [
6228 // i32 17, label %succ
6229 // ...
6230 // succ:
6231 // %r = phi i32 ... [ 17, %switchbb ] ...
6232 // -->
6233 // %r = phi i32 ... [ %x, %switchbb ] ...
6234
6235 for (PHINode &Phi : CaseDest->phis()) {
6236 // This only works if there is exactly 1 incoming edge from the switch to
6237 // a phi. If there is >1, that means multiple cases of the switch map to 1
6238 // value in the phi, and that phi value is not the switch condition. Thus,
6239 // this transform would not make sense (the phi would be invalid because
6240 // a phi can't have different incoming values from the same block).
6241 int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
6242 if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
6243 count(Phi.blocks(), SwitchBlock) == 1) {
6244 Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
6245 Changed = true;
6246 }
6247 }
6248
6249 // Collect phi nodes that are indirectly using this switch's case constants.
6250 int PhiIdx;
6251 if (auto *Phi = findPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
6252 ForwardingNodes[Phi].push_back(PhiIdx);
6253 }
6254
6255 for (auto &ForwardingNode : ForwardingNodes) {
6256 PHINode *Phi = ForwardingNode.first;
6257 SmallVectorImpl<int> &Indexes = ForwardingNode.second;
6258 // Check if it helps to fold PHI.
6259 if (Indexes.size() < 2 && !llvm::is_contained(Phi->incoming_values(), SI->getCondition()))
6260 continue;
6261
6262 for (int Index : Indexes)
6263 Phi->setIncomingValue(Index, SI->getCondition());
6264 Changed = true;
6265 }
6266
6267 return Changed;
6268}
6269
6270/// Return true if the backend will be able to handle
6271/// initializing an array of constants like C.
6273 if (C->isThreadDependent())
6274 return false;
6275 if (C->isDLLImportDependent())
6276 return false;
6277
6280 return false;
6281
6282 // Globals cannot contain scalable types.
6283 if (C->getType()->isScalableTy())
6284 return false;
6285
6287 // Pointer casts and in-bounds GEPs will not prohibit the backend from
6288 // materializing the array of constants.
6289 Constant *StrippedC = cast<Constant>(CE->stripInBoundsConstantOffsets());
6290 if (StrippedC == C || !validLookupTableConstant(StrippedC, TTI))
6291 return false;
6292 }
6293
6294 if (!TTI.shouldBuildLookupTablesForConstant(C))
6295 return false;
6296
6297 return true;
6298}
6299
6300/// If V is a Constant, return it. Otherwise, try to look up
6301/// its constant value in ConstantPool, returning 0 if it's not there.
6302static Constant *
6305 if (Constant *C = dyn_cast<Constant>(V))
6306 return C;
6307 return ConstantPool.lookup(V);
6308}
6309
6310/// Try to fold instruction I into a constant. This works for
6311/// simple instructions such as binary operations where both operands are
6312/// constant or can be replaced by constants from the ConstantPool. Returns the
6313/// resulting constant on success, 0 otherwise.
6314static Constant *
6318 Constant *A = lookupConstant(Select->getCondition(), ConstantPool);
6319 if (!A)
6320 return nullptr;
6321 if (A->isAllOnesValue())
6322 return lookupConstant(Select->getTrueValue(), ConstantPool);
6323 if (A->isNullValue())
6324 return lookupConstant(Select->getFalseValue(), ConstantPool);
6325 return nullptr;
6326 }
6327
6329 for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
6330 if (Constant *A = lookupConstant(I->getOperand(N), ConstantPool))
6331 COps.push_back(A);
6332 else
6333 return nullptr;
6334 }
6335
6336 return ConstantFoldInstOperands(I, COps, DL);
6337}
6338
6339/// Try to determine the resulting constant values in phi nodes
6340/// at the common destination basic block, *CommonDest, for one of the case
6341/// destinations CaseDest corresponding to value CaseVal (nullptr for the
6342/// default case), of a switch instruction SI.
6343static bool
6345 BasicBlock **CommonDest,
6346 SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
6347 const DataLayout &DL, const TargetTransformInfo &TTI) {
6348 // The block from which we enter the common destination.
6349 BasicBlock *Pred = SI->getParent();
6350
6351 // If CaseDest is empty except for some side-effect free instructions through
6352 // which we can constant-propagate the CaseVal, continue to its successor.
6354 ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
6355 for (Instruction &I : *CaseDest) {
6356 if (I.isTerminator()) {
6357 // If the terminator is a simple branch, continue to the next block.
6358 if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
6359 return false;
6360 Pred = CaseDest;
6361 CaseDest = I.getSuccessor(0);
6362 } else if (Constant *C = constantFold(&I, DL, ConstantPool)) {
6363 // Instruction is side-effect free and constant.
6364
6365 // If the instruction has uses outside this block or a phi node slot for
6366 // the block, it is not safe to bypass the instruction since it would then
6367 // no longer dominate all its uses.
6368 for (auto &Use : I.uses()) {
6369 User *User = Use.getUser();
6371 if (I->getParent() == CaseDest)
6372 continue;
6373 if (PHINode *Phi = dyn_cast<PHINode>(User))
6374 if (Phi->getIncomingBlock(Use) == CaseDest)
6375 continue;
6376 return false;
6377 }
6378
6379 ConstantPool.insert(std::make_pair(&I, C));
6380 } else {
6381 break;
6382 }
6383 }
6384
6385 // If we did not have a CommonDest before, use the current one.
6386 if (!*CommonDest)
6387 *CommonDest = CaseDest;
6388 // If the destination isn't the common one, abort.
6389 if (CaseDest != *CommonDest)
6390 return false;
6391
6392 // Get the values for this case from phi nodes in the destination block.
6393 for (PHINode &PHI : (*CommonDest)->phis()) {
6394 int Idx = PHI.getBasicBlockIndex(Pred);
6395 if (Idx == -1)
6396 continue;
6397
6398 Constant *ConstVal =
6399 lookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
6400 if (!ConstVal)
6401 return false;
6402
6403 // Be conservative about which kinds of constants we support.
6404 if (!validLookupTableConstant(ConstVal, TTI))
6405 return false;
6406
6407 Res.push_back(std::make_pair(&PHI, ConstVal));
6408 }
6409
6410 return Res.size() > 0;
6411}
6412
6413// Helper function used to add CaseVal to the list of cases that generate
6414// Result. Returns the updated number of cases that generate this result.
6415static size_t mapCaseToResult(ConstantInt *CaseVal,
6416 SwitchCaseResultVectorTy &UniqueResults,
6417 Constant *Result) {
6418 for (auto &I : UniqueResults) {
6419 if (I.first == Result) {
6420 I.second.push_back(CaseVal);
6421 return I.second.size();
6422 }
6423 }
6424 UniqueResults.push_back(
6425 std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
6426 return 1;
6427}
6428
6429// Helper function that initializes a map containing
6430// results for the PHI node of the common destination block for a switch
6431// instruction. Returns false if multiple PHI nodes have been found or if
6432// there is not a common destination block for the switch.
6434 BasicBlock *&CommonDest,
6435 SwitchCaseResultVectorTy &UniqueResults,
6436 Constant *&DefaultResult,
6437 const DataLayout &DL,
6438 const TargetTransformInfo &TTI,
6439 uintptr_t MaxUniqueResults) {
6440 for (const auto &I : SI->cases()) {
6441 ConstantInt *CaseVal = I.getCaseValue();
6442
6443 // Resulting value at phi nodes for this case value.
6444 SwitchCaseResultsTy Results;
6445 if (!getCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
6446 DL, TTI))
6447 return false;
6448
6449 // Only one value per case is permitted.
6450 if (Results.size() > 1)
6451 return false;
6452
6453 // Add the case->result mapping to UniqueResults.
6454 const size_t NumCasesForResult =
6455 mapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);
6456
6457 // Early out if there are too many cases for this result.
6458 if (NumCasesForResult > MaxSwitchCasesPerResult)
6459 return false;
6460
6461 // Early out if there are too many unique results.
6462 if (UniqueResults.size() > MaxUniqueResults)
6463 return false;
6464
6465 // Check the PHI consistency.
6466 if (!PHI)
6467 PHI = Results[0].first;
6468 else if (PHI != Results[0].first)
6469 return false;
6470 }
6471 // Find the default result value.
6473 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
6474 DL, TTI);
6475 // If the default value is not found abort unless the default destination
6476 // is unreachable.
6477 DefaultResult =
6478 DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
6479
6480 return DefaultResult || SI->defaultDestUnreachable();
6481}
6482
6483// Helper function that checks if it is possible to transform a switch with only
6484// two cases (or two cases + default) that produces a result into a select.
6485// TODO: Handle switches with more than 2 cases that map to the same result.
6486// The branch weights correspond to the provided Condition (i.e. if Condition is
6487// modified from the original SwitchInst, the caller must adjust the weights)
6488static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
6489 Constant *DefaultResult, Value *Condition,
6490 IRBuilder<> &Builder, const DataLayout &DL,
6491 ArrayRef<uint32_t> BranchWeights) {
6492 // If we are selecting between only two cases transform into a simple
6493 // select or a two-way select if default is possible.
6494 // Example:
6495 // switch (a) { %0 = icmp eq i32 %a, 10
6496 // case 10: return 42; %1 = select i1 %0, i32 42, i32 4
6497 // case 20: return 2; ----> %2 = icmp eq i32 %a, 20
6498 // default: return 4; %3 = select i1 %2, i32 2, i32 %1
6499 // }
6500
6501 const bool HasBranchWeights =
6502 !BranchWeights.empty() && !ProfcheckDisableMetadataFixes;
6503
6504 if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
6505 ResultVector[1].second.size() == 1) {
6506 ConstantInt *FirstCase = ResultVector[0].second[0];
6507 ConstantInt *SecondCase = ResultVector[1].second[0];
6508 Value *SelectValue = ResultVector[1].first;
6509 if (DefaultResult) {
6510 Value *ValueCompare =
6511 Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
6512 SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
6513 DefaultResult, "switch.select");
6514 if (auto *SI = dyn_cast<SelectInst>(SelectValue);
6515 SI && HasBranchWeights) {
6516 // We start with 3 probabilities, where the numerator is the
6517 // corresponding BranchWeights[i], and the denominator is the sum over
6518 // BranchWeights. We want the probability and negative probability of
6519 // Condition == SecondCase.
6520 assert(BranchWeights.size() == 3);
6522 *SI, {BranchWeights[2], BranchWeights[0] + BranchWeights[1]},
6523 /*IsExpected=*/false, /*ElideAllZero=*/true);
6524 }
6525 }
6526 Value *ValueCompare =
6527 Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
6528 Value *Ret = Builder.CreateSelect(ValueCompare, ResultVector[0].first,
6529 SelectValue, "switch.select");
6530 if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6531 // We may have had a DefaultResult. Base the position of the first and
6532 // second's branch weights accordingly. Also the proability that Condition
6533 // != FirstCase needs to take that into account.
6534 assert(BranchWeights.size() >= 2);
6535 size_t FirstCasePos = (Condition != nullptr);
6536 size_t SecondCasePos = FirstCasePos + 1;
6537 uint32_t DefaultCase = (Condition != nullptr) ? BranchWeights[0] : 0;
6539 {BranchWeights[FirstCasePos],
6540 DefaultCase + BranchWeights[SecondCasePos]},
6541 /*IsExpected=*/false, /*ElideAllZero=*/true);
6542 }
6543 return Ret;
6544 }
6545
6546 // Handle the degenerate case where two cases have the same result value.
6547 if (ResultVector.size() == 1 && DefaultResult) {
6548 ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
6549 unsigned CaseCount = CaseValues.size();
6550 // n bits group cases map to the same result:
6551 // case 0,4 -> Cond & 0b1..1011 == 0 ? result : default
6552 // case 0,2,4,6 -> Cond & 0b1..1001 == 0 ? result : default
6553 // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
6554 if (isPowerOf2_32(CaseCount)) {
6555 ConstantInt *MinCaseVal = CaseValues[0];
6556 // If there are bits that are set exclusively by CaseValues, we
6557 // can transform the switch into a select if the conjunction of
6558 // all the values uniquely identify CaseValues.
6559 APInt AndMask = APInt::getAllOnes(MinCaseVal->getBitWidth());
6560
6561 // Find the minimum value and compute the and of all the case values.
6562 for (auto *Case : CaseValues) {
6563 if (Case->getValue().slt(MinCaseVal->getValue()))
6564 MinCaseVal = Case;
6565 AndMask &= Case->getValue();
6566 }
6567 KnownBits Known = computeKnownBits(Condition, DL);
6568
6569 if (!AndMask.isZero() && Known.getMaxValue().uge(AndMask)) {
6570 // Compute the number of bits that are free to vary.
6571 unsigned FreeBits = Known.countMaxActiveBits() - AndMask.popcount();
6572
6573 // Check if the number of values covered by the mask is equal
6574 // to the number of cases.
6575 if (FreeBits == Log2_32(CaseCount)) {
6576 Value *And = Builder.CreateAnd(Condition, AndMask);
6577 Value *Cmp = Builder.CreateICmpEQ(
6578 And, Constant::getIntegerValue(And->getType(), AndMask));
6579 Value *Ret =
6580 Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6581 if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6582 // We know there's a Default case. We base the resulting branch
6583 // weights off its probability.
6584 assert(BranchWeights.size() >= 2);
6586 *SI,
6587 {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
6588 /*IsExpected=*/false, /*ElideAllZero=*/true);
6589 }
6590 return Ret;
6591 }
6592 }
6593
6594 // Mark the bits case number touched.
6595 APInt BitMask = APInt::getZero(MinCaseVal->getBitWidth());
6596 for (auto *Case : CaseValues)
6597 BitMask |= (Case->getValue() - MinCaseVal->getValue());
6598
6599 // Check if cases with the same result can cover all number
6600 // in touched bits.
6601 if (BitMask.popcount() == Log2_32(CaseCount)) {
6602 if (!MinCaseVal->isNullValue())
6603 Condition = Builder.CreateSub(Condition, MinCaseVal);
6604 Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
6605 Value *Cmp = Builder.CreateICmpEQ(
6606 And, Constant::getNullValue(And->getType()), "switch.selectcmp");
6607 Value *Ret =
6608 Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6609 if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6610 assert(BranchWeights.size() >= 2);
6612 *SI,
6613 {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
6614 /*IsExpected=*/false, /*ElideAllZero=*/true);
6615 }
6616 return Ret;
6617 }
6618 }
6619
6620 // Handle the degenerate case where two cases have the same value.
6621 if (CaseValues.size() == 2) {
6622 Value *Cmp1 = Builder.CreateICmpEQ(Condition, CaseValues[0],
6623 "switch.selectcmp.case1");
6624 Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1],
6625 "switch.selectcmp.case2");
6626 Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
6627 Value *Ret =
6628 Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6629 if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6630 assert(BranchWeights.size() >= 2);
6632 *SI, {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
6633 /*IsExpected=*/false, /*ElideAllZero=*/true);
6634 }
6635 return Ret;
6636 }
6637 }
6638
6639 return nullptr;
6640}
6641
6642// Helper function to cleanup a switch instruction that has been converted into
6643// a select, fixing up PHI nodes and basic blocks.
6645 Value *SelectValue,
6646 IRBuilder<> &Builder,
6647 DomTreeUpdater *DTU) {
6648 std::vector<DominatorTree::UpdateType> Updates;
6649
6650 BasicBlock *SelectBB = SI->getParent();
6651 BasicBlock *DestBB = PHI->getParent();
6652
6653 if (DTU && !is_contained(predecessors(DestBB), SelectBB))
6654 Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
6655 Builder.CreateBr(DestBB);
6656
6657 // Remove the switch.
6658
6659 PHI->removeIncomingValueIf(
6660 [&](unsigned Idx) { return PHI->getIncomingBlock(Idx) == SelectBB; });
6661 PHI->addIncoming(SelectValue, SelectBB);
6662
6663 SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
6664 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
6665 BasicBlock *Succ = SI->getSuccessor(i);
6666
6667 if (Succ == DestBB)
6668 continue;
6669 Succ->removePredecessor(SelectBB);
6670 if (DTU && RemovedSuccessors.insert(Succ).second)
6671 Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
6672 }
6673 SI->eraseFromParent();
6674 if (DTU)
6675 DTU->applyUpdates(Updates);
6676}
6677
6678/// If a switch is only used to initialize one or more phi nodes in a common
6679/// successor block with only two different constant values, try to replace the
6680/// switch with a select. Returns true if the fold was made.
6682 DomTreeUpdater *DTU, const DataLayout &DL,
6683 const TargetTransformInfo &TTI) {
6684 Value *const Cond = SI->getCondition();
6685 PHINode *PHI = nullptr;
6686 BasicBlock *CommonDest = nullptr;
6687 Constant *DefaultResult;
6688 SwitchCaseResultVectorTy UniqueResults;
6689 // Collect all the cases that will deliver the same value from the switch.
6690 if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
6691 DL, TTI, /*MaxUniqueResults*/ 2))
6692 return false;
6693
6694 assert(PHI != nullptr && "PHI for value select not found");
6695 Builder.SetInsertPoint(SI);
6696 SmallVector<uint32_t, 4> BranchWeights;
6698 [[maybe_unused]] auto HasWeights =
6700 assert(!HasWeights == (BranchWeights.empty()));
6701 }
6702 assert(BranchWeights.empty() ||
6703 (BranchWeights.size() >=
6704 UniqueResults.size() + (DefaultResult != nullptr)));
6705
6706 Value *SelectValue = foldSwitchToSelect(UniqueResults, DefaultResult, Cond,
6707 Builder, DL, BranchWeights);
6708 if (!SelectValue)
6709 return false;
6710
6711 removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
6712 return true;
6713}
6714
6715namespace {
6716
6717/// This class finds alternatives for switches to ultimately
6718/// replace the switch.
// The constructor analyzes the case values once and picks exactly one of the
// four strategies in the Kind enum below; replaceSwitch() then materializes
// the corresponding IR.
6719class SwitchReplacement {
6720public:
6721 /// Create a helper for optimizations to use as a switch replacement.
6722 /// Find a better representation for the content of Values,
6723 /// using DefaultValue to fill any holes in the table.
6724 SwitchReplacement(
6725 Module &M, uint64_t TableSize, ConstantInt *Offset,
6726 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
6727 Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);
6728
6729 /// Build instructions with Builder to retrieve values using Index
6730 /// and replace the switch.
// NOTE(review): appears to emit IR at Builder's current insertion point —
// confirm that callers position the builder before invoking this.
6731 Value *replaceSwitch(Value *Index, IRBuilder<> &Builder, const DataLayout &DL,
6732 Function *Func);
6733
6734 /// Return true if a table with TableSize elements of
6735 /// type ElementType would fit in a target-legal register.
6736 static bool wouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
6737 Type *ElementType);
6738
6739 /// Return the default value of the switch.
6740 Constant *getDefaultValue();
6741
6742 /// Return true if the replacement is a lookup table.
6743 bool isLookupTable();
6744
6745 /// Return true if the replacement is a bit map.
6746 bool isBitMap();
6747
6748private:
6749 // Depending on the switch, there are different alternatives.
6750 enum {
6751 // For switches where each case contains the same value, we just have to
6752 // store that single value and return it for each lookup.
6753 SingleValueKind,
6754
6755 // For switches where there is a linear relationship between table index
6756 // and values. We calculate the result with a simple multiplication
6757 // and addition instead of a table lookup.
6758 LinearMapKind,
6759
6760 // For small tables with integer elements, we can pack them into a bitmap
6761 // that fits into a target-legal register. Values are retrieved by
6762 // shift and mask operations.
6763 BitMapKind,
6764
6765 // The table is stored as an array of values. Values are retrieved by load
6766 // instructions from the table.
6767 LookupTableKind
6768 } Kind;
// Kind is set exactly once, in the constructor, and never changes.
6769
6770 // The default value of the switch.
6771 Constant *DefaultValue;
6772
6773 // The type of the output values.
6774 Type *ValueType;
6775
6776 // For SingleValueKind, this is the single value.
6777 Constant *SingleValue = nullptr;
6778
6779 // For BitMapKind, this is the bitmap.
6780 ConstantInt *BitMap = nullptr;
6781 IntegerType *BitMapElementTy = nullptr;
6782
6783 // For LinearMapKind, these are the constants used to derive the value.
6784 ConstantInt *LinearOffset = nullptr;
6785 ConstantInt *LinearMultiplier = nullptr;
6786 bool LinearMapValWrapped = false;
6787
6788 // For LookupTableKind, this is the table.
6789 Constant *Initializer = nullptr;
6790};
6791
6792} // end anonymous namespace
6793
6794SwitchReplacement::SwitchReplacement(
6795 Module &M, uint64_t TableSize, ConstantInt *Offset,
6796 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
6797 Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName)
6798 : DefaultValue(DefaultValue) {
6799 assert(Values.size() && "Can't build lookup table without values!");
6800 assert(TableSize >= Values.size() && "Can't fit values in table!");
6801
6802 // If all values in the table are equal, this is that value.
6803 SingleValue = Values.begin()->second;
6804
6805 ValueType = Values.begin()->second->getType();
6806
6807 // Build up the table contents.
6808 SmallVector<Constant *, 64> TableContents(TableSize);
6809 for (const auto &[CaseVal, CaseRes] : Values) {
6810 assert(CaseRes->getType() == ValueType);
6811
6812 uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
6813 TableContents[Idx] = CaseRes;
6814
6815 if (SingleValue && !isa<PoisonValue>(CaseRes) && CaseRes != SingleValue)
6816 SingleValue = isa<PoisonValue>(SingleValue) ? CaseRes : nullptr;
6817 }
6818
6819 // Fill in any holes in the table with the default result.
6820 if (Values.size() < TableSize) {
6821 assert(DefaultValue &&
6822 "Need a default value to fill the lookup table holes.");
6823 assert(DefaultValue->getType() == ValueType);
6824 for (uint64_t I = 0; I < TableSize; ++I) {
6825 if (!TableContents[I])
6826 TableContents[I] = DefaultValue;
6827 }
6828
6829 // If the default value is poison, all the holes are poison.
6830 bool DefaultValueIsPoison = isa<PoisonValue>(DefaultValue);
6831
6832 if (DefaultValue != SingleValue && !DefaultValueIsPoison)
6833 SingleValue = nullptr;
6834 }
6835
6836 // If each element in the table contains the same value, we only need to store
6837 // that single value.
6838 if (SingleValue) {
6839 Kind = SingleValueKind;
6840 return;
6841 }
6842
6843 // Check if we can derive the value with a linear transformation from the
6844 // table index.
6846 bool LinearMappingPossible = true;
6847 APInt PrevVal;
6848 APInt DistToPrev;
6849 // When linear map is monotonic and signed overflow doesn't happen on
6850 // maximum index, we can attach nsw on Add and Mul.
6851 bool NonMonotonic = false;
6852 assert(TableSize >= 2 && "Should be a SingleValue table.");
6853 // Check if there is the same distance between two consecutive values.
6854 for (uint64_t I = 0; I < TableSize; ++I) {
6855 ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);
6856
6857 if (!ConstVal && isa<PoisonValue>(TableContents[I])) {
6858 // This is an poison, so it's (probably) a lookup table hole.
6859 // To prevent any regressions from before we switched to using poison as
6860 // the default value, holes will fall back to using the first value.
6861 // This can be removed once we add proper handling for poisons in lookup
6862 // tables.
6863 ConstVal = dyn_cast<ConstantInt>(Values[0].second);
6864 }
6865
6866 if (!ConstVal) {
6867 // This is an undef. We could deal with it, but undefs in lookup tables
6868 // are very seldom. It's probably not worth the additional complexity.
6869 LinearMappingPossible = false;
6870 break;
6871 }
6872 const APInt &Val = ConstVal->getValue();
6873 if (I != 0) {
6874 APInt Dist = Val - PrevVal;
6875 if (I == 1) {
6876 DistToPrev = Dist;
6877 } else if (Dist != DistToPrev) {
6878 LinearMappingPossible = false;
6879 break;
6880 }
6881 NonMonotonic |=
6882 Dist.isStrictlyPositive() ? Val.sle(PrevVal) : Val.sgt(PrevVal);
6883 }
6884 PrevVal = Val;
6885 }
6886 if (LinearMappingPossible) {
6887 LinearOffset = cast<ConstantInt>(TableContents[0]);
6888 LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
6889 APInt M = LinearMultiplier->getValue();
6890 bool MayWrap = true;
6891 if (isIntN(M.getBitWidth(), TableSize - 1))
6892 (void)M.smul_ov(APInt(M.getBitWidth(), TableSize - 1), MayWrap);
6893 LinearMapValWrapped = NonMonotonic || MayWrap;
6894 Kind = LinearMapKind;
6895 return;
6896 }
6897 }
6898
6899 // If the type is integer and the table fits in a register, build a bitmap.
6900 if (wouldFitInRegister(DL, TableSize, ValueType)) {
6902 APInt TableInt(TableSize * IT->getBitWidth(), 0);
6903 for (uint64_t I = TableSize; I > 0; --I) {
6904 TableInt <<= IT->getBitWidth();
6905 // Insert values into the bitmap. Undef values are set to zero.
6906 if (!isa<UndefValue>(TableContents[I - 1])) {
6907 ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
6908 TableInt |= Val->getValue().zext(TableInt.getBitWidth());
6909 }
6910 }
6911 BitMap = ConstantInt::get(M.getContext(), TableInt);
6912 BitMapElementTy = IT;
6913 Kind = BitMapKind;
6914 return;
6915 }
6916
6917 // Store the table in an array.
6918 auto *TableTy = ArrayType::get(ValueType, TableSize);
6919 Initializer = ConstantArray::get(TableTy, TableContents);
6920
6921 Kind = LookupTableKind;
6922}
6923
6924Value *SwitchReplacement::replaceSwitch(Value *Index, IRBuilder<> &Builder,
6925 const DataLayout &DL, Function *Func) {
6926 switch (Kind) {
6927 case SingleValueKind:
6928 return SingleValue;
6929 case LinearMapKind: {
6930 ++NumLinearMaps;
6931 // Derive the result value from the input value.
6932 Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
6933 false, "switch.idx.cast");
6934 if (!LinearMultiplier->isOne())
6935 Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult",
6936 /*HasNUW = */ false,
6937 /*HasNSW = */ !LinearMapValWrapped);
6938
6939 if (!LinearOffset->isZero())
6940 Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset",
6941 /*HasNUW = */ false,
6942 /*HasNSW = */ !LinearMapValWrapped);
6943 return Result;
6944 }
6945 case BitMapKind: {
6946 ++NumBitMaps;
6947 // Type of the bitmap (e.g. i59).
6948 IntegerType *MapTy = BitMap->getIntegerType();
6949
6950 // Cast Index to the same type as the bitmap.
6951 // Note: The Index is <= the number of elements in the table, so
6952 // truncating it to the width of the bitmask is safe.
6953 Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");
6954
6955 // Multiply the shift amount by the element width. NUW/NSW can always be
6956 // set, because wouldFitInRegister guarantees Index * ShiftAmt is in
6957 // BitMap's bit width.
6958 ShiftAmt = Builder.CreateMul(
6959 ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
6960 "switch.shiftamt",/*HasNUW =*/true,/*HasNSW =*/true);
6961
6962 // Shift down.
6963 Value *DownShifted =
6964 Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
6965 // Mask off.
6966 return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
6967 }
6968 case LookupTableKind: {
6969 ++NumLookupTables;
6970 auto *Table =
6971 new GlobalVariable(*Func->getParent(), Initializer->getType(),
6972 /*isConstant=*/true, GlobalVariable::PrivateLinkage,
6973 Initializer, "switch.table." + Func->getName());
6974 Table->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
6975 // Set the alignment to that of an array items. We will be only loading one
6976 // value out of it.
6977 Table->setAlignment(DL.getPrefTypeAlign(ValueType));
6978 Type *IndexTy = DL.getIndexType(Table->getType());
6979 auto *ArrayTy = cast<ArrayType>(Table->getValueType());
6980
6981 if (Index->getType() != IndexTy) {
6982 unsigned OldBitWidth = Index->getType()->getIntegerBitWidth();
6983 Index = Builder.CreateZExtOrTrunc(Index, IndexTy);
6984 if (auto *Zext = dyn_cast<ZExtInst>(Index))
6985 Zext->setNonNeg(
6986 isUIntN(OldBitWidth - 1, ArrayTy->getNumElements() - 1));
6987 }
6988
6989 Value *GEPIndices[] = {ConstantInt::get(IndexTy, 0), Index};
6990 Value *GEP =
6991 Builder.CreateInBoundsGEP(ArrayTy, Table, GEPIndices, "switch.gep");
6992 return Builder.CreateLoad(ArrayTy->getElementType(), GEP, "switch.load");
6993 }
6994 }
6995 llvm_unreachable("Unknown helper kind!");
6996}
6997
6998bool SwitchReplacement::wouldFitInRegister(const DataLayout &DL,
6999 uint64_t TableSize,
7000 Type *ElementType) {
7001 auto *IT = dyn_cast<IntegerType>(ElementType);
7002 if (!IT)
7003 return false;
7004 // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
7005 // are <= 15, we could try to narrow the type.
7006
7007 // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
7008 if (TableSize >= UINT_MAX / IT->getBitWidth())
7009 return false;
7010 return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
7011}
7012
7014 const DataLayout &DL) {
7015 // Allow any legal type.
7016 if (TTI.isTypeLegal(Ty))
7017 return true;
7018
7019 auto *IT = dyn_cast<IntegerType>(Ty);
7020 if (!IT)
7021 return false;
7022
7023 // Also allow power of 2 integer types that have at least 8 bits and fit in
7024 // a register. These types are common in frontend languages and targets
7025 // usually support loads of these types.
7026 // TODO: We could relax this to any integer that fits in a register and rely
7027 // on ABI alignment and padding in the table to allow the load to be widened.
7028 // Or we could widen the constants and truncate the load.
7029 unsigned BitWidth = IT->getBitWidth();
7030 return BitWidth >= 8 && isPowerOf2_32(BitWidth) &&
7031 DL.fitsInLegalInteger(IT->getBitWidth());
7032}
7033
/// Return the constant used to fill holes in the table — the default case's
/// result, or poison when the default produces no constant result.
Constant *SwitchReplacement::getDefaultValue() { return DefaultValue; }
7035
/// True when this replacement will be emitted as a constant global array.
bool SwitchReplacement::isLookupTable() { return Kind == LookupTableKind; }
7037
/// True when this replacement will be emitted as a packed in-register bitmap.
bool SwitchReplacement::isBitMap() { return Kind == BitMapKind; }
7039
/// Return true when NumCases cases cover at least 40% of CaseRange.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
  // Huge ranges would overflow the products below; they are never dense.
  if (CaseRange >= UINT64_MAX / 100)
    return false;

  // 40% is the default density for building a jump table in optsize/minsize
  // mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this
  // function was based on.
  const uint64_t MinDensity = 40;
  return NumCases * 100 >= CaseRange * MinDensity;
}
7051
7053 uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
7054 uint64_t Range = Diff + 1;
7055 if (Range < Diff)
7056 return false; // Overflow.
7057
7058 return isSwitchDense(Values.size(), Range);
7059}
7060
7061/// Determine whether a lookup table should be built for this switch, based on
7062/// the number of cases, size of the table, and the types of the results.
7063// TODO: We could support larger than legal types by limiting based on the
7064// number of loads required and/or table size. If the constants are small we
7065// could use smaller table entries and extend after the load.
7067 const TargetTransformInfo &TTI,
7068 const DataLayout &DL,
7069 const SmallVector<Type *> &ResultTypes) {
7070 if (SI->getNumCases() > TableSize)
7071 return false; // TableSize overflowed.
7072
7073 bool AllTablesFitInRegister = true;
7074 bool HasIllegalType = false;
7075 for (const auto &Ty : ResultTypes) {
7076 // Saturate this flag to true.
7077 HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);
7078
7079 // Saturate this flag to false.
7080 AllTablesFitInRegister =
7081 AllTablesFitInRegister &&
7082 SwitchReplacement::wouldFitInRegister(DL, TableSize, Ty);
7083
7084 // If both flags saturate, we're done. NOTE: This *only* works with
7085 // saturating flags, and all flags have to saturate first due to the
7086 // non-deterministic behavior of iterating over a dense map.
7087 if (HasIllegalType && !AllTablesFitInRegister)
7088 break;
7089 }
7090
7091 // If each table would fit in a register, we should build it anyway.
7092 if (AllTablesFitInRegister)
7093 return true;
7094
7095 // Don't build a table that doesn't fit in-register if it has illegal types.
7096 if (HasIllegalType)
7097 return false;
7098
7099 return isSwitchDense(SI->getNumCases(), TableSize);
7100}
7101
7103 ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
7104 bool HasDefaultResults, const SmallVector<Type *> &ResultTypes,
7105 const DataLayout &DL, const TargetTransformInfo &TTI) {
7106 if (MinCaseVal.isNullValue())
7107 return true;
7108 if (MinCaseVal.isNegative() ||
7109 MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
7110 !HasDefaultResults)
7111 return false;
7112 return all_of(ResultTypes, [&](const auto &ResultType) {
7113 return SwitchReplacement::wouldFitInRegister(
7114 DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */, ResultType);
7115 });
7116}
7117
7118/// Try to reuse the switch table index compare. Following pattern:
7119/// \code
7120/// if (idx < tablesize)
7121/// r = table[idx]; // table does not contain default_value
7122/// else
7123/// r = default_value;
7124/// if (r != default_value)
7125/// ...
7126/// \endcode
7127/// Is optimized to:
7128/// \code
7129/// cond = idx < tablesize;
7130/// if (cond)
7131/// r = table[idx];
7132/// else
7133/// r = default_value;
7134/// if (cond)
7135/// ...
7136/// \endcode
7137/// Jump threading will then eliminate the second if(cond).
7139 User *PhiUser, BasicBlock *PhiBlock, CondBrInst *RangeCheckBranch,
7140 Constant *DefaultValue,
7141 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
7143 if (!CmpInst)
7144 return;
7145
7146 // We require that the compare is in the same block as the phi so that jump
7147 // threading can do its work afterwards.
7148 if (CmpInst->getParent() != PhiBlock)
7149 return;
7150
7152 if (!CmpOp1)
7153 return;
7154
7155 Value *RangeCmp = RangeCheckBranch->getCondition();
7156 Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
7157 Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());
7158
7159 // Check if the compare with the default value is constant true or false.
7160 const DataLayout &DL = PhiBlock->getDataLayout();
7162 CmpInst->getPredicate(), DefaultValue, CmpOp1, DL);
7163 if (DefaultConst != TrueConst && DefaultConst != FalseConst)
7164 return;
7165
7166 // Check if the compare with the case values is distinct from the default
7167 // compare result.
7168 for (auto ValuePair : Values) {
7170 CmpInst->getPredicate(), ValuePair.second, CmpOp1, DL);
7171 if (!CaseConst || CaseConst == DefaultConst ||
7172 (CaseConst != TrueConst && CaseConst != FalseConst))
7173 return;
7174 }
7175
7176 // Check if the branch instruction dominates the phi node. It's a simple
7177 // dominance check, but sufficient for our needs.
7178 // Although this check is invariant in the calling loops, it's better to do it
7179 // at this late stage. Practically we do it at most once for a switch.
7180 BasicBlock *BranchBlock = RangeCheckBranch->getParent();
7181 for (BasicBlock *Pred : predecessors(PhiBlock)) {
7182 if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
7183 return;
7184 }
7185
7186 if (DefaultConst == FalseConst) {
7187 // The compare yields the same result. We can replace it.
7188 CmpInst->replaceAllUsesWith(RangeCmp);
7189 ++NumTableCmpReuses;
7190 } else {
7191 // The compare yields the same result, just inverted. We can replace it.
7192 Value *InvertedTableCmp = BinaryOperator::CreateXor(
7193 RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
7194 RangeCheckBranch->getIterator());
7195 CmpInst->replaceAllUsesWith(InvertedTableCmp);
7196 ++NumTableCmpReuses;
7197 }
7198}
7199
7200/// If the switch is only used to initialize one or more phi nodes in a common
7201/// successor block with different constant values, replace the switch with
7202/// lookup tables.
                                 DomTreeUpdater *DTU, const DataLayout &DL,
                                 const TargetTransformInfo &TTI,
                                 bool ConvertSwitchToLookupTable) {
  assert(SI->getNumCases() > 1 && "Degenerate switch?");

  BasicBlock *BB = SI->getParent();
  Function *Fn = BB->getParent();

  // FIXME: If the switch is too sparse for a lookup table, perhaps we could
  // split off a dense part and build a lookup table for that.

  // FIXME: This creates arrays of GEPs to constant strings, which means each
  // GEP needs a runtime relocation in PIC code. We should just build one big
  // string and lookup indices into that.

  // Ignore switches with less than three cases. Lookup tables will not make
  // them faster, so we don't analyze them.
  if (SI->getNumCases() < 3)
    return false;

  // Figure out the corresponding result for each case value and phi node in the
  // common destination, as well as the min and max case values.
  assert(!SI->cases().empty());
  SwitchInst::CaseIt CI = SI->case_begin();
  ConstantInt *MinCaseVal = CI->getCaseValue();
  ConstantInt *MaxCaseVal = CI->getCaseValue();

  BasicBlock *CommonDest = nullptr;

  using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;

  SmallVector<Type *> ResultTypes;

  for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
    ConstantInt *CaseVal = CI->getCaseValue();
    // Track the (signed) minimum and maximum case values seen so far.
    if (CaseVal->getValue().slt(MinCaseVal->getValue()))
      MinCaseVal = CaseVal;
    if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
      MaxCaseVal = CaseVal;

    // Resulting value at phi nodes for this case value.
    ResultsTy Results;
    if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
                        Results, DL, TTI))
      return false;

    // Append the result and result types from this case to the list for each
    // phi.
    for (const auto &I : Results) {
      PHINode *PHI = I.first;
      Constant *Value = I.second;
      auto [It, Inserted] = ResultLists.try_emplace(PHI);
      if (Inserted)
        PHIs.push_back(PHI);
      It->second.push_back(std::make_pair(CaseVal, Value));
      ResultTypes.push_back(PHI->getType());
    }
  }

  // If the table has holes, we need a constant result for the default case
  // or a bitmask that fits in a register.
  SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
  bool HasDefaultResults =
      getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
                     DefaultResultsList, DL, TTI);
  for (const auto &I : DefaultResultsList) {
    PHINode *PHI = I.first;
    Constant *Result = I.second;
    DefaultResults[PHI] = Result;
  }

  bool UseSwitchConditionAsTableIndex = shouldUseSwitchConditionAsTableIndex(
      *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
  uint64_t TableSize;
  ConstantInt *TableIndexOffset;
  if (UseSwitchConditionAsTableIndex) {
    // Index the table directly by the condition: size covers [0, MaxCaseVal].
    TableSize = MaxCaseVal->getLimitedValue() + 1;
    TableIndexOffset = ConstantInt::get(MaxCaseVal->getIntegerType(), 0);
  } else {
    // Rebase the index at MinCaseVal: size covers [MinCaseVal, MaxCaseVal].
    TableSize =
        (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;

    TableIndexOffset = MinCaseVal;
  }

  // If the default destination is unreachable, or if the lookup table covers
  // all values of the conditional variable, branch directly to the lookup table
  // BB. Otherwise, check that the condition is within the case range.
  // NumResults counts entries backed by an explicit case, using the first
  // phi's result list.
  uint64_t NumResults = ResultLists[PHIs[0]].size();
  bool DefaultIsReachable = !SI->defaultDestUnreachable();

  bool TableHasHoles = (NumResults < TableSize);

  // If the table has holes but the default destination doesn't produce any
  // constant results, the lookup table entries corresponding to the holes will
  // contain poison.
  bool AllHolesArePoison = TableHasHoles && !HasDefaultResults;

  // If the default destination doesn't produce a constant result but is still
  // reachable, and the lookup table has holes, we need to use a mask to
  // determine if the current index should load from the lookup table or jump
  // to the default case.
  // The mask is unnecessary if the table has holes but the default destination
  // is unreachable, as in that case the holes must also be unreachable.
  bool NeedMask = AllHolesArePoison && DefaultIsReachable;
  if (NeedMask) {
    // As an extra penalty for the validity test we require more cases.
    if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
      return false;
    // The hole-check mask must itself fit in a legal integer.
    if (!DL.fitsInLegalInteger(TableSize))
      return false;
  }

  if (!shouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
    return false;

  // Compute the table index value.
  Value *TableIndex;
  if (UseSwitchConditionAsTableIndex) {
    TableIndex = SI->getCondition();
    if (HasDefaultResults) {
      // Grow the table to cover all possible index values to avoid the range
      // check. It will use the default result to fill in the table holes later,
      // so make sure it exists.
      ConstantRange CR = computeConstantRange(TableIndex, /*ForSigned=*/false,
                                              SimplifyQuery(DL));
      // Growing the table shouldn't have any size impact by checking
      // wouldFitInRegister.
      // TODO: Consider growing the table also when it doesn't fit in a register
      // if no optsize is specified.
      const uint64_t UpperBound = CR.getUpper().getLimitedValue();
      if (!CR.isUpperWrapped() &&
          all_of(ResultTypes, [&](const auto &ResultType) {
            return SwitchReplacement::wouldFitInRegister(DL, UpperBound,
                                                         ResultType);
          })) {
        // There may be some case index larger than the UpperBound (unreachable
        // case), so make sure the table size does not get smaller.
        TableSize = std::max(UpperBound, TableSize);
        // The default branch is unreachable after we enlarge the lookup table.
        // Adjust DefaultIsReachable to reuse code path.
        DefaultIsReachable = false;
      }
    }
  }

  // Keep track of the switch replacement for each phi
  for (PHINode *PHI : PHIs) {
    const auto &ResultList = ResultLists[PHI];

    Type *ResultType = ResultList.begin()->second->getType();
    // Use any value to fill the lookup table holes.
    Constant *DefaultVal =
        AllHolesArePoison ? PoisonValue::get(ResultType) : DefaultResults[PHI];
    StringRef FuncName = Fn->getName();
    SwitchReplacement Replacement(*Fn->getParent(), TableSize, TableIndexOffset,
                                  ResultList, DefaultVal, DL, FuncName);
    PhiToReplacementMap.insert({PHI, Replacement});
  }

  bool AnyLookupTables = any_of(
      PhiToReplacementMap, [](auto &KV) { return KV.second.isLookupTable(); });
  bool AnyBitMaps = any_of(PhiToReplacementMap,
                           [](auto &KV) { return KV.second.isBitMap(); });

  // A few conditions prevent the generation of lookup tables:
  // 1. The target does not support lookup tables.
  // 2. The "no-jump-tables" function attribute is set.
  // However, these objections do not apply to other switch replacements, like
  // the bitmap, so we only stop here if any of these conditions are met and we
  // want to create a LUT. Otherwise, continue with the switch replacement.
  if (AnyLookupTables &&
      (!TTI.shouldBuildLookupTables() ||
       Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
    return false;

  // In the early optimization pipeline, disable formation of lookup tables,
  // bit maps and mask checks, as they may inhibit further optimization.
  if (!ConvertSwitchToLookupTable &&
      (AnyLookupTables || AnyBitMaps || NeedMask))
    return false;

  Builder.SetInsertPoint(SI);
  // TableIndex is the switch condition - TableIndexOffset if we don't
  // use the condition directly
  if (!UseSwitchConditionAsTableIndex) {
    // If the default is unreachable, all case values are s>= MinCaseVal. Then
    // we can try to attach nsw.
    bool MayWrap = true;
    if (!DefaultIsReachable) {
      APInt Res =
          MaxCaseVal->getValue().ssub_ov(MinCaseVal->getValue(), MayWrap);
      (void)Res;
    }
    TableIndex = Builder.CreateSub(SI->getCondition(), TableIndexOffset,
                                   "switch.tableidx", /*HasNUW =*/false,
                                   /*HasNSW =*/!MayWrap);
  }

  std::vector<DominatorTree::UpdateType> Updates;

  // Compute the maximum table size representable by the integer type we are
  // switching upon.
  unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
  uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
  assert(MaxTableSize >= TableSize &&
         "It is impossible for a switch to have more entries than the max "
         "representable value of its input integer type's size.");

  // Create the BB that does the lookups.
  Module &Mod = *CommonDest->getParent()->getParent();
  BasicBlock *LookupBB = BasicBlock::Create(
      Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);

  CondBrInst *RangeCheckBranch = nullptr;
  CondBrInst *CondBranch = nullptr;

  // Emit either an unconditional branch to the lookup block (table covers
  // the whole index range or default is dead), or a range check that falls
  // back to the original default destination.
  Builder.SetInsertPoint(SI);
  const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
  if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
    Builder.CreateBr(LookupBB);
    if (DTU)
      Updates.push_back({DominatorTree::Insert, BB, LookupBB});
    // Note: We call removePredecessor later since we need to be able to get the
    // PHI value for the default case in case we're using a bit mask.
  } else {
    Value *Cmp = Builder.CreateICmpULT(
        TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
    RangeCheckBranch =
        Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
    CondBranch = RangeCheckBranch;
    if (DTU)
      Updates.push_back({DominatorTree::Insert, BB, LookupBB});
  }

  // Populate the BB that does the lookups.
  Builder.SetInsertPoint(LookupBB);

  if (NeedMask) {
    // Before doing the lookup, we do the hole check. The LookupBB is therefore
    // re-purposed to do the hole check, and we create a new LookupBB.
    BasicBlock *MaskBB = LookupBB;
    MaskBB->setName("switch.hole_check");
    LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
                                  CommonDest->getParent(), CommonDest);

    // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
    // unnecessary illegal types.
    uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
    APInt MaskInt(TableSizePowOf2, 0);
    APInt One(TableSizePowOf2, 1);
    // Build bitmask; fill in a 1 bit for every case.
    const ResultListTy &ResultList = ResultLists[PHIs[0]];
    for (const auto &Result : ResultList) {
      uint64_t Idx = (Result.first->getValue() - TableIndexOffset->getValue())
                         .getLimitedValue();
      MaskInt |= One << Idx;
    }
    ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);

    // Get the TableIndex'th bit of the bitmask.
    // If this bit is 0 (meaning hole) jump to the default destination,
    // else continue with table lookup.
    IntegerType *MapTy = TableMask->getIntegerType();
    Value *MaskIndex =
        Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
    Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
    Value *LoBit = Builder.CreateTrunc(
        Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
    CondBranch = Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
    if (DTU) {
      Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
      Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
    }
    Builder.SetInsertPoint(LookupBB);
    addPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB);
  }

  if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
    // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
    // do not delete PHINodes here.
    SI->getDefaultDest()->removePredecessor(BB,
                                            /*KeepOneInputPHIs=*/true);
    if (DTU)
      Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
  }

  // Materialize each phi's replacement value and wire it into the phi.
  for (PHINode *PHI : PHIs) {
    const ResultListTy &ResultList = ResultLists[PHI];
    auto Replacement = PhiToReplacementMap.at(PHI);
    auto *Result = Replacement.replaceSwitch(TableIndex, Builder, DL, Fn);
    // Do a small peephole optimization: re-use the switch table compare if
    // possible.
    if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
      BasicBlock *PhiBlock = PHI->getParent();
      // Search for compare instructions which use the phi.
      for (auto *User : PHI->users()) {
        reuseTableCompare(User, PhiBlock, RangeCheckBranch,
                          Replacement.getDefaultValue(), ResultList);
      }
    }

    PHI->addIncoming(Result, LookupBB);
  }

  Builder.CreateBr(CommonDest);
  if (DTU)
    Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});

  // Preserve profile data: the lookup path accumulates the weights of all
  // non-default successors, the default path keeps the default's weight.
  SmallVector<uint32_t> BranchWeights;
  const bool HasBranchWeights = CondBranch && !ProfcheckDisableMetadataFixes &&
                                extractBranchWeights(*SI, BranchWeights);
  uint64_t ToLookupWeight = 0;
  uint64_t ToDefaultWeight = 0;

  // Remove the switch.
  SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
  for (unsigned I = 0, E = SI->getNumSuccessors(); I < E; ++I) {
    BasicBlock *Succ = SI->getSuccessor(I);

    if (Succ == SI->getDefaultDest()) {
      if (HasBranchWeights)
        ToDefaultWeight += BranchWeights[I];
      continue;
    }
    Succ->removePredecessor(BB);
    if (DTU && RemovedSuccessors.insert(Succ).second)
      Updates.push_back({DominatorTree::Delete, BB, Succ});
    if (HasBranchWeights)
      ToLookupWeight += BranchWeights[I];
  }
  SI->eraseFromParent();
  if (HasBranchWeights)
    setFittedBranchWeights(*CondBranch, {ToLookupWeight, ToDefaultWeight},
                           /*IsExpected=*/false);
  if (DTU)
    DTU->applyUpdates(Updates);

  if (NeedMask)
    ++NumLookupTablesHoles;
  return true;
}
7551
7552/// Try to transform a switch that has "holes" in it to a contiguous sequence
7553/// of cases.
7554///
7555/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
7556/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
7557///
7558/// This converts a sparse switch into a dense switch which allows better
7559/// lowering and could also allow transforming into a lookup table.
                              const DataLayout &DL,
                              const TargetTransformInfo &TTI) {
  // Bail out on condition types wider than 64 bits or illegal for the target.
  auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
  if (CondTy->getIntegerBitWidth() > 64 ||
      !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
    return false;
  // Only bother with this optimization if there are more than 3 switch cases;
  // SDAG will only bother creating jump tables for 4 or more cases.
  if (SI->getNumCases() < 4)
    return false;

  // This transform is agnostic to the signedness of the input or case values. We
  // can treat the case values as signed or unsigned. We can optimize more common
  // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
  // as signed.
  for (const auto &C : SI->cases())
    Values.push_back(C.getCaseValue()->getValue().getSExtValue());
  llvm::sort(Values);

  // If the switch is already dense, there's nothing useful to do here.
  if (isSwitchDense(Values))
    return false;

  // First, transform the values such that they start at zero and ascend.
  int64_t Base = Values[0];
  for (auto &V : Values)
    V -= (uint64_t)(Base);

  // Now we have signed numbers that have been shifted so that, given enough
  // precision, there are no negative values. Since the rest of the transform
  // is bitwise only, we switch now to an unsigned representation.

  // This transform can be done speculatively because it is so cheap - it
  // results in a single rotate operation being inserted.

  // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
  // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
  // less than 64.
  unsigned Shift = 64;
  for (auto &V : Values)
    Shift = std::min(Shift, (unsigned)llvm::countr_zero((uint64_t)V));
  assert(Shift < 64);
  if (Shift > 0)
    // Divide every (rebased) case value by the common power-of-two factor.
    for (auto &V : Values)
      V = (int64_t)((uint64_t)V >> Shift);

  if (!isSwitchDense(Values))
    // Transform didn't create a dense switch.
    return false;

  // The obvious transform is to shift the switch condition right and emit a
  // check that the condition actually cleanly divided by GCD, i.e.
  // C & (1 << Shift - 1) == 0
  // inserting a new CFG edge to handle the case where it didn't divide cleanly.
  //
  // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
  // shift and puts the shifted-off bits in the uppermost bits. If any of these
  // are nonzero then the switch condition will be very large and will hit the
  // default case.

  auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
  Builder.SetInsertPoint(SI);
  Value *Sub =
      Builder.CreateSub(SI->getCondition(), ConstantInt::getSigned(Ty, Base));
  // fshl(x, x, W - Shift) is a rotate-left by W - Shift, i.e. ROTR(x, Shift).
  Value *Rot = Builder.CreateIntrinsic(
      Ty, Intrinsic::fshl,
      {Sub, Sub, ConstantInt::get(Ty, Ty->getBitWidth() - Shift)});
  SI->replaceUsesOfWith(SI->getCondition(), Rot);

  // Remap each case value to match: subtract Base, then shift right.
  for (auto Case : SI->cases()) {
    auto *Orig = Case.getCaseValue();
    auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base, true);
    Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
  }
  return true;
}
7638
7639/// Tries to transform the switch when the condition is umin with a constant.
7640/// In that case, the default branch can be replaced by the constant's branch.
7641/// This method also removes dead cases when the simplification cannot replace
7642/// the default branch.
7643///
7644/// For example:
7645/// switch(umin(a, 3)) {
7646/// case 0:
7647/// case 1:
7648/// case 2:
7649/// case 3:
7650/// case 4:
7651/// // ...
7652/// default:
7653/// unreachable
7654/// }
7655///
7656/// Transforms into:
7657///
7658/// switch(a) {
7659/// case 0:
7660/// case 1:
7661/// case 2:
7662/// default:
7663/// // This is case 3
7664/// }
  // A is the unclamped operand of the umin; Constant is the clamp value.
  Value *A;

  if (!match(SI->getCondition(), m_UMin(m_Value(A), m_ConstantInt(Constant))))
    return false;

  BasicBlock *BB = SIW->getParent();

  // Dead cases are removed even when the simplification fails.
  // A case is dead when its value is higher than the Constant, since
  // umin(a, Constant) can never exceed Constant.
  for (auto I = SI->case_begin(), E = SI->case_end(); I != E;) {
    if (!I->getCaseValue()->getValue().ugt(Constant->getValue())) {
      ++I;
      continue;
    }
    BasicBlock *DeadCaseBB = I->getCaseSuccessor();
    DeadCaseBB->removePredecessor(BB);
    Updates.push_back({DominatorTree::Delete, BB, DeadCaseBB});
    I = SIW.removeCase(I);
    // removeCase invalidates iterators; refresh the end iterator.
    E = SIW->case_end();
  }

  auto Case = SI->findCaseValue(Constant);
  // If the case value is not found, `findCaseValue` returns the default case.
  // In this scenario, since there is no explicit `case 3:`, the simplification
  // fails. The simplification also fails when the switch’s default destination
  // is reachable.
  if (!SI->defaultDestUnreachable() || Case == SI->case_default()) {
    if (DTU)
      DTU->applyUpdates(Updates);
    // Report a change if any dead cases were removed above.
    return !Updates.empty();
  }

  // All values >= Constant funnel into Constant's case, so that case becomes
  // the new default and the switch can test the unclamped value directly.
  BasicBlock *Unreachable = SI->getDefaultDest();
  SIW.replaceDefaultDest(Case);
  SIW.removeCase(Case);
  SIW->setCondition(A);

  Updates.push_back({DominatorTree::Delete, BB, Unreachable});

  if (DTU)
    DTU->applyUpdates(Updates);

  return true;
}
7713
/// Tries to transform switch of powers of two to reduce switch range.
/// For example, switch like:
/// switch (C) { case 1: case 2: case 64: case 128: }
/// will be transformed to:
/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
///
/// This transformation allows better lowering and may transform the switch
/// instruction into a sequence of bit manipulation and a smaller
/// log2(C)-indexed value table (instead of traditionally emitting a load of the
/// address of the jump target, and indirectly jump to it).
                                        DomTreeUpdater *DTU,
                                        const DataLayout &DL,
                                        const TargetTransformInfo &TTI) {
  Value *Condition = SI->getCondition();
  LLVMContext &Context = SI->getContext();
  auto *CondTy = cast<IntegerType>(Condition->getType());

  // Restrict to conditions that fit a legal integer of at most 64 bits so the
  // case values can be handled as uint64_t below.
  if (CondTy->getIntegerBitWidth() > 64 ||
      !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
    return false;

  // Ensure trailing zeroes count intrinsic emission is not too expensive.
  IntrinsicCostAttributes Attrs(Intrinsic::cttz, CondTy,
                                {Condition, ConstantInt::getTrue(Context)});
  if (TTI.getIntrinsicInstrCost(Attrs, TTI::TCK_SizeAndLatency) >
      TTI::TCC_Basic * 2)
    return false;

  // Only bother with this optimization if there are more than 3 switch cases.
  // SDAG will start emitting jump tables for 4 or more cases.
  if (SI->getNumCases() < 4)
    return false;

  // Check that switch cases are powers of two.
  for (const auto &Case : SI->cases()) {
    uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
    if (llvm::has_single_bit(CaseValue))
      Values.push_back(CaseValue);
    else
      return false;
  }

  // isSwitchDense requires case values to be sorted.
  llvm::sort(Values);
  if (!isSwitchDense(Values.size(), llvm::countr_zero(Values.back()) -
                                        llvm::countr_zero(Values.front()) + 1))
    // Transform is unable to generate dense switch.
    return false;

  Builder.SetInsertPoint(SI);

  if (!SI->defaultDestUnreachable()) {
    // Let non-power-of-two inputs jump to the default case, when the latter is
    // reachable: guard the switch with `ctpop(Condition) == 1`.
    auto *PopC = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, Condition);
    auto *IsPow2 = Builder.CreateICmpEQ(PopC, ConstantInt::get(CondTy, 1));

    auto *OrigBB = SI->getParent();
    auto *DefaultCaseBB = SI->getDefaultDest();
    BasicBlock *SplitBB = SplitBlock(OrigBB, SI, DTU);
    auto It = OrigBB->getTerminator()->getIterator();
    SmallVector<uint32_t> Weights;
    auto HasWeights =
    auto *BI = CondBrInst::Create(IsPow2, SplitBB, DefaultCaseBB, It);
    if (HasWeights && any_of(Weights, not_equal_to(0))) {
      // IsPow2 covers a subset of the cases in which we'd go to the default
      // label. The other is those powers of 2 that don't appear in the case
      // statement. We don't know the distribution of the values coming in, so
      // the safest is to split 50-50 the original probability to `default`.
      uint64_t OrigDenominator =
      SmallVector<uint64_t> NewWeights(2);
      NewWeights[1] = Weights[0] / 2;
      NewWeights[0] = OrigDenominator - NewWeights[1];
      setFittedBranchWeights(*BI, NewWeights, /*IsExpected=*/false);
      // The probability of executing the default block stays constant. It was
      //   p_d = Weights[0] / OrigDenominator
      // we rewrite as W/D.
      // We want to find the probability of the default branch of the switch
      // statement. Let's call it X. We have W/D = W/2D + X * (1-W/2D)
      // i.e. the original probability is the probability we go to the default
      // branch from the BI branch, or we take the default branch on the SI.
      // Meaning X = W / (2D - W), or (W/2) / (D - W/2)
      // This matches using W/2 for the default branch probability numerator and
      // D-W/2 as the denominator.
      Weights[0] = NewWeights[1];
      uint64_t CasesDenominator = OrigDenominator - Weights[0];
      for (auto &W : drop_begin(Weights))
        W = NewWeights[0] * static_cast<double>(W) / CasesDenominator;

      setBranchWeights(*SI, Weights, /*IsExpected=*/false);
    }
    // BI is handling the default case for SI, and so should share its DebugLoc.
    BI->setDebugLoc(SI->getDebugLoc());
    It->eraseFromParent();

    addPredecessorToBlock(DefaultCaseBB, OrigBB, SplitBB);
    if (DTU)
      DTU->applyUpdates({{DominatorTree::Insert, OrigBB, DefaultCaseBB}});
  }

  // Replace each case with its trailing zeros number.
  for (auto &Case : SI->cases()) {
    auto *OrigValue = Case.getCaseValue();
    Case.setValue(ConstantInt::get(OrigValue->getIntegerType(),
                                   OrigValue->getValue().countr_zero()));
  }

  // Replace condition with its trailing zeros number.
  auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
      Intrinsic::cttz, {CondTy}, {Condition, ConstantInt::getTrue(Context)});

  SI->setCondition(ConditionTrailingZeros);

  return true;
}
7833
/// Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have
/// the same destination.
                                         DomTreeUpdater *DTU) {
  auto *Cmp = dyn_cast<CmpIntrinsic>(SI->getCondition());
  if (!Cmp || !Cmp->hasOneUse())
    return false;

  bool HasWeights = extractBranchWeights(getBranchWeightMDNode(*SI), Weights);
  if (!HasWeights)
    Weights.resize(4); // Avoid checking HasWeights everywhere.

  // Normalize to [us]cmp == Res ? Succ : OtherSucc.
  int64_t Res;
  BasicBlock *Succ, *OtherSucc;
  uint32_t SuccWeight = 0, OtherSuccWeight = 0;
  BasicBlock *Unreachable = nullptr;

  if (SI->getNumCases() == 2) {
    // Find which of 1, 0 or -1 is missing (handled by default dest).
    SmallSet<int64_t, 3> Missing;
    Missing.insert(1);
    Missing.insert(0);
    Missing.insert(-1);

    Succ = SI->getDefaultDest();
    SuccWeight = Weights[0];
    OtherSucc = nullptr;
    for (auto &Case : SI->cases()) {
      std::optional<int64_t> Val =
          Case.getCaseValue()->getValue().trySExtValue();
      if (!Val)
        return false;
      if (!Missing.erase(*Val))
        return false;
      // Both explicit cases must share the same destination.
      if (OtherSucc && OtherSucc != Case.getCaseSuccessor())
        return false;
      OtherSucc = Case.getCaseSuccessor();
      OtherSuccWeight += Weights[Case.getSuccessorIndex()];
    }

    assert(Missing.size() == 1 && "Should have one case left");
    Res = *Missing.begin();
  } else if (SI->getNumCases() == 3 && SI->defaultDestUnreachable()) {
    // Normalize so that Succ is taken once and OtherSucc twice.
    Unreachable = SI->getDefaultDest();
    Succ = OtherSucc = nullptr;
    for (auto &Case : SI->cases()) {
      BasicBlock *NewSucc = Case.getCaseSuccessor();
      uint32_t Weight = Weights[Case.getSuccessorIndex()];
      if (!OtherSucc || OtherSucc == NewSucc) {
        OtherSucc = NewSucc;
        OtherSuccWeight += Weight;
      } else if (!Succ) {
        Succ = NewSucc;
        SuccWeight = Weight;
      } else if (Succ == NewSucc) {
        // Succ turned out to be the duplicated destination; swap roles.
        std::swap(Succ, OtherSucc);
        std::swap(SuccWeight, OtherSuccWeight);
      } else
        return false;
    }
    // All three case values must be exactly {1, 0, -1}; Res is the value
    // whose arm is the non-duplicated one.
    for (auto &Case : SI->cases()) {
      std::optional<int64_t> Val =
          Case.getCaseValue()->getValue().trySExtValue();
      if (!Val || (Val != 1 && Val != 0 && Val != -1))
        return false;
      if (Case.getCaseSuccessor() == Succ) {
        Res = *Val;
        break;
      }
    }
  } else {
    return false;
  }

  // Determine predicate for the missing case.
  switch (Res) {
  case 1:
    Pred = ICmpInst::ICMP_UGT;
    break;
  case 0:
    Pred = ICmpInst::ICMP_EQ;
    break;
  case -1:
    Pred = ICmpInst::ICMP_ULT;
    break;
  }
  // scmp uses the signed variants of the predicates above.
  if (Cmp->isSigned())
    Pred = ICmpInst::getSignedPredicate(Pred);

  MDNode *NewWeights = nullptr;
  if (HasWeights)
    NewWeights = MDBuilder(SI->getContext())
                     .createBranchWeights(SuccWeight, OtherSuccWeight);

  BasicBlock *BB = SI->getParent();
  Builder.SetInsertPoint(SI->getIterator());
  Value *ICmp = Builder.CreateICmp(Pred, Cmp->getLHS(), Cmp->getRHS());
  Builder.CreateCondBr(ICmp, Succ, OtherSucc, NewWeights,
                       SI->getMetadata(LLVMContext::MD_unpredictable));
  OtherSucc->removePredecessor(BB);
  if (Unreachable)
    Unreachable->removePredecessor(BB);
  SI->eraseFromParent();
  Cmp->eraseFromParent();
  if (DTU && Unreachable)
    DTU->applyUpdates({{DominatorTree::Delete, BB, Unreachable}});
  return true;
}
7946
/// Checking whether two BBs are equal depends on the contents of the
/// BasicBlock and the incoming values of their successor PHINodes.
/// PHINode::getIncomingValueForBlock is O(|Preds|), so we'd like to avoid
/// calling this function on each BasicBlock every time isEqual is called,
/// especially since the same BasicBlock may be passed as an argument multiple
/// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock ->
/// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking
/// of the incoming values.

  // One Phi usually has < 8 incoming values.

  // We only merge the identical non-entry BBs with
  // - terminator unconditional br to Succ (pending relaxation),
  // - does not have address taken / weird control.
  static bool canBeMerged(const BasicBlock *BB) {
    assert(BB && "Expected non-null BB");
    // Entry block cannot be eliminated or have predecessors.
    if (BB->isEntryBlock())
      return false;

    // Single successor and must be Succ.
    // FIXME: Relax that the terminator is a BranchInst by checking for equality
    // on other kinds of terminators. We decide to only support unconditional
    // branches for now for compile time reasons.
    auto *BI = dyn_cast<UncondBrInst>(BB->getTerminator());
    if (!BI)
      return false;

    // Avoid blocks that are "address-taken" (blockaddress) or have unusual
    // uses, e.g. exception-handling pads.
    if (BB->hasAddressTaken() || BB->isEHPad())
      return false;

    // TODO: relax this condition to merge equal blocks with >1 instructions?
    // Here, we use a O(1) form of the O(n) comparison of `size() != 1`.
    if (&BB->front() != &BB->back())
      return false;

    // The BB must have at least one predecessor.
    if (pred_empty(BB))
      return false;

    return true;
  }
};
7997
  // DenseMapInfo over EqualBBWrapper pointers: the empty/tombstone sentinels
  // reuse the generic void* sentinels; hashing and equality look through the
  // wrapper at the wrapped BasicBlock and its successor's PHI incoming values.
  static const EqualBBWrapper *getEmptyKey() {
    return static_cast<EqualBBWrapper *>(DenseMapInfo<void *>::getEmptyKey());
  }
    return static_cast<EqualBBWrapper *>(
  }
  static unsigned getHashValue(const EqualBBWrapper *EBW) {
    BasicBlock *BB = EBW->BB;
    assert(BB->size() == 1 && "Expected just a single branch in the BB");

    // Since we assume the BB is just a single UncondBrInst with a single
    // successor, we hash as the BB and the incoming Values of its successor
    // PHIs. Initially, we tried to just use the successor BB as the hash, but
    // including the incoming PHI values leads to better performance.
    // We also tried to build a map from BB -> Succs.IncomingValues ahead of
    // time and passing it in EqualBBWrapper, but this slowed down the average
    // compile time without having any impact on the worst case compile time.
    BasicBlock *Succ = BI->getSuccessor();
    auto PhiValsForBB = map_range(Succ->phis(), [&](PHINode &Phi) {
      return (*EBW->PhiPredIVs)[&Phi][BB];
    });
    return hash_combine(Succ, hash_combine_range(PhiValsForBB));
  }
  static bool isEqual(const EqualBBWrapper *LHS, const EqualBBWrapper *RHS) {
    // Sentinel keys are equal only to themselves.
    if (LHS == EKey || RHS == EKey || LHS == TKey || RHS == TKey)
      return LHS == RHS;

    BasicBlock *A = LHS->BB;
    BasicBlock *B = RHS->BB;

    // FIXME: we checked that the size of A and B are both 1 in
    // mergeIdenticalUncondBBs to make the Case list smaller to
    // improve performance. If we decide to support BasicBlocks with more
    // than just a single instruction, we need to check that A.size() ==
    // B.size() here, and we need to check more than just the BranchInsts
    // for equality.

    UncondBrInst *ABI = cast<UncondBrInst>(A->getTerminator());
    UncondBrInst *BBI = cast<UncondBrInst>(B->getTerminator());
    if (ABI->getSuccessor() != BBI->getSuccessor())
      return false;

    // Need to check that PHIs in successor have matching values.
    BasicBlock *Succ = ABI->getSuccessor();
    auto IfPhiIVMatch = [&](PHINode &Phi) {
      // Replace O(|Pred|) Phi.getIncomingValueForBlock with this O(1) hashmap
      // query.
      auto &PredIVs = (*LHS->PhiPredIVs)[&Phi];
      return PredIVs[A] == PredIVs[B];
    };
    return all_of(Succ->phis(), IfPhiIVMatch);
  }
};
8056
// Merge identical BBs into one of them. Candidates are single-instruction
// blocks ending in an unconditional branch; duplicates (same successor and
// same incoming PHI values in it) are detected via hashing, and all edges
// into each duplicate are redirected to one canonical copy, leaving the
// duplicate unreachable for later DCE.
                              DomTreeUpdater *DTU) {
  if (Candidates.size() < 2)
    return false;

  // Build Cases. Skip BBs that are not candidates for simplification. Mark
  // PHINodes which need to be processed into PhiPredIVs. We decide to process
  // an entire PHI at once after the loop, opposed to calling
  // getIncomingValueForBlock inside this loop, since each call to
  // getIncomingValueForBlock is O(|Preds|).
  EqualBBWrapper::Phi2IVsMap PhiPredIVs;
  BBs2Merge.reserve(Candidates.size());

  for (BasicBlock *BB : Candidates) {
    BasicBlock *Succ = BB->getSingleSuccessor();
    assert(Succ && "Expected unconditional BB");
    BBs2Merge.emplace_back(EqualBBWrapper{BB, &PhiPredIVs});
    Phis.insert_range(make_pointer_range(Succ->phis()));
  }

  // Precompute a data structure to improve performance of isEqual for
  // EqualBBWrapper.
  PhiPredIVs.reserve(Phis.size());
  for (PHINode *Phi : Phis) {
    auto &IVs =
        PhiPredIVs.try_emplace(Phi, Phi->getNumIncomingValues()).first->second;
    // Pre-fill all incoming for O(1) lookup as Phi.getIncomingValueForBlock is
    // O(|Pred|).
    for (auto &IV : Phi->incoming_values())
      IVs.insert({Phi->getIncomingBlock(IV), IV.get()});
  }

  // Group duplicates using DenseSet with custom equality/hashing.
  // Build a set such that if the EqualBBWrapper exists in the set and another
  // EqualBBWrapper isEqual, then the equivalent EqualBBWrapper which is not in
  // the set should be replaced with the one in the set. If the EqualBBWrapper
  // is not in the set, then it should be added to the set so other
  // EqualBBWrapper can check against it in the same manner. We use
  // EqualBBWrapper instead of just BasicBlock because we'd like to pass around
  // information to isEquality, getHashValue, and when doing the replacement
  // with better performance.
  Keep.reserve(BBs2Merge.size());

  Updates.reserve(BBs2Merge.size() * 2);

  bool MadeChange = false;

  // Helper: redirect all edges X -> DeadPred to X -> LivePred.
  auto RedirectIncomingEdges = [&](BasicBlock *Dead, BasicBlock *Live) {
    if (DTU) {
      // All predecessors of DeadPred (except the common predecessor) will be
      // moved to LivePred.
      Updates.reserve(Updates.size() + DeadPreds.size() * 2);
                                                 predecessors(Live));
      for (BasicBlock *PredOfDead : DeadPreds) {
        // Do not modify those common predecessors of DeadPred and LivePred.
        if (!LivePreds.contains(PredOfDead))
          Updates.push_back({DominatorTree::Insert, PredOfDead, Live});
        Updates.push_back({DominatorTree::Delete, PredOfDead, Dead});
      }
    }
    LLVM_DEBUG(dbgs() << "Replacing duplicate pred BB ";
               Dead->printAsOperand(dbgs()); dbgs() << " with pred ";
               Live->printAsOperand(dbgs()); dbgs() << " for ";
               Live->getSingleSuccessor()->printAsOperand(dbgs());
               dbgs() << "\n");
    // Replace successors in all predecessors of DeadPred.
    for (BasicBlock *PredOfDead : DeadPreds) {
      Instruction *T = PredOfDead->getTerminator();
      T->replaceSuccessorWith(Dead, Live);
    }
  };

  // Try to eliminate duplicate predecessors.
  for (const auto &EBW : BBs2Merge) {
    // EBW is a candidate for simplification. If we find a duplicate BB,
    // replace it.
    const auto &[It, Inserted] = Keep.insert(&EBW);
    if (Inserted)
      continue;

    // Found duplicate: merge P into canonical predecessor It->Pred.
    BasicBlock *KeepBB = (*It)->BB;
    BasicBlock *DeadBB = EBW.BB;

    // Avoid merging a BB with itself.
    if (KeepBB == DeadBB)
      continue;

    // Redirect all edges into DeadPred to KeepPred.
    RedirectIncomingEdges(DeadBB, KeepBB);

    // Now DeadBB should become unreachable; leave DCE to later,
    // but we can try to simplify it if it only branches to Succ.
    // (We won't erase here to keep the routine simple and DT-safe.)
    assert(pred_empty(DeadBB) && "DeadBB should be unreachable.");
    MadeChange = true;
  }

  if (DTU && !Updates.empty())
    DTU->applyUpdates(Updates);

  return MadeChange;
}
8169
bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI,
                                                 DomTreeUpdater *DTU) {
  // Collect candidate switch-arms top-down.
  SmallSetVector<BasicBlock *, 16> FilteredArms(
  // Merge arms that are exact duplicates (same single unconditional branch
  // and same incoming PHI values in the shared successor).
  return mergeIdenticalBBs(FilteredArms.getArrayRef(), DTU);
}
8178
bool SimplifyCFGOpt::simplifyDuplicatePredecessors(BasicBlock *BB,
                                                   DomTreeUpdater *DTU) {
  // Need at least 2 predecessors to do anything.
  if (!BB || !BB->hasNPredecessorsOrMore(2))
    return false;

  // Compilation time consideration: retain the canonical loop, otherwise, we
  // require more time in the later loop canonicalization.
  if (Options.NeedCanonicalLoop && is_contained(LoopHeaders, BB))
    return false;

  // Collect candidate predecessors bottom-up.
  SmallSetVector<BasicBlock *, 8> FilteredPreds(
  return mergeIdenticalBBs(FilteredPreds.getArrayRef(), DTU);
}
8196
8197bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
8198 BasicBlock *BB = SI->getParent();
8199
8200 if (isValueEqualityComparison(SI)) {
8201 // If we only have one predecessor, and if it is a branch on this value,
8202 // see if that predecessor totally determines the outcome of this switch.
8203 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
8204 if (simplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
8205 return requestResimplify();
8206
8207 Value *Cond = SI->getCondition();
8208 if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
8209 if (simplifySwitchOnSelect(SI, Select))
8210 return requestResimplify();
8211
8212 // If the block only contains the switch, see if we can fold the block
8213 // away into any preds.
8214 if (SI == &*BB->begin())
8215 if (foldValueComparisonIntoPredecessors(SI, Builder))
8216 return requestResimplify();
8217 }
8218
8219 // Try to transform the switch into an icmp and a branch.
8220 // The conversion from switch to comparison may lose information on
8221 // impossible switch values, so disable it early in the pipeline.
8222 if (Options.ConvertSwitchRangeToICmp && turnSwitchRangeIntoICmp(SI, Builder))
8223 return requestResimplify();
8224
8225 // Remove unreachable cases.
8226 if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
8227 return requestResimplify();
8228
8229 if (simplifySwitchOfCmpIntrinsic(SI, Builder, DTU))
8230 return requestResimplify();
8231
8232 if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
8233 return requestResimplify();
8234
8235 if (Options.ForwardSwitchCondToPhi && forwardSwitchConditionToPHI(SI))
8236 return requestResimplify();
8237
8238 // The conversion of switches to arithmetic or lookup table is disabled in
8239 // the early optimization pipeline, as it may lose information or make the
8240 // resulting code harder to analyze.
8241 if (Options.ConvertSwitchToArithmetic || Options.ConvertSwitchToLookupTable)
8242 if (simplifySwitchLookup(SI, Builder, DTU, DL, TTI,
8243 Options.ConvertSwitchToLookupTable))
8244 return requestResimplify();
8245
8246 if (simplifySwitchOfPowersOfTwo(SI, Builder, DTU, DL, TTI))
8247 return requestResimplify();
8248
8249 if (reduceSwitchRange(SI, Builder, DL, TTI))
8250 return requestResimplify();
8251
8252 if (HoistCommon &&
8253 hoistCommonCodeFromSuccessors(SI, !Options.HoistCommonInsts))
8254 return requestResimplify();
8255
8256 // We can merge identical switch arms early to enhance more aggressive
8257 // optimization on switch.
8258 if (simplifyDuplicateSwitchArms(SI, DTU))
8259 return requestResimplify();
8260
8261 if (simplifySwitchWhenUMin(SI, DTU))
8262 return requestResimplify();
8263
8264 return false;
8265}
8266
bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
  BasicBlock *BB = IBI->getParent();
  bool Changed = false;
  SmallVector<uint32_t> BranchWeights;
  const bool HasBranchWeights = !ProfcheckDisableMetadataFixes &&
                                extractBranchWeights(*IBI, BranchWeights);

  // Accumulate weight per destination so that removing duplicate destinations
  // below does not lose profile mass.
  DenseMap<const BasicBlock *, uint64_t> TargetWeight;
  if (HasBranchWeights)
    for (size_t I = 0, E = IBI->getNumDestinations(); I < E; ++I)
      TargetWeight[IBI->getDestination(I)] += BranchWeights[I];

  // Eliminate redundant destinations.
  SmallPtrSet<Value *, 8> Succs;
  SmallSetVector<BasicBlock *, 8> RemovedSuccs;
  for (unsigned I = 0, E = IBI->getNumDestinations(); I != E; ++I) {
    BasicBlock *Dest = IBI->getDestination(I);
    // Remove a destination if its address is never taken (it cannot be the
    // runtime target) or if it appears more than once in the list.
    if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
      if (!Dest->hasAddressTaken())
        RemovedSuccs.insert(Dest);
      Dest->removePredecessor(BB);
      IBI->removeDestination(I);
      --I;
      --E;
      Changed = true;
    }
  }

  if (DTU) {
    std::vector<DominatorTree::UpdateType> Updates;
    Updates.reserve(RemovedSuccs.size());
    for (auto *RemovedSucc : RemovedSuccs)
      Updates.push_back({DominatorTree::Delete, BB, RemovedSucc});
    DTU->applyUpdates(Updates);
  }

  if (IBI->getNumDestinations() == 0) {
    // If the indirectbr has no successors, change it to unreachable.
    new UnreachableInst(IBI->getContext(), IBI->getIterator());
    return true;
  }

  if (IBI->getNumDestinations() == 1) {
    // If the indirectbr has one successor, change it to a direct branch.
    return true;
  }
  // More than one destination left: re-emit the (merged) weights.
  if (HasBranchWeights) {
    SmallVector<uint64_t> NewBranchWeights(IBI->getNumDestinations());
    for (size_t I = 0, E = IBI->getNumDestinations(); I < E; ++I)
      NewBranchWeights[I] += TargetWeight.find(IBI->getDestination(I))->second;
    setFittedBranchWeights(*IBI, NewBranchWeights, /*IsExpected=*/false);
  }
  if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
    if (simplifyIndirectBrOnSelect(IBI, SI))
      return requestResimplify();
  }
  return Changed;
}
8328
/// Given an block with only a single landing pad and a unconditional branch
/// try to find another basic block which this one can be merged with. This
/// handles cases where we have multiple invokes with unique landing pads, but
/// a shared handler.
///
/// We specifically choose to not worry about merging non-empty blocks
/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
/// practice, the optimizer produces empty landing pad blocks quite frequently
/// when dealing with exception dense code. (see: instcombine, gvn, if-else
/// sinking in this file)
///
/// This is primarily a code size optimization. We need to avoid performing
/// any transform which might inhibit optimization (such as our ability to
/// specialize a particular handler via tail commoning). We do this by not
/// merging any blocks which require us to introduce a phi. Since the same
/// values are flowing through both blocks, we don't lose any ability to
/// specialize. If anything, we make such specialization more likely.
///
/// TODO - This transformation could remove entries from a phi in the target
/// block when the inputs in the phi are the same for the two blocks being
/// merged. In some cases, this could result in removal of the PHI entirely.
                                 BasicBlock *BB, DomTreeUpdater *DTU) {
  auto Succ = BB->getUniqueSuccessor();
  assert(Succ);
  // If there's a phi in the successor block, we'd likely have to introduce
  // a phi into the merged landing pad block.
  if (isa<PHINode>(*Succ->begin()))
    return false;

  // Scan the other predecessors of Succ for a block with an identical landing
  // pad followed by an identical branch.
  for (BasicBlock *OtherPred : predecessors(Succ)) {
    if (BB == OtherPred)
      continue;
    BasicBlock::iterator I = OtherPred->begin();
    if (!LPad2 || !LPad2->isIdenticalTo(LPad))
      continue;
    ++I;
    if (!BI2 || !BI2->isIdenticalTo(BI))
      continue;

    std::vector<DominatorTree::UpdateType> Updates;

    // We've found an identical block. Update our predecessors to take that
    // path instead and make ourselves dead.
    for (BasicBlock *Pred : UniquePreds) {
      InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
      assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
             "unexpected successor");
      II->setUnwindDest(OtherPred);
      if (DTU) {
        Updates.push_back({DominatorTree::Insert, Pred, OtherPred});
        Updates.push_back({DominatorTree::Delete, Pred, BB});
      }
    }

    // BB no longer reaches its successors; detach the edges.
    for (BasicBlock *Succ : UniqueSuccs) {
      Succ->removePredecessor(BB);
      if (DTU)
        Updates.push_back({DominatorTree::Delete, BB, Succ});
    }

    // Replace BB's terminator with unreachable; the block itself is left for
    // later dead-code elimination.
    IRBuilder<> Builder(BI);
    Builder.CreateUnreachable();
    BI->eraseFromParent();
    if (DTU)
      DTU->applyUpdates(Updates);
    return true;
  }
  return false;
}
8403
bool SimplifyCFGOpt::simplifyUncondBranch(UncondBrInst *BI,
                                          IRBuilder<> &Builder) {
  BasicBlock *BB = BI->getParent();
  BasicBlock *Succ = BI->getSuccessor(0);

  // If the Terminator is the only non-phi instruction, simplify the block.
  // If LoopHeader is provided, check if the block or its successor is a loop
  // header. (This is for early invocations before loop simplify and
  // vectorization to keep canonical loop forms for nested loops. These blocks
  // can be eliminated when the pass is invoked later in the back-end.)
  // Note that if BB has only one predecessor then we do not introduce new
  // backedge, so we can eliminate BB.
  bool NeedCanonicalLoop =
      Options.NeedCanonicalLoop &&
      (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) &&
       (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ)));
  if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
      !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
    return true;

  // If the only instruction in the block is a seteq/setne comparison against a
  // constant, try to simplify the block.
  if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
    if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
      ++I;
      // Block is just `icmp eq/ne X, C; br Succ`.
      if (I->isTerminator() &&
          tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
        return true;
      // Block is `icmp; select; br Succ`.
      if (isa<SelectInst>(I) && I->getNextNode()->isTerminator() &&
          tryToSimplifyUncondBranchWithICmpSelectInIt(ICI, cast<SelectInst>(I),
                                                      Builder))
        return true;
    }
  }

  // See if we can merge an empty landing pad block with another which is
  // equivalent.
  if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
    ++I;
    if (I->isTerminator() && tryToMergeLandingPad(LPad, BI, BB, DTU))
      return true;
  }

  return false;
}
8450
  // Return the unique block that is the single predecessor of every
  // predecessor of BB, or nullptr if no such common grand-predecessor exists.
  BasicBlock *PredPred = nullptr;
  for (auto *P : predecessors(BB)) {
    BasicBlock *PPred = P->getSinglePredecessor();
    // Bail out if a predecessor has multiple predecessors, or if two
    // predecessors disagree on their (single) predecessor.
    if (!PPred || (PredPred && PredPred != PPred))
      return nullptr;
    PredPred = PPred;
  }
  return PredPred;
}
8461
/// Fold the following pattern:
/// bb0:
///   br i1 %cond1, label %bb1, label %bb2
/// bb1:
///   br i1 %cond2, label %bb3, label %bb4
/// bb2:
///   br i1 %cond2, label %bb4, label %bb3
/// bb3:
///   ...
/// bb4:
///   ...
/// into
/// bb0:
///   %cond = xor i1 %cond1, %cond2
///   br i1 %cond, label %bb4, label %bb3
/// bb3:
///   ...
/// bb4:
///   ...
/// NOTE: %cond2 always dominates the terminator of bb0.
  BasicBlock *BB = BI->getParent();
  BasicBlock *BB1 = BI->getSuccessor(0);
  BasicBlock *BB2 = BI->getSuccessor(1);
  // A "simple" successor holds only a conditional branch, is distinct from BB,
  // and neither of its targets is itself/BB or starts with a PHI (so rewiring
  // needs no incoming-value fixups).
  auto IsSimpleSuccessor = [BB](BasicBlock *Succ, CondBrInst *&SuccBI) {
    if (Succ == BB)
      return false;
    if (&Succ->front() != Succ->getTerminator())
      return false;
    SuccBI = dyn_cast<CondBrInst>(Succ->getTerminator());
    if (!SuccBI)
      return false;
    BasicBlock *Succ1 = SuccBI->getSuccessor(0);
    BasicBlock *Succ2 = SuccBI->getSuccessor(1);
    return Succ1 != Succ && Succ2 != Succ && Succ1 != BB && Succ2 != BB &&
           !isa<PHINode>(Succ1->front()) && !isa<PHINode>(Succ2->front());
  };
  CondBrInst *BB1BI, *BB2BI;
  if (!IsSimpleSuccessor(BB1, BB1BI) || !IsSimpleSuccessor(BB2, BB2BI))
    return false;

  // Both inner branches must test the same condition with swapped targets.
  if (BB1BI->getCondition() != BB2BI->getCondition() ||
      BB1BI->getSuccessor(0) != BB2BI->getSuccessor(1) ||
      BB1BI->getSuccessor(1) != BB2BI->getSuccessor(0))
    return false;

  BasicBlock *BB3 = BB1BI->getSuccessor(0);
  BasicBlock *BB4 = BB1BI->getSuccessor(1);
  IRBuilder<> Builder(BI);
  // xor of the two conditions picks between BB3 and BB4 exactly as the
  // two-level branch did, so branch directly from BB.
  BI->setCondition(
      Builder.CreateXor(BI->getCondition(), BB1BI->getCondition()));
  BB1->removePredecessor(BB);
  BI->setSuccessor(0, BB4);
  BB2->removePredecessor(BB);
  BI->setSuccessor(1, BB3);
  if (DTU) {
    Updates.push_back({DominatorTree::Delete, BB, BB1});
    Updates.push_back({DominatorTree::Insert, BB, BB4});
    Updates.push_back({DominatorTree::Delete, BB, BB2});
    Updates.push_back({DominatorTree::Insert, BB, BB3});

    DTU->applyUpdates(Updates);
  }
  // Combine the three branches' weights: the new taken weight is the sum of
  // the path products that end at BB4, and symmetrically for BB3. Any branch
  // lacking profile data contributes weight 1 for both edges.
  bool HasWeight = false;
  uint64_t BBTWeight, BBFWeight;
  if (extractBranchWeights(*BI, BBTWeight, BBFWeight))
    HasWeight = true;
  else
    BBTWeight = BBFWeight = 1;
  uint64_t BB1TWeight, BB1FWeight;
  if (extractBranchWeights(*BB1BI, BB1TWeight, BB1FWeight))
    HasWeight = true;
  else
    BB1TWeight = BB1FWeight = 1;
  uint64_t BB2TWeight, BB2FWeight;
  if (extractBranchWeights(*BB2BI, BB2TWeight, BB2FWeight))
    HasWeight = true;
  else
    BB2TWeight = BB2FWeight = 1;
  if (HasWeight) {
    uint64_t Weights[2] = {BBTWeight * BB1FWeight + BBFWeight * BB2TWeight,
                           BBTWeight * BB1TWeight + BBFWeight * BB2FWeight};
    setFittedBranchWeights(*BI, Weights, /*IsExpected=*/false,
                           /*ElideAllZero=*/true);
  }
  return true;
}
8550
8551bool SimplifyCFGOpt::simplifyCondBranch(CondBrInst *BI, IRBuilder<> &Builder) {
8552 assert(
8554 BI->getSuccessor(0) != BI->getSuccessor(1) &&
8555 "Tautological conditional branch should have been eliminated already.");
8556
8557 BasicBlock *BB = BI->getParent();
8558 if (!Options.SimplifyCondBranch ||
8559 BI->getFunction()->hasFnAttribute(Attribute::OptForFuzzing))
8560 return false;
8561
8562 // Conditional branch
8563 if (isValueEqualityComparison(BI)) {
8564 // If we only have one predecessor, and if it is a branch on this value,
8565 // see if that predecessor totally determines the outcome of this
8566 // switch.
8567 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
8568 if (simplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
8569 return requestResimplify();
8570
8571 // This block must be empty, except for the setcond inst, if it exists.
8572 // Ignore pseudo intrinsics.
8573 for (auto &I : *BB) {
8574 if (isa<PseudoProbeInst>(I) ||
8575 &I == cast<Instruction>(BI->getCondition()))
8576 continue;
8577 if (&I == BI)
8578 if (foldValueComparisonIntoPredecessors(BI, Builder))
8579 return requestResimplify();
8580 break;
8581 }
8582 }
8583
8584 // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
8585 if (simplifyBranchOnICmpChain(BI, Builder, DL))
8586 return true;
8587
8588 // If this basic block has dominating predecessor blocks and the dominating
8589 // blocks' conditions imply BI's condition, we know the direction of BI.
8590 std::optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL);
8591 if (Imp) {
8592 // Turn this into a branch on constant.
8593 auto *OldCond = BI->getCondition();
8594 ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
8595 : ConstantInt::getFalse(BB->getContext());
8596 BI->setCondition(TorF);
8598 return requestResimplify();
8599 }
8600
8601 // If this basic block is ONLY a compare and a branch, and if a predecessor
8602 // branches to us and one of our successors, fold the comparison into the
8603 // predecessor and use logical operations to pick the right destination.
8604 if (Options.SpeculateBlocks &&
8605 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
8606 Options.BonusInstThreshold))
8607 return requestResimplify();
8608
8609 // We have a conditional branch to two blocks that are only reachable
8610 // from BI. We know that the condbr dominates the two blocks, so see if
8611 // there is any identical code in the "then" and "else" blocks. If so, we
8612 // can hoist it up to the branching block.
8613 if (BI->getSuccessor(0)->getSinglePredecessor()) {
8614 if (BI->getSuccessor(1)->getSinglePredecessor()) {
8615 if (HoistCommon &&
8616 hoistCommonCodeFromSuccessors(BI, !Options.HoistCommonInsts))
8617 return requestResimplify();
8618
8619 if (BI && Options.HoistLoadsStoresWithCondFaulting &&
8620 isProfitableToSpeculate(BI, std::nullopt, TTI)) {
8621 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
8622 auto CanSpeculateConditionalLoadsStores = [&]() {
8623 for (auto *Succ : successors(BB)) {
8624 for (Instruction &I : *Succ) {
8625 if (I.isTerminator()) {
8626 if (I.getNumSuccessors() > 1)
8627 return false;
8628 continue;
8629 } else if (!isSafeCheapLoadStore(&I, TTI) ||
8630 SpeculatedConditionalLoadsStores.size() ==
8632 return false;
8633 }
8634 SpeculatedConditionalLoadsStores.push_back(&I);
8635 }
8636 }
8637 return !SpeculatedConditionalLoadsStores.empty();
8638 };
8639
8640 if (CanSpeculateConditionalLoadsStores()) {
8641 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
8642 std::nullopt, nullptr);
8643 return requestResimplify();
8644 }
8645 }
8646 } else {
8647 // If Successor #1 has multiple preds, we may be able to conditionally
8648 // execute Successor #0 if it branches to Successor #1.
8649 Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
8650 if (Succ0TI->getNumSuccessors() == 1 &&
8651 Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
8652 if (speculativelyExecuteBB(BI, BI->getSuccessor(0)))
8653 return requestResimplify();
8654 }
8655 } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
8656 // If Successor #0 has multiple preds, we may be able to conditionally
8657 // execute Successor #1 if it branches to Successor #0.
8658 Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
8659 if (Succ1TI->getNumSuccessors() == 1 &&
8660 Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
8661 if (speculativelyExecuteBB(BI, BI->getSuccessor(1)))
8662 return requestResimplify();
8663 }
8664
8665 // If this is a branch on something for which we know the constant value in
8666 // predecessors (e.g. a phi node in the current block), thread control
8667 // through this block.
8668 if (foldCondBranchOnValueKnownInPredecessor(BI))
8669 return requestResimplify();
8670
8671 // Scan predecessor blocks for conditional branches.
8672 for (BasicBlock *Pred : predecessors(BB))
8673 if (CondBrInst *PBI = dyn_cast<CondBrInst>(Pred->getTerminator()))
8674 if (PBI != BI)
8675 if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
8676 return requestResimplify();
8677
8678 // Look for diamond patterns.
8679 if (MergeCondStores)
8680 if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
8681 if (CondBrInst *PBI = dyn_cast<CondBrInst>(PrevBB->getTerminator()))
8682 if (PBI != BI)
8683 if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
8684 return requestResimplify();
8685
8686 // Look for nested conditional branches.
8687 if (mergeNestedCondBranch(BI, DTU))
8688 return requestResimplify();
8689
8690 return false;
8691}
8692
8693/// Check if passing a value to an instruction will cause undefined behavior.
8694static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {
8695 assert(V->getType() == I->getType() && "Mismatched types");
8697 if (!C)
8698 return false;
8699
8700 if (I->use_empty())
8701 return false;
8702
8703 if (C->isNullValue() || isa<UndefValue>(C)) {
8704 // Only look at the first use we can handle, avoid hurting compile time with
8705 // long uselists
8706 auto FindUse = llvm::find_if(I->uses(), [](auto &U) {
8707 auto *Use = cast<Instruction>(U.getUser());
8708 // Change this list when we want to add new instructions.
8709 switch (Use->getOpcode()) {
8710 default:
8711 return false;
8712 case Instruction::GetElementPtr:
8713 case Instruction::Ret:
8714 case Instruction::BitCast:
8715 case Instruction::Load:
8716 case Instruction::Store:
8717 case Instruction::Call:
8718 case Instruction::CallBr:
8719 case Instruction::Invoke:
8720 case Instruction::UDiv:
8721 case Instruction::URem:
8722 // Note: signed div/rem of INT_MIN / -1 is also immediate UB, not
8723 // implemented to avoid code complexity as it is unclear how useful such
8724 // logic is.
8725 case Instruction::SDiv:
8726 case Instruction::SRem:
8727 return true;
8728 }
8729 });
8730 if (FindUse == I->use_end())
8731 return false;
8732 auto &Use = *FindUse;
8733 auto *User = cast<Instruction>(Use.getUser());
8734 // Bail out if User is not in the same BB as I or User == I or User comes
8735 // before I in the block. The latter two can be the case if User is a
8736 // PHI node.
8737 if (User->getParent() != I->getParent() || User == I ||
8738 User->comesBefore(I))
8739 return false;
8740
8741 // Now make sure that there are no instructions in between that can alter
8742 // control flow (eg. calls)
8743 auto InstrRange =
8744 make_range(std::next(I->getIterator()), User->getIterator());
8745 if (any_of(InstrRange, [](Instruction &I) {
8747 }))
8748 return false;
8749
8750 // Look through GEPs. A load from a GEP derived from NULL is still undefined
8752 if (GEP->getPointerOperand() == I) {
8753 // The type of GEP may differ from the type of base pointer.
8754 // Bail out on vector GEPs, as they are not handled by other checks.
8755 if (GEP->getType()->isVectorTy())
8756 return false;
8757 // The current base address is null, there are four cases to consider:
8758 // getelementptr (TY, null, 0) -> null
8759 // getelementptr (TY, null, not zero) -> may be modified
8760 // getelementptr inbounds (TY, null, 0) -> null
8761 // getelementptr inbounds (TY, null, not zero) -> poison iff null is
8762 // undefined?
8763 if (!GEP->hasAllZeroIndices() &&
8764 (!GEP->isInBounds() ||
8765 NullPointerIsDefined(GEP->getFunction(),
8766 GEP->getPointerAddressSpace())))
8767 PtrValueMayBeModified = true;
8768 return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
8769 }
8770
8771 // Look through return.
8772 if (ReturnInst *Ret = dyn_cast<ReturnInst>(User)) {
8773 bool HasNoUndefAttr =
8774 Ret->getFunction()->hasRetAttribute(Attribute::NoUndef);
8775 // Return undefined to a noundef return value is undefined.
8776 if (isa<UndefValue>(C) && HasNoUndefAttr)
8777 return true;
8778 // Return null to a nonnull+noundef return value is undefined.
8779 if (C->isNullValue() && HasNoUndefAttr &&
8780 Ret->getFunction()->hasRetAttribute(Attribute::NonNull)) {
8781 return !PtrValueMayBeModified;
8782 }
8783 }
8784
8785 // Load from null is undefined.
8786 if (LoadInst *LI = dyn_cast<LoadInst>(User))
8787 if (!LI->isVolatile())
8788 return !NullPointerIsDefined(LI->getFunction(),
8789 LI->getPointerAddressSpace());
8790
8791 // Store to null is undefined.
8793 if (!SI->isVolatile())
8794 return (!NullPointerIsDefined(SI->getFunction(),
8795 SI->getPointerAddressSpace())) &&
8796 SI->getPointerOperand() == I;
8797
8798 // llvm.assume(false/undef) always triggers immediate UB.
8799 if (auto *Assume = dyn_cast<AssumeInst>(User)) {
8800 // Ignore assume operand bundles.
8801 if (I == Assume->getArgOperand(0))
8802 return true;
8803 }
8804
8805 if (auto *CB = dyn_cast<CallBase>(User)) {
8806 if (C->isNullValue() && NullPointerIsDefined(CB->getFunction()))
8807 return false;
8808 // A call to null is undefined.
8809 if (CB->getCalledOperand() == I)
8810 return true;
8811
8812 if (CB->isArgOperand(&Use)) {
8813 unsigned ArgIdx = CB->getArgOperandNo(&Use);
8814 // Passing null to a nonnnull+noundef argument is undefined.
8816 CB->paramHasNonNullAttr(ArgIdx, /*AllowUndefOrPoison=*/false))
8817 return !PtrValueMayBeModified;
8818 // Passing undef to a noundef argument is undefined.
8819 if (isa<UndefValue>(C) && CB->isPassingUndefUB(ArgIdx))
8820 return true;
8821 }
8822 }
8823 // Div/Rem by zero is immediate UB
8824 if (match(User, m_BinOp(m_Value(), m_Specific(I))) && User->isIntDivRem())
8825 return true;
8826 }
8827 return false;
8828}
8829
8830/// If BB has an incoming value that will always trigger undefined behavior
8831/// (eg. null pointer dereference), remove the branch leading here.
8833 DomTreeUpdater *DTU,
8834 AssumptionCache *AC) {
8835 for (PHINode &PHI : BB->phis())
8836 for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
8837 if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
8838 BasicBlock *Predecessor = PHI.getIncomingBlock(i);
8839 Instruction *T = Predecessor->getTerminator();
8840 IRBuilder<> Builder(T);
8841 if (isa<UncondBrInst>(T)) {
8842 BB->removePredecessor(Predecessor);
8843 // Turn unconditional branches into unreachables.
8844 Builder.CreateUnreachable();
8845 T->eraseFromParent();
8846 if (DTU)
8847 DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
8848 return true;
8849 } else if (CondBrInst *BI = dyn_cast<CondBrInst>(T)) {
8850 BB->removePredecessor(Predecessor);
8851 // Preserve guarding condition in assume, because it might not be
8852 // inferrable from any dominating condition.
8853 Value *Cond = BI->getCondition();
8854 CallInst *Assumption;
8855 if (BI->getSuccessor(0) == BB)
8856 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
8857 else
8858 Assumption = Builder.CreateAssumption(Cond);
8859 if (AC)
8860 AC->registerAssumption(cast<AssumeInst>(Assumption));
8861 Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
8862 : BI->getSuccessor(0));
8863 BI->eraseFromParent();
8864 if (DTU)
8865 DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
8866 return true;
8867 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
8868 // Redirect all branches leading to UB into
8869 // a newly created unreachable block.
8870 BasicBlock *Unreachable = BasicBlock::Create(
8871 Predecessor->getContext(), "unreachable", BB->getParent(), BB);
8872 Builder.SetInsertPoint(Unreachable);
8873 // The new block contains only one instruction: Unreachable
8874 Builder.CreateUnreachable();
8875 for (const auto &Case : SI->cases())
8876 if (Case.getCaseSuccessor() == BB) {
8877 BB->removePredecessor(Predecessor);
8878 Case.setSuccessor(Unreachable);
8879 }
8880 if (SI->getDefaultDest() == BB) {
8881 BB->removePredecessor(Predecessor);
8882 SI->setDefaultDest(Unreachable);
8883 }
8884
8885 if (DTU)
8886 DTU->applyUpdates(
8887 { { DominatorTree::Insert, Predecessor, Unreachable },
8888 { DominatorTree::Delete, Predecessor, BB } });
8889 return true;
8890 }
8891 }
8892
8893 return false;
8894}
8895
8896bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
8897 bool Changed = false;
8898
8899 assert(BB && BB->getParent() && "Block not embedded in function!");
8900 assert(BB->getTerminator() && "Degenerate basic block encountered!");
8901
8902 // Remove basic blocks that have no predecessors (except the entry block)...
8903 // or that just have themself as a predecessor. These are unreachable.
8904 if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
8905 BB->getSinglePredecessor() == BB) {
8906 LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
8907 DeleteDeadBlock(BB, DTU);
8908 return true;
8909 }
8910
8911 // Check to see if we can constant propagate this terminator instruction
8912 // away...
8913 Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
8914 /*TLI=*/nullptr, DTU);
8915
8916 // Check for and eliminate duplicate PHI nodes in this block.
8918
8919 // Check for and remove branches that will always cause undefined behavior.
8921 return requestResimplify();
8922
8923 // Merge basic blocks into their predecessor if there is only one distinct
8924 // pred, and if there is only one distinct successor of the predecessor, and
8925 // if there are no PHI nodes.
8926 if (MergeBlockIntoPredecessor(BB, DTU))
8927 return true;
8928
8929 if (SinkCommon && Options.SinkCommonInsts) {
8930 if (sinkCommonCodeFromPredecessors(BB, DTU) ||
8931 mergeCompatibleInvokes(BB, DTU)) {
8932 // sinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
8933 // so we may now how duplicate PHI's.
8934 // Let's rerun EliminateDuplicatePHINodes() first,
8935 // before foldTwoEntryPHINode() potentially converts them into select's,
8936 // after which we'd need a whole EarlyCSE pass run to cleanup them.
8937 return true;
8938 }
8939 // Merge identical predecessors of this block.
8940 if (simplifyDuplicatePredecessors(BB, DTU))
8941 return true;
8942 }
8943
8944 if (Options.SpeculateBlocks &&
8945 !BB->getParent()->hasFnAttribute(Attribute::OptForFuzzing)) {
8946 // If there is a trivial two-entry PHI node in this basic block, and we can
8947 // eliminate it, do so now.
8948 if (auto *PN = dyn_cast<PHINode>(BB->begin()))
8949 if (PN->getNumIncomingValues() == 2)
8950 if (foldTwoEntryPHINode(PN, TTI, DTU, Options.AC, DL,
8951 Options.SpeculateUnpredictables))
8952 return true;
8953 }
8954
8955 IRBuilder<> Builder(BB);
8957 Builder.SetInsertPoint(Terminator);
8958 switch (Terminator->getOpcode()) {
8959 case Instruction::UncondBr:
8960 Changed |= simplifyUncondBranch(cast<UncondBrInst>(Terminator), Builder);
8961 break;
8962 case Instruction::CondBr:
8963 Changed |= simplifyCondBranch(cast<CondBrInst>(Terminator), Builder);
8964 break;
8965 case Instruction::Resume:
8966 Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
8967 break;
8968 case Instruction::CleanupRet:
8969 Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
8970 break;
8971 case Instruction::Switch:
8972 Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
8973 break;
8974 case Instruction::Unreachable:
8975 Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
8976 break;
8977 case Instruction::IndirectBr:
8978 Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
8979 break;
8980 }
8981
8982 return Changed;
8983}
8984
8985bool SimplifyCFGOpt::run(BasicBlock *BB) {
8986 bool Changed = false;
8987
8988 // Repeated simplify BB as long as resimplification is requested.
8989 do {
8990 Resimplify = false;
8991
8992 // Perform one round of simplifcation. Resimplify flag will be set if
8993 // another iteration is requested.
8994 Changed |= simplifyOnce(BB);
8995 } while (Resimplify);
8996
8997 return Changed;
8998}
8999
9002 ArrayRef<WeakVH> LoopHeaders) {
9003 return SimplifyCFGOpt(TTI, DTU, BB->getDataLayout(), LoopHeaders,
9004 Options)
9005 .run(BB);
9006}
#define Fail
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Function Alias Analysis Results
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines the DenseMap class.
#define DEBUG_TYPE
Hexagon Common GEP
static bool IsIndirectCall(const MachineInstr *MI)
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This defines the Use class.
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for metadata subclasses.
#define T
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:483
Provides some synthesis utilities to produce sequences of values.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
static std::optional< ContiguousCasesResult > findContiguousCases(Value *Condition, SmallVectorImpl< ConstantInt * > &Cases, SmallVectorImpl< ConstantInt * > &OtherCases, BasicBlock *Dest, BasicBlock *OtherDest)
static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred, MemorySSAUpdater *MSSAU=nullptr)
Update PHI nodes in Succ to indicate that there will now be entries in it from the 'NewPred' block.
static bool validLookupTableConstant(Constant *C, const TargetTransformInfo &TTI)
Return true if the backend will be able to handle initializing an array of constants like C.
static StoreInst * findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2)
static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, BasicBlock *EndBB, unsigned &SpeculatedInstructions, InstructionCost &Cost, const TargetTransformInfo &TTI)
Estimate the cost of the insertion(s) and check that the PHI nodes can be converted to selects.
static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI, bool ConvertSwitchToLookupTable)
If the switch is only used to initialize one or more phi nodes in a common successor block with diffe...
static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI, Value *SelectValue, IRBuilder<> &Builder, DomTreeUpdater *DTU)
static bool valuesOverlap(std::vector< ValueEqualityComparisonCase > &C1, std::vector< ValueEqualityComparisonCase > &C2)
Return true if there are any keys in C1 that exist in C2 as well.
static bool isProfitableToSpeculate(const CondBrInst *BI, std::optional< bool > Invert, const TargetTransformInfo &TTI)
static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static bool mergeCleanupPad(CleanupReturnInst *RI)
static bool isVectorOp(Instruction &I)
Return if an instruction's type or any of its operands' types are a vector type.
static BasicBlock * allPredecessorsComeFromSameSource(BasicBlock *BB)
static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap)
static int constantIntSortPredicate(ConstantInt *const *P1, ConstantInt *const *P2)
static bool getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, SmallVectorImpl< std::pair< PHINode *, Constant * > > &Res, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to determine the resulting constant values in phi nodes at the common destination basic block,...
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified=false)
Check if passing a value to an instruction will cause undefined behavior.
static std::optional< std::tuple< BasicBlock *, Instruction::BinaryOps, bool > > shouldFoldCondBranchesToCommonDestination(CondBrInst *BI, CondBrInst *PBI, const TargetTransformInfo *TTI)
Determine if the two branches share a common destination and deduce a glue that joins the branches' c...
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags)
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, Instruction *I1, Instruction *I2)
static ConstantInt * getConstantInt(Value *V, const DataLayout &DL)
Extract ConstantInt from value, looking through IntToPtr and PointerNullValue.
static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder, DomTreeUpdater *DTU)
Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have the same destination.
static bool shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const TargetTransformInfo &TTI, const DataLayout &DL, const SmallVector< Type * > &ResultTypes)
Determine whether a lookup table should be built for this switch, based on the number of cases,...
static Constant * constantFold(Instruction *I, const DataLayout &DL, const SmallDenseMap< Value *, Constant * > &ConstantPool)
Try to fold instruction I into a constant.
static bool areIdenticalUpToCommutativity(const Instruction *I1, const Instruction *I2)
static bool forwardSwitchConditionToPHI(SwitchInst *SI)
Try to forward the condition of a switch instruction to a phi node dominated by the switch,...
static PHINode * findPHIForConditionForwarding(ConstantInt *CaseValue, BasicBlock *BB, int *PhiIndex)
If BB would be eligible for simplification by TryToSimplifyUncondBranchFromEmptyBlock (i....
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
Tries to transform switch of powers of two to reduce switch range.
static bool isCleanupBlockEmpty(iterator_range< BasicBlock::iterator > R)
static Value * ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, Value *AlternativeV=nullptr)
static Value * createLogicalOp(IRBuilderBase &Builder, Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="")
static void hoistConditionalLoadsStores(CondBrInst *BI, SmallVectorImpl< Instruction * > &SpeculatedConditionalLoadsStores, std::optional< bool > Invert, Instruction *Sel)
If the target supports conditional faulting, we look for the following pattern:
static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, const TargetTransformInfo &TTI)
Helper function for hoistCommonCodeFromSuccessors.
static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to transform a switch that has "holes" in it to a contiguous sequence of cases.
static bool safeToMergeTerminators(Instruction *SI1, Instruction *SI2, SmallSetVector< BasicBlock *, 4 > *FailBlocks=nullptr)
Return true if it is safe to merge these two terminator instructions together.
SkipFlags
@ SkipReadMem
@ SkipSideEffect
@ SkipImplicitControlFlow
static bool incomingValuesAreCompatible(BasicBlock *BB, ArrayRef< BasicBlock * > IncomingBlocks, SmallPtrSetImpl< Value * > *EquivalenceSet=nullptr)
Return true if all the PHI nodes in the basic block BB receive compatible (identical) incoming values...
static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If a switch is only used to initialize one or more phi nodes in a common successor block with only tw...
static void createUnreachableSwitchDefault(SwitchInst *Switch, DomTreeUpdater *DTU, bool RemoveOrigDefaultBlock=true)
static Value * foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, Constant *DefaultResult, Value *Condition, IRBuilder<> &Builder, const DataLayout &DL, ArrayRef< uint32_t > BranchWeights)
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange)
static bool sinkCommonCodeFromPredecessors(BasicBlock *BB, DomTreeUpdater *DTU)
Check whether BB's predecessors end with unconditional branches.
static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI, const DataLayout &DL)
static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL)
Compute masked bits for the condition of a switch and use it to remove dead cases.
static Value * isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, BasicBlock *StoreBB, BasicBlock *EndBB)
Determine if we can hoist sink a sole store instruction out of a conditional block.
static std::optional< bool > foldCondBranchOnValueKnownInPredecessorImpl(CondBrInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
If we have a conditional branch on something for which we know the constant value in predecessors (e....
static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL, bool SpeculateUnpredictables)
Given a BB that starts with the specified two-entry PHI node, see if we can eliminate it.
static bool extractPredSuccWeights(CondBrInst *PBI, CondBrInst *BI, uint64_t &PredTrueWeight, uint64_t &PredFalseWeight, uint64_t &SuccTrueWeight, uint64_t &SuccFalseWeight)
Return true if either PBI or BI has branch weight available, and store the weights in {Pred|Succ}...
static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, SwitchCaseResultVectorTy &UniqueResults, Constant *&DefaultResult, const DataLayout &DL, const TargetTransformInfo &TTI, uintptr_t MaxUniqueResults)
static bool shouldUseSwitchConditionAsTableIndex(ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal, bool HasDefaultResults, const SmallVector< Type * > &ResultTypes, const DataLayout &DL, const TargetTransformInfo &TTI)
static InstructionCost computeSpeculationCost(const User *I, const TargetTransformInfo &TTI)
Compute an abstract "cost" of speculating the given instruction, which is assumed to be safe to specu...
static bool performBranchToCommonDestFolding(CondBrInst *BI, CondBrInst *PBI, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU, const TargetTransformInfo *TTI)
static unsigned skippedInstrFlags(Instruction *I)
static bool mergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU)
If this block is a landingpad exception handling block, categorize all the predecessor invokes into s...
static bool replacingOperandWithVariableIsCheap(const Instruction *I, int OpIdx)
static void eraseTerminatorAndDCECond(Instruction *TI, MemorySSAUpdater *MSSAU=nullptr)
static void eliminateBlockCases(BasicBlock *BB, std::vector< ValueEqualityComparisonCase > &Cases)
Given a vector of bb/value pairs, remove any entries in the list that match the specified block.
static bool mergeConditionalStores(CondBrInst *PBI, CondBrInst *QBI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static bool mergeNestedCondBranch(CondBrInst *BI, DomTreeUpdater *DTU)
Fold the following pattern: bb0: br i1 cond1, label bb1, label bb2 bb1: br i1 cond2,...
static void sinkLastInstruction(ArrayRef< BasicBlock * > Blocks)
static size_t mapCaseToResult(ConstantInt *CaseVal, SwitchCaseResultVectorTy &UniqueResults, Constant *Result)
static bool tryWidenCondBranchToCondBranch(CondBrInst *PBI, CondBrInst *BI, DomTreeUpdater *DTU)
If the previous block ended with a widenable branch, determine if reusing the target block is profita...
static void mergeCompatibleInvokesImpl(ArrayRef< InvokeInst * > Invokes, DomTreeUpdater *DTU)
static bool mergeIdenticalBBs(ArrayRef< BasicBlock * > Candidates, DomTreeUpdater *DTU)
static void getBranchWeights(Instruction *TI, SmallVectorImpl< uint64_t > &Weights)
Get Weights of a given terminator, the default weight is at the front of the vector.
static bool tryToMergeLandingPad(LandingPadInst *LPad, UncondBrInst *BI, BasicBlock *BB, DomTreeUpdater *DTU)
Given an block with only a single landing pad and a unconditional branch try to find another basic bl...
static Constant * lookupConstant(Value *V, const SmallDenseMap< Value *, Constant * > &ConstantPool)
If V is a Constant, return it.
static bool SimplifyCondBranchToCondBranch(CondBrInst *PBI, CondBrInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If we have a conditional branch as a predecessor of another block, this function tries to simplify it...
static bool canSinkInstructions(ArrayRef< Instruction * > Insts, DenseMap< const Use *, SmallVector< Value *, 4 > > &PHIOperands)
static void hoistLockstepIdenticalDbgVariableRecords(Instruction *TI, Instruction *I1, SmallVectorImpl< Instruction * > &OtherInsts)
Hoists DbgVariableRecords from I1 and OtherInstrs that are identical in lock-step to TI.
static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU)
static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB)
Return true if we can thread a branch across this block.
static bool removeUndefIntroducingPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, AssumptionCache *AC)
If BB has an incoming value that will always trigger undefined behavior (eg.
static bool simplifySwitchWhenUMin(SwitchInst *SI, DomTreeUpdater *DTU)
Tries to transform the switch when the condition is umin with a constant.
static bool isSafeCheapLoadStore(const Instruction *I, const TargetTransformInfo &TTI)
static ConstantInt * getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To)
static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt, SmallPtrSetImpl< Instruction * > &AggressiveInsts, InstructionCost &Cost, InstructionCost Budget, const TargetTransformInfo &TTI, AssumptionCache *AC, SmallPtrSetImpl< Instruction * > &ZeroCostInstructions, unsigned Depth=0)
If we have a merge point of an "if condition" as accepted above, return true if the specified value d...
static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, CondBrInst *RangeCheckBranch, Constant *DefaultValue, const SmallVectorImpl< std::pair< ConstantInt *, Constant * > > &Values)
Try to reuse the switch table index compare.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:119
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1693
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1208
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition APInt.h:1256
bool sle(const APInt &RHS) const
Signed less or equal comparison.
Definition APInt.h:1173
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1554
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:357
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:476
LLVM_ABI APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:2012
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1264
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1137
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
std::optional< int64_t > trySExtValue() const
Get sign extended value if possible.
Definition APInt.h:1597
LLVM_ABI APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1993
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
const T & back() const
Get the last element.
Definition ArrayRef.h:150
const T & front() const
Get the first element.
Definition ArrayRef.h:144
size_t size() const
Get the array size.
Definition ArrayRef.h:141
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A cache of @llvm.assume calls within a function.
LLVM_ABI void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM_ABI bool getValueAsBool() const
Return the attribute's value as a boolean.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:474
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:461
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:530
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition BasicBlock.h:687
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
Definition BasicBlock.h:484
LLVM_ABI const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI void flushTerminatorDbgRecords()
Eject any debug-info trailing at the end of a block.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
size_t size() const
Definition BasicBlock.h:482
LLVM_ABI bool isLandingPad() const
Return true if this basic block is a landing pad.
LLVM_ABI bool hasNPredecessorsOrMore(unsigned N) const
Return true if this block has N predecessors or more.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition BasicBlock.h:659
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
BasicBlock * getBasicBlock() const
Definition Constants.h:1106
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
BranchProbability getCompl() const
void addRangeRetAttr(const ConstantRange &CR)
adds the range attribute to the list of attributes.
bool isCallee(Value::const_user_iterator UI) const
Determine whether the passed iterator points to the callee operand's Use.
bool isDataOperand(const Use *U) const
bool tryIntersectAttributes(const CallBase *Other)
Try to intersect the attributes from 'this' CallBase and the 'Other' CallBase.
This class represents a function call, abstracting a target machine's calling convention.
mapped_iterator< op_iterator, DerefFnTy > handler_iterator
CleanupPadInst * getCleanupPad() const
Convenience accessor.
BasicBlock * getUnwindDest() const
This class is the base class for the comparison instructions.
Definition InstrTypes.h:664
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition InstrTypes.h:986
bool isEquality() const
Determine if this is an equals/not equals predicate.
Definition InstrTypes.h:915
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:765
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Conditional Branch instruction.
static CondBrInst * Create(Value *Cond, BasicBlock *IfTrue, BasicBlock *IfFalse, InsertPosition InsertBefore=nullptr)
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
void setCondition(Value *V)
Value * getCondition() const
BasicBlock * getSuccessor(unsigned i) const
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
A vector constant whose element type is a simple 1/2/4/8-byte integer or float/double,...
Definition Constants.h:938
A constant value that is initialized with an expression using other constant values.
Definition Constants.h:1297
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:420
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:225
bool isNegative() const
Definition Constants.h:214
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition Constants.h:269
IntegerType * getIntegerType() const
Variant of the getType() method to always return an IntegerType, which reduces the amount of casting ...
Definition Constants.h:198
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:135
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:219
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition Constants.h:162
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
A constant pointer value that points to null.
Definition Constants.h:705
This class represents a range of values.
LLVM_ABI bool getEquivalentICmp(CmpInst::Predicate &Pred, APInt &RHS) const
Set up Pred and RHS such that ConstantRange::makeExactICmpRegion(Pred, RHS) == *this.
LLVM_ABI ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
const APInt & getLower() const
Return the lower value for this range.
LLVM_ABI APInt getUnsignedMin() const
Return the smallest unsigned value contained in the ConstantRange.
LLVM_ABI bool isEmptySet() const
Return true if this set contains no members.
LLVM_ABI bool isSizeLargerThan(uint64_t MaxSize) const
Compare set size of this range with Value.
const APInt & getUpper() const
Return the upper value for this range.
LLVM_ABI bool isUpperWrapped() const
Return true if the exclusive upper bound wraps around the unsigned domain.
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
LLVM_ABI ConstantRange inverse() const
Return a new range that is the logical not of the current set.
LLVM_ABI APInt getUnsignedMax() const
Return the largest unsigned value contained in the ConstantRange.
static ConstantRange getNonEmpty(APInt Lower, APInt Upper)
Create non-empty constant range with the given bounds.
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
LLVM_ABI bool isOneValue() const
Returns true if the value is one.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:84
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
Base class for non-instruction debug metadata records that have positions within IR.
LLVM_ABI void removeFromParent()
simple_ilist< DbgRecord >::iterator self_iterator
Record of a variable value-assignment, aka a non instruction representation of the dbg....
A debug info location.
Definition DebugLoc.h:123
bool isSameSourceLocation(const DebugLoc &Other) const
Return true if the source locations match, ignoring isImplicitCode and source atom info.
Definition DebugLoc.h:255
static DebugLoc getTemporary()
Definition DebugLoc.h:160
static LLVM_ABI DebugLoc getMergedLocation(DebugLoc LocA, DebugLoc LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
Definition DebugLoc.cpp:179
static LLVM_ABI DebugLoc getMergedLocations(ArrayRef< DebugLoc > Locs)
Try to combine the vector of locations passed as input in a single one.
Definition DebugLoc.cpp:166
static DebugLoc getDropped()
Definition DebugLoc.h:163
ValueT & at(const_arg_type_t< KeyT > Val)
Return the entry for the specified key, or abort if no such entry exists.
Definition DenseMap.h:223
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:254
unsigned size() const
Definition DenseMap.h:110
iterator end()
Definition DenseMap.h:81
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:239
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Definition DenseMap.h:114
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:873
const BasicBlock & getEntryBlock() const
Definition Function.h:809
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:763
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:711
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Module * getParent()
Get the module that this global value is contained inside of...
This instruction compares its operands according to the predicate given to the constructor.
Predicate getSignedPredicate() const
For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2390
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition IRBuilder.h:2138
CondBrInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition IRBuilder.h:1238
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition IRBuilder.h:509
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
BasicBlock::iterator GetInsertPoint() const
Definition IRBuilder.h:202
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition IRBuilder.h:2693
void SetCurrentDebugLocation(const DebugLoc &L)
Set location information used by debugging information.
Definition IRBuilder.h:247
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition IRBuilder.h:1554
LLVM_ABI CallInst * CreateAssumption(Value *Cond)
Create an assume intrinsic call that allows the optimizer to assume that the provided condition will ...
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition IRBuilder.h:2018
UncondBrInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition IRBuilder.h:1232
Value * CreateNot(Value *V, const Twine &Name="")
Definition IRBuilder.h:1866
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition IRBuilder.h:1261
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2374
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition IRBuilder.h:1918
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition IRBuilder.h:1931
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1444
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2232
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition IRBuilder.h:514
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition IRBuilder.h:2106
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition IRBuilder.h:2315
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2484
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1614
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1478
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2858
Indirect Branch Instruction.
BasicBlock * getDestination(unsigned i)
Return the specified destination.
unsigned getNumDestinations() const
return the number of possible destinations in this indirectbr instruction.
LLVM_ABI void removeDestination(unsigned i)
This method removes the specified successor from the indirectbr instruction.
LLVM_ABI void dropUBImplyingAttrsAndMetadata(ArrayRef< unsigned > Keep={})
Drop any attributes or metadata that can cause immediate undefined behavior.
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(const Instruction *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere=std::nullopt, bool InsertAtHead=false)
Clone any debug-info attached to From onto this instruction.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
iterator_range< simple_ilist< DbgRecord >::iterator > getDbgRecordRange() const
Return a range over the DbgRecords attached to this instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool isTerminator() const
LLVM_ABI bool isUsedOutsideOfBlock(const BasicBlock *BB) const LLVM_READONLY
Return true if there are any uses of this instruction in blocks other than the specified block.
@ CompareUsingIntersectedAttrs
Check for equivalence with intersected callbase attrs.
LLVM_ABI bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI void applyMergedLocation(DebugLoc LocA, DebugLoc LocB)
Merge 2 debug locations and apply it to the Instruction.
LLVM_ABI void dropDbgRecords()
Erase any DbgRecords attached to this instruction.
LLVM_ABI InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
Class to represent integer types.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Invoke instruction.
void setNormalDest(BasicBlock *B)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
The landingpad instruction holds all of the information necessary to generate correct exception handl...
An instruction for reading from memory.
static unsigned getPointerOperandIndex()
Iterates through instructions in a set of blocks in reverse order from the first non-terminator.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition MDBuilder.cpp:38
Metadata node.
Definition Metadata.h:1080
Helper class to manipulate !mmra metadata nodes.
bool empty() const
Definition MapVector.h:79
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition MapVector.h:126
size_type size() const
Definition MapVector.h:58
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Value * getValue() const
Convenience accessor.
Return a value (possibly void), from a function.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
void insert_range(Range &&R)
Definition SetVector.h:176
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:100
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
size_type size() const
Definition SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Align getAlign() const
bool isSimple() const
Value * getValueOperand()
bool isUnordered() const
static unsigned getPointerOperandIndex()
Value * getPointerOperand()
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
LLVM_ABI void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
LLVM_ABI CaseWeightOpt getSuccessorWeight(unsigned idx)
LLVM_ABI void replaceDefaultDest(SwitchInst::CaseIt I)
Replace the default destination by given case.
std::optional< uint32_t > CaseWeightOpt
LLVM_ABI SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
Multiway switch.
CaseIt case_end()
Returns a read/write iterator that points one past the last in the SwitchInst.
BasicBlock * getSuccessor(unsigned idx) const
void setCondition(Value *V)
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
CaseIteratorImpl< CaseHandle > CaseIt
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
unsigned getNumSuccessors() const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:284
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:201
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
Definition Type.cpp:310
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
Unconditional Branch instruction.
void setSuccessor(BasicBlock *NewSucc)
static UncondBrInst * Create(BasicBlock *Target, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i=0) const
'undef' values are things that do not have specified contents.
Definition Constants.h:1612
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
LLVM_ABI void set(Value *Val)
Definition Value.h:883
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
op_range operands()
Definition User.h:267
const Use & getOperandUse(unsigned i) const
Definition User.h:220
void setOperand(unsigned i, Value *Val)
Definition User.h:212
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition User.cpp:25
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
static constexpr uint64_t MaximumAlignment
Definition Value.h:808
LLVM_ABI Value(Type *Ty, unsigned scid)
Definition Value.cpp:53
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:393
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:549
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
bool use_empty() const
Definition Value.h:346
iterator_range< use_iterator > uses()
Definition Value.h:380
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:399
Represents an op.with.overflow intrinsic.
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
A range adaptor for a pair of iterators.
Changed
#define UINT64_MAX
Definition DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
auto m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
bool match(Val *V, const Pattern &P)
match_bind< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
auto m_Value()
Match an arbitrary value and ignore it.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
match_bind< WithOverflowInst > m_WithOverflowInst(WithOverflowInst *&I)
Match a with overflow intrinsic, capturing it if we match.
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
NoWrapTrunc_match< OpTy, TruncInst::NoUnsignedWrap > m_NUWTrunc(const OpTy &Op)
Matches trunc nuw.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition DebugInfo.h:203
LLVM_ABI void deleteAssignmentMarkers(const Instruction *Inst)
Delete the llvm.dbg.assign intrinsics linked to Inst.
initializer< Ty > init(const Ty &Val)
DXILDebugInfoMap run(Module &M)
constexpr double e
@ User
could "use" a pointer
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
Context & getContext() const
Definition BasicBlock.h:99
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
@ Offset
Definition DWP.cpp:557
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:830
bool operator<(int64_t V1, const APSInt &V2)
Definition APSInt.h:360
constexpr auto not_equal_to(T &&Arg)
Functor variant of std::not_equal_to that can be used as a UnaryPredicate in functional algorithms li...
Definition STLExtras.h:2179
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1764
cl::opt< bool > ProfcheckDisableMetadataFixes
Definition LoopInfo.cpp:60
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:535
bool succ_empty(const Instruction *I)
Definition CFG.h:153
LLVM_ABI bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB)
Check if we can prove that all paths starting from this block converge to a block that either has a @...
LLVM_ABI bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition Local.cpp:134
static cl::opt< unsigned > MaxSwitchCasesPerResult("max-switch-cases-per-result", cl::Hidden, cl::init(16), cl::desc("Limit cases to analyze when converting a switch to select"))
InstructionCost Cost
static cl::opt< bool > SpeculateOneExpensiveInst("speculate-one-expensive-inst", cl::Hidden, cl::init(true), cl::desc("Allow exactly one expensive instruction to be speculatively " "executed"))
@ Dead
Unused definition.
auto pred_end(const MachineBasicBlock *BB)
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto successors(const MachineBasicBlock *BB)
auto accumulate(R &&Range, E &&Init)
Wrapper for std::accumulate.
Definition STLExtras.h:1701
constexpr from_range_t from_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI MDNode * getBranchWeightMDNode(const Instruction &I)
Get the branch weights metadata node.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
LLVM_ABI void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto unique(Range &&R, Predicate P)
Definition STLExtras.h:2133
static cl::opt< unsigned > MaxSpeculationDepth("max-speculation-depth", cl::Hidden, cl::init(10), cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions"))
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1790
static cl::opt< unsigned > PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(2), cl::desc("Control the amount of phi node folding to perform (default = 2)"))
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
static cl::opt< bool > MergeCondStoresAggressively("simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false), cl::desc("When merging conditional stores, do so even if the resultant " "basic blocks are unlikely to be if-converted as a result"))
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:156
LLVM_ABI ConstantRange getConstantRangeFromMetadata(const MDNode &RangeMD)
Parse out a conservative ConstantRange from !range metadata.
auto map_range(ContainerTy &&C, FuncTy F)
Return a range that applies F to the elements of C.
Definition STLExtras.h:365
static cl::opt< unsigned > BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, cl::init(2), cl::desc("Maximum cost of combining conditions when " "folding branches"))
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:204
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
static cl::opt< bool > SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block"))
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition STLExtras.h:2199
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:149
static cl::opt< bool > HoistStoresWithCondFaulting("simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist stores if the target supports conditional faulting"))
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
constexpr detail::StaticCastFunc< To > StaticCastTo
Function objects corresponding to the Cast types defined above.
Definition Casting.h:882
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI CondBrInst * GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, BasicBlock *&IfFalse)
Check whether BB is the merge point of an if-region.
LLVM_ABI bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
Definition Local.cpp:1155
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
LLVM_ABI void InvertBranch(CondBrInst *PBI, IRBuilderBase &Builder)
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI bool impliesPoison(const Value *ValAssumedPoison, const Value *V)
Return true if V is poison given that ValAssumedPoison is already poison.
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1635
static cl::opt< bool > EnableMergeCompatibleInvokes("simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true), cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"))
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition ValueMapper.h:98
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition ValueMapper.h:80
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1752
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition STLExtras.h:1398
LLVM_ABI bool collectPossibleValues(const Value *V, SmallPtrSetImpl< const Constant * > &Constants, unsigned MaxCount, bool AllowUndefOrPoison=true)
Enumerates all possible immediate values of V and inserts them into the set Constants.
LLVM_ABI Instruction * removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
Replace 'BB's terminator with one that does not have an unwind successor block.
Definition Local.cpp:2863
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
auto succ_size(const MachineBasicBlock *BB)
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
Definition STLExtras.h:551
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
LLVM_ABI void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition Local.cpp:3110
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:322
static cl::opt< int > MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), cl::desc("Max size of a block which is still considered " "small enough to thread through"))
LLVM_ABI BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
bool isWidenableBranch(const User *U)
Returns true iff U is a widenable branch (that is, extractWidenableCondition returns widenable condit...
@ Other
Any other memory.
Definition ModRef.h:68
TargetTransformInfo TTI
static cl::opt< unsigned > HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden, cl::init(20), cl::desc("Allow reordering across at most this many " "instructions when hoisting"))
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
static cl::opt< bool > MergeCondStores("simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores even if an unconditional store does not " "precede - hoist multiple conditional stores into a single " "predicated store"))
static cl::opt< unsigned > BranchFoldToCommonDestVectorMultiplier("simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden, cl::init(2), cl::desc("Multiplier to apply to threshold when determining whether or not " "to fold branch to common destination when vector operations are " "present"))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
LLVM_ABI void hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, BasicBlock *BB)
Hoist all of the instructions in the IfBlock to the dominant block DomBlock, by moving its instructio...
Definition Local.cpp:3392
@ Sub
Subtraction of integers.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2011
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
LLVM_ABI bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx)
Given an instruction, is it legal to set operand OpIdx to a non-constant value?
Definition Local.cpp:3899
DWARFExpression::Operation Op
LLVM_ABI bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
LLVM_ABI bool FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep=nullptr)
We know that BB has one predecessor.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
void RemapDbgRecord(Module *M, DbgRecord *DR, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecord DR using the value map VM.
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
auto sum_of(R &&Range, E Init=E{0})
Returns the sum of all values in Range with Init initial value.
Definition STLExtras.h:1716
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if this is always a dereferenceable pointer.
Definition Loads.cpp:250
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
static cl::opt< bool > HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores if an unconditional store precedes"))
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
LLVM_ABI bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
auto pred_begin(const MachineBasicBlock *BB)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2191
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:248
auto predecessors(const MachineBasicBlock *BB)
static cl::opt< unsigned > HoistLoadsStoresWithCondFaultingThreshold("hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6), cl::desc("Control the maximal conditional load/store that we are willing " "to speculatively execute to eliminate conditional branch " "(default = 6)"))
static cl::opt< bool > HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), cl::desc("Hoist common instructions up to the parent block"))
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:368
LLVM_ABI unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Get the upper bound on bit size for this Value Op as a signed integer.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
LLVM_ABI bool foldBranchToCommonDest(CondBrInst *BI, llvm::DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr, const TargetTransformInfo *TTI=nullptr, unsigned BonusInstThreshold=1)
If this basic block is ONLY a setcc and a branch, and if a predecessor branches to us and one of our ...
static cl::opt< unsigned > TwoEntryPHINodeFoldingThreshold("two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4), cl::desc("Control the maximal total instruction cost that we are willing " "to speculatively execute to fold a 2-entry PHI node into a " "select (default = 4)"))
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
SmallVector< uint64_t, 2 > getDisjunctionWeights(const SmallVector< T1, 2 > &B1, const SmallVector< T2, 2 > &B2)
Get the branch weights of a branch conditioned on b1 || b2, where b1 and b2 are 2 booleans that are t...
bool pred_empty(const BasicBlock *BB)
Definition CFG.h:119
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
LLVM_ABI std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition STLExtras.h:1595
LLVM_ABI bool hasBranchWeightMD(const Instruction &I)
Checks if an instructions has Branch Weight Metadata.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition Hashing.h:325
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition STLExtras.h:2145
static cl::opt< bool > HoistLoadsWithCondFaulting("simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist loads if the target supports conditional faulting"))
LLVM_ABI Constant * ConstantFoldInstOperands(const Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
LLVM_ABI void setFittedBranchWeights(Instruction &I, ArrayRef< uint64_t > Weights, bool IsExpected, bool ElideAllZero=false)
Variant of setBranchWeights where the Weights will be fit first to uint32_t by shifting right.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
bool capturesNothing(CaptureComponents CC)
Definition ModRef.h:375
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI bool EliminateDuplicatePHINodes(BasicBlock *BB)
Check for and eliminate duplicate PHI nodes in this block.
Definition Local.cpp:1524
@ Keep
No function return thunk.
Definition CodeGen.h:162
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
Definition Casting.h:866
LLVM_ABI void RemapSourceAtom(Instruction *I, ValueToValueMapTy &VM)
Remap source location atom.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition Hashing.h:305
LLVM_ABI bool isWritableObject(const Value *Object, bool &ExplicitlyDereferenceableOnly)
Return true if the Object is writable, in the sense that any location based on this pointer that can ...
LLVM_ABI void mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap)
Mark a cloned instruction as a new instance so that its source loc can be updated when remapped.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
LLVM_ABI void extractFromBranchWeightMD64(const MDNode *ProfileData, SmallVectorImpl< uint64_t > &Weights)
Faster version of extractBranchWeights() that skips checks and must only be called with "branch_weigh...
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, const SimplifyQuery &SQ, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876
#define N
SmallVectorImpl< ConstantInt * > * Cases
SmallVectorImpl< ConstantInt * > * OtherCases
Checking whether two BBs are equal depends on the contents of the BasicBlock and the incoming values ...
SmallDenseMap< BasicBlock *, Value *, 8 > BB2ValueMap
Phi2IVsMap * PhiPredIVs
DenseMap< PHINode *, BB2ValueMap > Phi2IVsMap
static bool canBeMerged(const BasicBlock *BB)
BasicBlock * BB
static const EqualBBWrapper * getEmptyKey()
static bool isEqual(const EqualBBWrapper *LHS, const EqualBBWrapper *RHS)
static unsigned getHashValue(const EqualBBWrapper *EBW)
static const EqualBBWrapper * getTombstoneKey()
An information struct used to provide DenseMap with the various necessary components for a given valu...
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition KnownBits.h:310
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:146
Matching combinators.
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:334