LLVM 22.0.0git
SimplifyCFG.cpp
Go to the documentation of this file.
//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Peephole optimize the CFG.
//
//===----------------------------------------------------------------------===//
12
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyCFG.h"
#include <algorithm>
#include <cassert>
#include <climits>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <map>
#include <optional>
#include <set>
#include <tuple>
#include <utility>
#include <vector>
93
94using namespace llvm;
95using namespace PatternMatch;
96
97#define DEBUG_TYPE "simplifycfg"
98
99namespace llvm {
100
102 "simplifycfg-require-and-preserve-domtree", cl::Hidden,
103
104 cl::desc(
105 "Temporary development switch used to gradually uplift SimplifyCFG "
106 "into preserving DomTree,"));
107
108// Chosen as 2 so as to be cheap, but still to have enough power to fold
109// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
110// To catch this, we need to fold a compare and a select, hence '2' being the
111// minimum reasonable default.
113 "phi-node-folding-threshold", cl::Hidden, cl::init(2),
114 cl::desc(
115 "Control the amount of phi node folding to perform (default = 2)"));
116
118 "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
119 cl::desc("Control the maximal total instruction cost that we are willing "
120 "to speculatively execute to fold a 2-entry PHI node into a "
121 "select (default = 4)"));
122
123static cl::opt<bool>
124 HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
125 cl::desc("Hoist common instructions up to the parent block"));
126
128 "simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true),
129 cl::desc("Hoist loads if the target supports conditional faulting"));
130
132 "simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true),
133 cl::desc("Hoist stores if the target supports conditional faulting"));
134
136 "hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6),
137 cl::desc("Control the maximal conditional load/store that we are willing "
138 "to speculatively execute to eliminate conditional branch "
139 "(default = 6)"));
140
142 HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
143 cl::init(20),
144 cl::desc("Allow reordering across at most this many "
145 "instructions when hoisting"));
146
147static cl::opt<bool>
148 SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
149 cl::desc("Sink common instructions down to the end block"));
150
152 "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
153 cl::desc("Hoist conditional stores if an unconditional store precedes"));
154
156 "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
157 cl::desc("Hoist conditional stores even if an unconditional store does not "
158 "precede - hoist multiple conditional stores into a single "
159 "predicated store"));
160
162 "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
163 cl::desc("When merging conditional stores, do so even if the resultant "
164 "basic blocks are unlikely to be if-converted as a result"));
165
167 "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
168 cl::desc("Allow exactly one expensive instruction to be speculatively "
169 "executed"));
170
172 "max-speculation-depth", cl::Hidden, cl::init(10),
173 cl::desc("Limit maximum recursion depth when calculating costs of "
174 "speculatively executed instructions"));
175
176static cl::opt<int>
177 MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
178 cl::init(10),
179 cl::desc("Max size of a block which is still considered "
180 "small enough to thread through"));
181
182// Two is chosen to allow one negation and a logical combine.
184 BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
185 cl::init(2),
186 cl::desc("Maximum cost of combining conditions when "
187 "folding branches"));
188
190 "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
191 cl::init(2),
192 cl::desc("Multiplier to apply to threshold when determining whether or not "
193 "to fold branch to common destination when vector operations are "
194 "present"));
195
197 "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true),
198 cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));
199
201 "max-switch-cases-per-result", cl::Hidden, cl::init(16),
202 cl::desc("Limit cases to analyze when converting a switch to select"));
203
205 "max-jump-threading-live-blocks", cl::Hidden, cl::init(24),
206 cl::desc("Limit number of blocks a define in a threaded block is allowed "
207 "to be live in"));
208
210
211} // end namespace llvm
212
213STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
214STATISTIC(NumLinearMaps,
215 "Number of switch instructions turned into linear mapping");
216STATISTIC(NumLookupTables,
217 "Number of switch instructions turned into lookup tables");
219 NumLookupTablesHoles,
220 "Number of switch instructions turned into lookup tables (holes checked)");
221STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
222STATISTIC(NumFoldValueComparisonIntoPredecessors,
223 "Number of value comparisons folded into predecessor basic blocks");
224STATISTIC(NumFoldBranchToCommonDest,
225 "Number of branches folded into predecessor basic block");
227 NumHoistCommonCode,
228 "Number of common instruction 'blocks' hoisted up to the begin block");
229STATISTIC(NumHoistCommonInstrs,
230 "Number of common instructions hoisted up to the begin block");
231STATISTIC(NumSinkCommonCode,
232 "Number of common instruction 'blocks' sunk down to the end block");
233STATISTIC(NumSinkCommonInstrs,
234 "Number of common instructions sunk down to the end block");
235STATISTIC(NumSpeculations, "Number of speculative executed instructions");
236STATISTIC(NumInvokes,
237 "Number of invokes with empty resume blocks simplified into calls");
238STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
239STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
240
241namespace {
242
243// The first field contains the value that the switch produces when a certain
244// case group is selected, and the second field is a vector containing the
245// cases composing the case group.
246using SwitchCaseResultVectorTy =
248
249// The first field contains the phi node that generates a result of the switch
250// and the second field contains the value generated for a certain case in the
251// switch for that PHI.
252using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
253
254/// ValueEqualityComparisonCase - Represents a case of a switch.
255struct ValueEqualityComparisonCase {
257 BasicBlock *Dest;
258
259 ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
260 : Value(Value), Dest(Dest) {}
261
262 bool operator<(ValueEqualityComparisonCase RHS) const {
263 // Comparing pointers is ok as we only rely on the order for uniquing.
264 return Value < RHS.Value;
265 }
266
267 bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
268};
269
270class SimplifyCFGOpt {
271 const TargetTransformInfo &TTI;
272 DomTreeUpdater *DTU;
273 const DataLayout &DL;
274 ArrayRef<WeakVH> LoopHeaders;
275 const SimplifyCFGOptions &Options;
276 bool Resimplify;
277
278 Value *isValueEqualityComparison(Instruction *TI);
279 BasicBlock *getValueEqualityComparisonCases(
280 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
281 bool simplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
282 BasicBlock *Pred,
283 IRBuilder<> &Builder);
284 bool performValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
285 Instruction *PTI,
286 IRBuilder<> &Builder);
287 bool foldValueComparisonIntoPredecessors(Instruction *TI,
288 IRBuilder<> &Builder);
289
290 bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
291 bool simplifySingleResume(ResumeInst *RI);
292 bool simplifyCommonResume(ResumeInst *RI);
293 bool simplifyCleanupReturn(CleanupReturnInst *RI);
294 bool simplifyUnreachable(UnreachableInst *UI);
295 bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
296 bool simplifyDuplicateSwitchArms(SwitchInst *SI, DomTreeUpdater *DTU);
297 bool simplifyIndirectBr(IndirectBrInst *IBI);
298 bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
299 bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
300 bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
301 bool foldCondBranchOnValueKnownInPredecessor(BranchInst *BI);
302
303 bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
304 IRBuilder<> &Builder);
305 bool tryToSimplifyUncondBranchWithICmpSelectInIt(ICmpInst *ICI,
306 SelectInst *Select,
307 IRBuilder<> &Builder);
308 bool hoistCommonCodeFromSuccessors(Instruction *TI, bool AllInstsEqOnly);
309 bool hoistSuccIdenticalTerminatorToSwitchOrIf(
310 Instruction *TI, Instruction *I1,
311 SmallVectorImpl<Instruction *> &OtherSuccTIs);
312 bool speculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB);
313 bool simplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
314 BasicBlock *TrueBB, BasicBlock *FalseBB,
315 uint32_t TrueWeight, uint32_t FalseWeight);
316 bool simplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
317 const DataLayout &DL);
318 bool simplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
319 bool simplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
320 bool turnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);
321
322public:
323 SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
324 const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
325 const SimplifyCFGOptions &Opts)
326 : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
327 assert((!DTU || !DTU->hasPostDomTree()) &&
328 "SimplifyCFG is not yet capable of maintaining validity of a "
329 "PostDomTree, so don't ask for it.");
330 }
331
332 bool simplifyOnce(BasicBlock *BB);
333 bool run(BasicBlock *BB);
334
335 // Helper to set Resimplify and return change indication.
336 bool requestResimplify() {
337 Resimplify = true;
338 return true;
339 }
340};
341
342// we synthesize a || b as select a, true, b
343// we synthesize a && b as select a, b, false
344// this function determines if SI is playing one of those roles.
345[[maybe_unused]] bool
346isSelectInRoleOfConjunctionOrDisjunction(const SelectInst *SI) {
347 return ((isa<ConstantInt>(SI->getTrueValue()) &&
348 (dyn_cast<ConstantInt>(SI->getTrueValue())->isOne())) ||
349 (isa<ConstantInt>(SI->getFalseValue()) &&
350 (dyn_cast<ConstantInt>(SI->getFalseValue())->isNullValue())));
351}
352
353} // end anonymous namespace
354
355/// Return true if all the PHI nodes in the basic block \p BB
356/// receive compatible (identical) incoming values when coming from
357/// all of the predecessor blocks that are specified in \p IncomingBlocks.
358///
359/// Note that if the values aren't exactly identical, but \p EquivalenceSet
360/// is provided, and *both* of the values are present in the set,
361/// then they are considered equal.
363 BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
364 SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
365 assert(IncomingBlocks.size() == 2 &&
366 "Only for a pair of incoming blocks at the time!");
367
368 // FIXME: it is okay if one of the incoming values is an `undef` value,
369 // iff the other incoming value is guaranteed to be a non-poison value.
370 // FIXME: it is okay if one of the incoming values is a `poison` value.
371 return all_of(BB->phis(), [IncomingBlocks, EquivalenceSet](PHINode &PN) {
372 Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]);
373 Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]);
374 if (IV0 == IV1)
375 return true;
376 if (EquivalenceSet && EquivalenceSet->contains(IV0) &&
377 EquivalenceSet->contains(IV1))
378 return true;
379 return false;
380 });
381}
382
383/// Return true if it is safe to merge these two
384/// terminator instructions together.
385static bool
387 SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
388 if (SI1 == SI2)
389 return false; // Can't merge with self!
390
391 // It is not safe to merge these two switch instructions if they have a common
392 // successor, and if that successor has a PHI node, and if *that* PHI node has
393 // conflicting incoming values from the two switch blocks.
394 BasicBlock *SI1BB = SI1->getParent();
395 BasicBlock *SI2BB = SI2->getParent();
396
398 bool Fail = false;
399 for (BasicBlock *Succ : successors(SI2BB)) {
400 if (!SI1Succs.count(Succ))
401 continue;
402 if (incomingValuesAreCompatible(Succ, {SI1BB, SI2BB}))
403 continue;
404 Fail = true;
405 if (FailBlocks)
406 FailBlocks->insert(Succ);
407 else
408 break;
409 }
410
411 return !Fail;
412}
413
414/// Update PHI nodes in Succ to indicate that there will now be entries in it
415/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
416/// will be the same as those coming in from ExistPred, an existing predecessor
417/// of Succ.
418static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
419 BasicBlock *ExistPred,
420 MemorySSAUpdater *MSSAU = nullptr) {
421 for (PHINode &PN : Succ->phis())
422 PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
423 if (MSSAU)
424 if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
425 MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
426}
427
428/// Compute an abstract "cost" of speculating the given instruction,
429/// which is assumed to be safe to speculate. TCC_Free means cheap,
430/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
431/// expensive.
433 const TargetTransformInfo &TTI) {
434 return TTI.getInstructionCost(I, TargetTransformInfo::TCK_SizeAndLatency);
435}
436
437/// If we have a merge point of an "if condition" as accepted above,
438/// return true if the specified value dominates the block. We don't handle
439/// the true generality of domination here, just a special case which works
440/// well enough for us.
441///
442/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
443/// see if V (which must be an instruction) and its recursive operands
444/// that do not dominate BB have a combined cost lower than Budget and
445/// are non-trapping. If both are true, the instruction is inserted into the
446/// set and true is returned.
447///
448/// The cost for most non-trapping instructions is defined as 1 except for
449/// Select whose cost is 2.
450///
451/// After this function returns, Cost is increased by the cost of
452/// V plus its non-dominating operands. If that cost is greater than
453/// Budget, false is returned and Cost is undefined.
455 Value *V, BasicBlock *BB, Instruction *InsertPt,
456 SmallPtrSetImpl<Instruction *> &AggressiveInsts, InstructionCost &Cost,
458 SmallPtrSetImpl<Instruction *> &ZeroCostInstructions, unsigned Depth = 0) {
459 // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
460 // so limit the recursion depth.
461 // TODO: While this recursion limit does prevent pathological behavior, it
462 // would be better to track visited instructions to avoid cycles.
464 return false;
465
467 if (!I) {
468 // Non-instructions dominate all instructions and can be executed
469 // unconditionally.
470 return true;
471 }
472 BasicBlock *PBB = I->getParent();
473
474 // We don't want to allow weird loops that might have the "if condition" in
475 // the bottom of this block.
476 if (PBB == BB)
477 return false;
478
479 // If this instruction is defined in a block that contains an unconditional
480 // branch to BB, then it must be in the 'conditional' part of the "if
481 // statement". If not, it definitely dominates the region.
483 if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
484 return true;
485
486 // If we have seen this instruction before, don't count it again.
487 if (AggressiveInsts.count(I))
488 return true;
489
490 // Okay, it looks like the instruction IS in the "condition". Check to
491 // see if it's a cheap instruction to unconditionally compute, and if it
492 // only uses stuff defined outside of the condition. If so, hoist it out.
493 if (!isSafeToSpeculativelyExecute(I, InsertPt, AC))
494 return false;
495
496 // Overflow arithmetic instruction plus extract value are usually generated
497 // when a division is being replaced. But, in this case, the zero check may
498 // still be kept in the code. In that case it would be worth to hoist these
499 // two instruction out of the basic block. Let's treat this pattern as one
500 // single cheap instruction here!
501 WithOverflowInst *OverflowInst;
502 if (match(I, m_ExtractValue<1>(m_OneUse(m_WithOverflowInst(OverflowInst))))) {
503 ZeroCostInstructions.insert(OverflowInst);
504 Cost += 1;
505 } else if (!ZeroCostInstructions.contains(I))
506 Cost += computeSpeculationCost(I, TTI);
507
508 // Allow exactly one instruction to be speculated regardless of its cost
509 // (as long as it is safe to do so).
510 // This is intended to flatten the CFG even if the instruction is a division
511 // or other expensive operation. The speculation of an expensive instruction
512 // is expected to be undone in CodeGenPrepare if the speculation has not
513 // enabled further IR optimizations.
514 if (Cost > Budget &&
515 (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
516 !Cost.isValid()))
517 return false;
518
519 // Okay, we can only really hoist these out if their operands do
520 // not take us over the cost threshold.
521 for (Use &Op : I->operands())
522 if (!dominatesMergePoint(Op, BB, InsertPt, AggressiveInsts, Cost, Budget,
523 TTI, AC, ZeroCostInstructions, Depth + 1))
524 return false;
525 // Okay, it's safe to do this! Remember this instruction.
526 AggressiveInsts.insert(I);
527 return true;
528}
529
530/// Extract ConstantInt from value, looking through IntToPtr
531/// and PointerNullValue. Return NULL if value is not a constant int.
533 // Normal constant int.
535 if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy())
536 return CI;
537
538 // It is not safe to look through inttoptr or ptrtoint when using unstable
539 // pointer types.
540 if (DL.hasUnstableRepresentation(V->getType()))
541 return nullptr;
542
543 // This is some kind of pointer constant. Turn it into a pointer-sized
544 // ConstantInt if possible.
545 IntegerType *IntPtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
546
547 // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
549 return ConstantInt::get(IntPtrTy, 0);
550
551 // IntToPtr const int, we can look through this if the semantics of
552 // inttoptr for this address space are a simple (truncating) bitcast.
554 if (CE->getOpcode() == Instruction::IntToPtr)
555 if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
556 // The constant is very likely to have the right type already.
557 if (CI->getType() == IntPtrTy)
558 return CI;
559 else
560 return cast<ConstantInt>(
561 ConstantFoldIntegerCast(CI, IntPtrTy, /*isSigned=*/false, DL));
562 }
563 return nullptr;
564}
565
566namespace {
567
568/// Given a chain of or (||) or and (&&) comparison of a value against a
569/// constant, this will try to recover the information required for a switch
570/// structure.
571/// It will depth-first traverse the chain of comparison, seeking for patterns
572/// like %a == 12 or %a < 4 and combine them to produce a set of integer
573/// representing the different cases for the switch.
574/// Note that if the chain is composed of '||' it will build the set of elements
575/// that matches the comparisons (i.e. any of this value validate the chain)
576/// while for a chain of '&&' it will build the set elements that make the test
577/// fail.
578struct ConstantComparesGatherer {
579 const DataLayout &DL;
580
581 /// Value found for the switch comparison
582 Value *CompValue = nullptr;
583
584 /// Extra clause to be checked before the switch
585 Value *Extra = nullptr;
586
587 /// Set of integers to match in switch
589
590 /// Number of comparisons matched in the and/or chain
591 unsigned UsedICmps = 0;
592
593 /// If the elements in Vals matches the comparisons
594 bool IsEq = false;
595
596 // Used to check if the first matched CompValue shall be the Extra check.
597 bool IgnoreFirstMatch = false;
598 bool MultipleMatches = false;
599
600 /// Construct and compute the result for the comparison instruction Cond
601 ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
602 gather(Cond);
603 if (CompValue || !MultipleMatches)
604 return;
605 Extra = nullptr;
606 Vals.clear();
607 UsedICmps = 0;
608 IgnoreFirstMatch = true;
609 gather(Cond);
610 }
611
612 ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
613 ConstantComparesGatherer &
614 operator=(const ConstantComparesGatherer &) = delete;
615
616private:
617 /// Try to set the current value used for the comparison, it succeeds only if
618 /// it wasn't set before or if the new value is the same as the old one
619 bool setValueOnce(Value *NewVal) {
620 if (IgnoreFirstMatch) {
621 IgnoreFirstMatch = false;
622 return false;
623 }
624 if (CompValue && CompValue != NewVal) {
625 MultipleMatches = true;
626 return false;
627 }
628 CompValue = NewVal;
629 return true;
630 }
631
632 /// Try to match Instruction "I" as a comparison against a constant and
633 /// populates the array Vals with the set of values that match (or do not
634 /// match depending on isEQ).
635 /// Return false on failure. On success, the Value the comparison matched
636 /// against is placed in CompValue.
637 /// If CompValue is already set, the function is expected to fail if a match
638 /// is found but the value compared to is different.
639 bool matchInstruction(Instruction *I, bool isEQ) {
640 if (match(I, m_Not(m_Instruction(I))))
641 isEQ = !isEQ;
642
643 Value *Val;
644 if (match(I, m_NUWTrunc(m_Value(Val)))) {
645 // If we already have a value for the switch, it has to match!
646 if (!setValueOnce(Val))
647 return false;
648 UsedICmps++;
649 Vals.push_back(ConstantInt::get(cast<IntegerType>(Val->getType()), isEQ));
650 return true;
651 }
652 // If this is an icmp against a constant, handle this as one of the cases.
653 ICmpInst *ICI;
654 ConstantInt *C;
655 if (!((ICI = dyn_cast<ICmpInst>(I)) &&
656 (C = getConstantInt(I->getOperand(1), DL)))) {
657 return false;
658 }
659
660 Value *RHSVal;
661 const APInt *RHSC;
662
663 // Pattern match a special case
664 // (x & ~2^z) == y --> x == y || x == y|2^z
665 // This undoes a transformation done by instcombine to fuse 2 compares.
666 if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
667 // It's a little bit hard to see why the following transformations are
668 // correct. Here is a CVC3 program to verify them for 64-bit values:
669
670 /*
671 ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
672 x : BITVECTOR(64);
673 y : BITVECTOR(64);
674 z : BITVECTOR(64);
675 mask : BITVECTOR(64) = BVSHL(ONE, z);
676 QUERY( (y & ~mask = y) =>
677 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
678 );
679 QUERY( (y | mask = y) =>
680 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
681 );
682 */
683
684 // Please note that each pattern must be a dual implication (<--> or
685 // iff). One directional implication can create spurious matches. If the
686 // implication is only one-way, an unsatisfiable condition on the left
687 // side can imply a satisfiable condition on the right side. Dual
688 // implication ensures that satisfiable conditions are transformed to
689 // other satisfiable conditions and unsatisfiable conditions are
690 // transformed to other unsatisfiable conditions.
691
692 // Here is a concrete example of a unsatisfiable condition on the left
693 // implying a satisfiable condition on the right:
694 //
695 // mask = (1 << z)
696 // (x & ~mask) == y --> (x == y || x == (y | mask))
697 //
698 // Substituting y = 3, z = 0 yields:
699 // (x & -2) == 3 --> (x == 3 || x == 2)
700
701 // Pattern match a special case:
702 /*
703 QUERY( (y & ~mask = y) =>
704 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
705 );
706 */
707 if (match(ICI->getOperand(0),
708 m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
709 APInt Mask = ~*RHSC;
710 if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
711 // If we already have a value for the switch, it has to match!
712 if (!setValueOnce(RHSVal))
713 return false;
714
715 Vals.push_back(C);
716 Vals.push_back(
717 ConstantInt::get(C->getContext(),
718 C->getValue() | Mask));
719 UsedICmps++;
720 return true;
721 }
722 }
723
724 // Pattern match a special case:
725 /*
726 QUERY( (y | mask = y) =>
727 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
728 );
729 */
730 if (match(ICI->getOperand(0),
731 m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
732 APInt Mask = *RHSC;
733 if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
734 // If we already have a value for the switch, it has to match!
735 if (!setValueOnce(RHSVal))
736 return false;
737
738 Vals.push_back(C);
739 Vals.push_back(ConstantInt::get(C->getContext(),
740 C->getValue() & ~Mask));
741 UsedICmps++;
742 return true;
743 }
744 }
745
746 // If we already have a value for the switch, it has to match!
747 if (!setValueOnce(ICI->getOperand(0)))
748 return false;
749
750 UsedICmps++;
751 Vals.push_back(C);
752 return true;
753 }
754
755 // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
756 ConstantRange Span =
758
759 // Shift the range if the compare is fed by an add. This is the range
760 // compare idiom as emitted by instcombine.
761 Value *CandidateVal = I->getOperand(0);
762 if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
763 Span = Span.subtract(*RHSC);
764 CandidateVal = RHSVal;
765 }
766
767 // If this is an and/!= check, then we are looking to build the set of
768 // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into
769 // x != 0 && x != 1.
770 if (!isEQ)
771 Span = Span.inverse();
772
773 // If there are a ton of values, we don't want to make a ginormous switch.
774 if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
775 return false;
776 }
777
778 // If we already have a value for the switch, it has to match!
779 if (!setValueOnce(CandidateVal))
780 return false;
781
782 // Add all values from the range to the set
783 APInt Tmp = Span.getLower();
784 do
785 Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
786 while (++Tmp != Span.getUpper());
787
788 UsedICmps++;
789 return true;
790 }
791
792 /// Given a potentially 'or'd or 'and'd together collection of icmp
793 /// eq/ne/lt/gt instructions that compare a value against a constant, extract
794 /// the value being compared, and stick the list constants into the Vals
795 /// vector.
796 /// One "Extra" case is allowed to differ from the other.
797 void gather(Value *V) {
798 Value *Op0, *Op1;
799 if (match(V, m_LogicalOr(m_Value(Op0), m_Value(Op1))))
800 IsEq = true;
801 else if (match(V, m_LogicalAnd(m_Value(Op0), m_Value(Op1))))
802 IsEq = false;
803 else
804 return;
805 // Keep a stack (SmallVector for efficiency) for depth-first traversal
806 SmallVector<Value *, 8> DFT{Op0, Op1};
807 SmallPtrSet<Value *, 8> Visited{V, Op0, Op1};
808
809 while (!DFT.empty()) {
810 V = DFT.pop_back_val();
811
812 if (Instruction *I = dyn_cast<Instruction>(V)) {
813 // If it is a || (or && depending on isEQ), process the operands.
814 if (IsEq ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
815 : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
816 if (Visited.insert(Op1).second)
817 DFT.push_back(Op1);
818 if (Visited.insert(Op0).second)
819 DFT.push_back(Op0);
820
821 continue;
822 }
823
824 // Try to match the current instruction
825 if (matchInstruction(I, IsEq))
826 // Match succeed, continue the loop
827 continue;
828 }
829
830 // One element of the sequence of || (or &&) could not be match as a
831 // comparison against the same value as the others.
832 // We allow only one "Extra" case to be checked before the switch
833 if (!Extra) {
834 Extra = V;
835 continue;
836 }
837 // Failed to parse a proper sequence, abort now
838 CompValue = nullptr;
839 break;
840 }
841 }
842};
843
844} // end anonymous namespace
845
847 MemorySSAUpdater *MSSAU = nullptr) {
848 Instruction *Cond = nullptr;
850 Cond = dyn_cast<Instruction>(SI->getCondition());
851 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
852 if (BI->isConditional())
853 Cond = dyn_cast<Instruction>(BI->getCondition());
854 } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
855 Cond = dyn_cast<Instruction>(IBI->getAddress());
856 }
857
858 TI->eraseFromParent();
859 if (Cond)
861}
862
863/// Return true if the specified terminator checks
864/// to see if a value is equal to constant integer value.
865Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
866 Value *CV = nullptr;
867 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
868 // Do not permit merging of large switch instructions into their
869 // predecessors unless there is only one predecessor.
870 if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
871 CV = SI->getCondition();
872 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
873 if (BI->isConditional() && BI->getCondition()->hasOneUse()) {
874 if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
875 if (ICI->isEquality() && getConstantInt(ICI->getOperand(1), DL))
876 CV = ICI->getOperand(0);
877 } else if (auto *Trunc = dyn_cast<TruncInst>(BI->getCondition())) {
878 if (Trunc->hasNoUnsignedWrap())
879 CV = Trunc->getOperand(0);
880 }
881 }
882
883 // Unwrap any lossless ptrtoint cast (except for unstable pointers).
884 if (CV) {
885 if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
886 Value *Ptr = PTII->getPointerOperand();
887 if (DL.hasUnstableRepresentation(Ptr->getType()))
888 return CV;
889 if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
890 CV = Ptr;
891 }
892 }
893 return CV;
894}
895
896/// Given a value comparison instruction,
897/// decode all of the 'cases' that it represents and return the 'default' block.
898BasicBlock *SimplifyCFGOpt::getValueEqualityComparisonCases(
899 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
900 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
901 Cases.reserve(SI->getNumCases());
902 for (auto Case : SI->cases())
903 Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
904 Case.getCaseSuccessor()));
905 return SI->getDefaultDest();
906 }
907
908 BranchInst *BI = cast<BranchInst>(TI);
909 Value *Cond = BI->getCondition();
910 ICmpInst::Predicate Pred;
911 ConstantInt *C;
912 if (auto *ICI = dyn_cast<ICmpInst>(Cond)) {
913 Pred = ICI->getPredicate();
914 C = getConstantInt(ICI->getOperand(1), DL);
915 } else {
916 Pred = ICmpInst::ICMP_NE;
917 auto *Trunc = cast<TruncInst>(Cond);
918 C = ConstantInt::get(cast<IntegerType>(Trunc->getOperand(0)->getType()), 0);
919 }
920 BasicBlock *Succ = BI->getSuccessor(Pred == ICmpInst::ICMP_NE);
921 Cases.push_back(ValueEqualityComparisonCase(C, Succ));
922 return BI->getSuccessor(Pred == ICmpInst::ICMP_EQ);
923}
924
925/// Given a vector of bb/value pairs, remove any entries
926/// in the list that match the specified block.
927static void
929 std::vector<ValueEqualityComparisonCase> &Cases) {
930 llvm::erase(Cases, BB);
931}
932
933/// Return true if there are any keys in C1 that exist in C2 as well.
934static bool valuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
935 std::vector<ValueEqualityComparisonCase> &C2) {
936 std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
937
938 // Make V1 be smaller than V2.
939 if (V1->size() > V2->size())
940 std::swap(V1, V2);
941
942 if (V1->empty())
943 return false;
944 if (V1->size() == 1) {
945 // Just scan V2.
946 ConstantInt *TheVal = (*V1)[0].Value;
947 for (const ValueEqualityComparisonCase &VECC : *V2)
948 if (TheVal == VECC.Value)
949 return true;
950 }
951
952 // Otherwise, just sort both lists and compare element by element.
953 array_pod_sort(V1->begin(), V1->end());
954 array_pod_sort(V2->begin(), V2->end());
955 unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
956 while (i1 != e1 && i2 != e2) {
957 if ((*V1)[i1].Value == (*V2)[i2].Value)
958 return true;
959 if ((*V1)[i1].Value < (*V2)[i2].Value)
960 ++i1;
961 else
962 ++i2;
963 }
964 return false;
965}
966
/// If TI is known to be a terminator instruction and its block is known to
/// only have a single predecessor block, check to see if that predecessor is
/// also a value comparison with the same value, and if that comparison
/// determines the outcome of this comparison. If so, simplify TI. This does a
/// very limited form of jump threading.
bool SimplifyCFGOpt::simplifyEqualityComparisonWithOnlyPredecessor(
    Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
  Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
  if (!PredVal)
    return false; // Not a value comparison in predecessor.

  Value *ThisVal = isValueEqualityComparison(TI);
  assert(ThisVal && "This isn't a value comparison!!");
  if (ThisVal != PredVal)
    return false; // Different predicates.

  // TODO: Preserve branch weight metadata, similarly to how
  // foldValueComparisonIntoPredecessors preserves it.

  // Find out information about when control will move from Pred to TI's block.
  std::vector<ValueEqualityComparisonCase> PredCases;
  BasicBlock *PredDef =
      getValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
  eliminateBlockCases(PredDef, PredCases); // Remove default from cases.

  // Find information about how control leaves this block.
  std::vector<ValueEqualityComparisonCase> ThisCases;
  BasicBlock *ThisDef = getValueEqualityComparisonCases(TI, ThisCases);
  eliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.

  // If TI's block is the default block from Pred's comparison, potentially
  // simplify TI based on this knowledge.
  if (PredDef == TI->getParent()) {
    // If we are here, we know that the value is none of those cases listed in
    // PredCases. If there are any cases in ThisCases that are in PredCases, we
    // can simplify TI.
    if (!valuesOverlap(PredCases, ThisCases))
      return false;

    if (isa<BranchInst>(TI)) {
      // Okay, one of the successors of this condbr is dead. Convert it to a
      // uncond br.
      assert(ThisCases.size() == 1 && "Branch can only have one case!");
      // Insert the new branch.
      Instruction *NI = Builder.CreateBr(ThisDef);
      (void)NI;

      // Remove PHI node entries for the dead edge.
      ThisCases[0].Dest->removePredecessor(PredDef);

      LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                        << "Through successor TI: " << *TI << "Leaving: " << *NI
                        << "\n");

      if (DTU)
        DTU->applyUpdates(
            {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});

      return true;
    }

    SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
    // Okay, TI has cases that are statically dead, prune them away.
    SmallPtrSet<Constant *, 16> DeadCases;
    for (const ValueEqualityComparisonCase &Case : PredCases)
      DeadCases.insert(Case.Value);

    LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                      << "Through successor TI: " << *TI);

    SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
    // Iterate backwards so that removeCase() does not invalidate the
    // not-yet-visited case iterators.
    for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
      --i;
      auto *Successor = i->getCaseSuccessor();
      if (DTU)
        ++NumPerSuccessorCases[Successor];
      if (DeadCases.count(i->getCaseValue())) {
        Successor->removePredecessor(PredDef);
        SI.removeCase(i);
        if (DTU)
          --NumPerSuccessorCases[Successor];
      }
    }

    // Only delete a DomTree edge if no case targeting that successor remains.
    if (DTU) {
      std::vector<DominatorTree::UpdateType> Updates;
      for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
        if (I.second == 0)
          Updates.push_back({DominatorTree::Delete, PredDef, I.first});
      DTU->applyUpdates(Updates);
    }

    LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
    return true;
  }

  // Otherwise, TI's block must correspond to some matched value. Find out
  // which value (or set of values) this is.
  ConstantInt *TIV = nullptr;
  BasicBlock *TIBB = TI->getParent();
  for (const auto &[Value, Dest] : PredCases)
    if (Dest == TIBB) {
      if (TIV)
        return false; // Cannot handle multiple values coming to this block.
      TIV = Value;
    }
  assert(TIV && "No edge from pred to succ?");

  // Okay, we found the one constant that our value can be if we get into TI's
  // BB. Find out which successor will unconditionally be branched to.
  BasicBlock *TheRealDest = nullptr;
  for (const auto &[Value, Dest] : ThisCases)
    if (Value == TIV) {
      TheRealDest = Dest;
      break;
    }

  // If not handled by any explicit cases, it is handled by the default case.
  if (!TheRealDest)
    TheRealDest = ThisDef;

  SmallPtrSet<BasicBlock *, 2> RemovedSuccs;

  // Remove PHI node entries for dead edges. CheckEdge is used so that exactly
  // one edge to TheRealDest is preserved; duplicate edges to it still have
  // their PHI entries removed but are not reported as removed successors.
  BasicBlock *CheckEdge = TheRealDest;
  for (BasicBlock *Succ : successors(TIBB))
    if (Succ != CheckEdge) {
      if (Succ != TheRealDest)
        RemovedSuccs.insert(Succ);
      Succ->removePredecessor(TIBB);
    } else
      CheckEdge = nullptr;

  // Insert the new branch.
  Instruction *NI = Builder.CreateBr(TheRealDest);
  (void)NI;

  LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                    << "Through successor TI: " << *TI << "Leaving: " << *NI
                    << "\n");

  if (DTU) {
    SmallVector<DominatorTree::UpdateType, 2> Updates;
    Updates.reserve(RemovedSuccs.size());
    for (auto *RemovedSucc : RemovedSuccs)
      Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc});
    DTU->applyUpdates(Updates);
  }
  return true;
}
1120
1121namespace {
1122
1123/// This class implements a stable ordering of constant
1124/// integers that does not depend on their address. This is important for
1125/// applications that sort ConstantInt's to ensure uniqueness.
1126struct ConstantIntOrdering {
1127 bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
1128 return LHS->getValue().ult(RHS->getValue());
1129 }
1130};
1131
1132} // end anonymous namespace
1133
1135 ConstantInt *const *P2) {
1136 const ConstantInt *LHS = *P1;
1137 const ConstantInt *RHS = *P2;
1138 if (LHS == RHS)
1139 return 0;
1140 return LHS->getValue().ult(RHS->getValue()) ? 1 : -1;
1141}
1142
1143/// Get Weights of a given terminator, the default weight is at the front
1144/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
1145/// metadata.
1147 SmallVectorImpl<uint64_t> &Weights) {
1148 MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
1149 assert(MD && "Invalid branch-weight metadata");
1150 extractFromBranchWeightMD64(MD, Weights);
1151
1152 // If TI is a conditional eq, the default case is the false case,
1153 // and the corresponding branch-weight data is at index 2. We swap the
1154 // default weight to be the first entry.
1155 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1156 assert(Weights.size() == 2);
1157 auto *ICI = dyn_cast<ICmpInst>(BI->getCondition());
1158 if (!ICI)
1159 return;
1160
1161 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
1162 std::swap(Weights.front(), Weights.back());
1163 }
1164}
1165
    BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
  Instruction *PTI = PredBlock->getTerminator();

  // If we have bonus instructions, clone them into the predecessor block.
  // Note that there may be multiple predecessor blocks, so we cannot move
  // bonus instructions to a predecessor block.
  for (Instruction &BonusInst : *BB) {
    if (BonusInst.isTerminator())
      continue;

    Instruction *NewBonusInst = BonusInst.clone();

    if (!NewBonusInst->getDebugLoc().isSameSourceLocation(PTI->getDebugLoc())) {
      // Unless the instruction has the same !dbg location as the original
      // branch, drop it. When we fold the bonus instructions we want to make
      // sure we reset their debug locations in order to avoid stepping on
      // dead code caused by folding dead branches.
      NewBonusInst->setDebugLoc(DebugLoc::getDropped());
    } else if (const DebugLoc &DL = NewBonusInst->getDebugLoc()) {
      // Same source location as the predecessor's terminator: keep the
      // location but remap its source atom instance into VMap.
      mapAtomInstance(DL, VMap);
    }

    RemapInstruction(NewBonusInst, VMap,

    // If we speculated an instruction, we need to drop any metadata that may
    // result in undefined behavior, as the metadata might have been valid
    // only given the branch precondition.
    // Similarly strip attributes on call parameters that may cause UB in
    // location the call is moved to.
    NewBonusInst->dropUBImplyingAttrsAndMetadata();

    // The clone goes immediately before the predecessor's terminator.
    NewBonusInst->insertInto(PredBlock, PTI->getIterator());
    auto Range = NewBonusInst->cloneDebugInfoFrom(&BonusInst);
    RemapDbgRecordRange(NewBonusInst->getModule(), Range, VMap,

    NewBonusInst->takeName(&BonusInst);
    BonusInst.setName(NewBonusInst->getName() + ".old");
    VMap[&BonusInst] = NewBonusInst;

    // Update (liveout) uses of bonus instructions,
    // now that the bonus instruction has been cloned into predecessor.
    // Note that we expect to be in a block-closed SSA form for this to work!
    for (Use &U : make_early_inc_range(BonusInst.uses())) {
      auto *UI = cast<Instruction>(U.getUser());
      auto *PN = dyn_cast<PHINode>(UI);
      if (!PN) {
        assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
               "If the user is not a PHI node, then it should be in the same "
               "block as, and come after, the original bonus instruction.");
        continue; // Keep using the original bonus instruction.
      }
      // Is this the block-closed SSA form PHI node?
      if (PN->getIncomingBlock(U) == BB)
        continue; // Great, keep using the original bonus instruction.
      // The only other alternative is an "use" when coming from
      // the predecessor block - here we should refer to the cloned bonus instr.
      assert(PN->getIncomingBlock(U) == PredBlock &&
             "Not in block-closed SSA form?");
      U.set(NewBonusInst);
    }
  }

  // Key Instructions: We may have propagated atom info into the pred. If the
  // pred's terminator already has atom info do nothing as merging would drop
  // one atom group anyway. If it doesn't, propagate the remapped atom group
  // from BB's terminator.
  if (auto &PredDL = PTI->getDebugLoc()) {
    auto &DL = BB->getTerminator()->getDebugLoc();
    if (!PredDL->getAtomGroup() && DL && DL->getAtomGroup() &&
        PredDL.isSameSourceLocation(DL)) {
      PTI->setDebugLoc(DL);
      RemapSourceAtom(PTI, VMap);
    }
  }
}
1244
/// Merge the value-equality comparison terminator \p TI (a switch, or an
/// eq/ne-style branch on \p CV) into its predecessor's terminator \p PTI,
/// which compares the same value, building a single combined switch in the
/// predecessor and rescaling branch weights where both sides provide them.
bool SimplifyCFGOpt::performValueComparisonIntoPredecessorFolding(
    Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
  BasicBlock *BB = TI->getParent();
  BasicBlock *Pred = PTI->getParent();

  // Figure out which 'cases' to copy from SI to PSI.
  std::vector<ValueEqualityComparisonCase> BBCases;
  BasicBlock *BBDefault = getValueEqualityComparisonCases(TI, BBCases);

  std::vector<ValueEqualityComparisonCase> PredCases;
  BasicBlock *PredDefault = getValueEqualityComparisonCases(PTI, PredCases);

  // Based on whether the default edge from PTI goes to BB or not, fill in
  // PredCases and PredDefault with the new switch cases we would like to
  // build.
  SmallMapVector<BasicBlock *, int, 8> NewSuccessors;

  // Update the branch weight metadata along the way
  SmallVector<uint64_t, 8> Weights;
  bool PredHasWeights = hasBranchWeightMD(*PTI);
  bool SuccHasWeights = hasBranchWeightMD(*TI);

  if (PredHasWeights) {
    getBranchWeights(PTI, Weights);
    // branch-weight metadata is inconsistent here.
    if (Weights.size() != 1 + PredCases.size())
      PredHasWeights = SuccHasWeights = false;
  } else if (SuccHasWeights)
    // If there are no predecessor weights but there are successor weights,
    // populate Weights with 1, which will later be scaled to the sum of
    // successor's weights
    Weights.assign(1 + PredCases.size(), 1);

  SmallVector<uint64_t, 8> SuccWeights;
  if (SuccHasWeights) {
    getBranchWeights(TI, SuccWeights);
    // branch-weight metadata is inconsistent here.
    if (SuccWeights.size() != 1 + BBCases.size())
      PredHasWeights = SuccHasWeights = false;
  } else if (PredHasWeights)
    SuccWeights.assign(1 + BBCases.size(), 1);

  if (PredDefault == BB) {
    // If this is the default destination from PTI, only the edges in TI
    // that don't occur in PTI, or that branch to BB will be activated.
    std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
    for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
      if (PredCases[i].Dest != BB)
        PTIHandled.insert(PredCases[i].Value);
      else {
        // The default destination is BB, we don't need explicit targets.
        // Swap-with-back removal; re-examine index i afterwards.
        std::swap(PredCases[i], PredCases.back());

        if (PredHasWeights || SuccHasWeights) {
          // Increase weight for the default case.
          Weights[0] += Weights[i + 1];
          std::swap(Weights[i + 1], Weights.back());
          Weights.pop_back();
        }

        PredCases.pop_back();
        --i;
        --e;
      }

    // Reconstruct the new switch statement we will be building.
    if (PredDefault != BBDefault) {
      PredDefault->removePredecessor(Pred);
      if (DTU && PredDefault != BB)
        Updates.push_back({DominatorTree::Delete, Pred, PredDefault});
      PredDefault = BBDefault;
      ++NewSuccessors[BBDefault];
    }

    unsigned CasesFromPred = Weights.size();
    uint64_t ValidTotalSuccWeight = 0;
    for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
      if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
        PredCases.push_back(BBCases[i]);
        ++NewSuccessors[BBCases[i].Dest];
        if (SuccHasWeights || PredHasWeights) {
          // The default weight is at index 0, so weight for the ith case
          // should be at index i+1. Scale the cases from successor by
          // PredDefaultWeight (Weights[0]).
          Weights.push_back(Weights[0] * SuccWeights[i + 1]);
          ValidTotalSuccWeight += SuccWeights[i + 1];
        }
      }

    if (SuccHasWeights || PredHasWeights) {
      ValidTotalSuccWeight += SuccWeights[0];
      // Scale the cases from predecessor by ValidTotalSuccWeight.
      for (unsigned i = 1; i < CasesFromPred; ++i)
        Weights[i] *= ValidTotalSuccWeight;
      // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
      Weights[0] *= SuccWeights[0];
    }
  } else {
    // If this is not the default destination from PSI, only the edges
    // in SI that occur in PSI with a destination of BB will be
    // activated.
    std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
    std::map<ConstantInt *, uint64_t> WeightsForHandled;
    for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
      if (PredCases[i].Dest == BB) {
        PTIHandled.insert(PredCases[i].Value);

        if (PredHasWeights || SuccHasWeights) {
          // Remember this case's weight; it is re-attached below once the
          // final destination of the constant is known.
          WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
          std::swap(Weights[i + 1], Weights.back());
          Weights.pop_back();
        }

        std::swap(PredCases[i], PredCases.back());
        PredCases.pop_back();
        --i;
        --e;
      }

    // Okay, now we know which constants were sent to BB from the
    // predecessor. Figure out where they will all go now.
    for (const ValueEqualityComparisonCase &Case : BBCases)
      if (PTIHandled.count(Case.Value)) {
        // If this is one we are capable of getting...
        if (PredHasWeights || SuccHasWeights)
          Weights.push_back(WeightsForHandled[Case.Value]);
        PredCases.push_back(Case);
        ++NewSuccessors[Case.Dest];
        PTIHandled.erase(Case.Value); // This constant is taken care of
      }

    // If there are any constants vectored to BB that TI doesn't handle,
    // they must go to the default destination of TI.
    for (ConstantInt *I : PTIHandled) {
      if (PredHasWeights || SuccHasWeights)
        Weights.push_back(WeightsForHandled[I]);
      PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
      ++NewSuccessors[BBDefault];
    }
  }

  // Okay, at this point, we know which new successor Pred will get. Make
  // sure we update the number of entries in the PHI nodes for these
  // successors.
  SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
  if (DTU) {
    SuccsOfPred = {llvm::from_range, successors(Pred)};
    Updates.reserve(Updates.size() + NewSuccessors.size());
  }
  for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
       NewSuccessors) {
    // One PHI entry per new edge from Pred to this successor.
    for (auto I : seq(NewSuccessor.second)) {
      (void)I;
      addPredecessorToBlock(NewSuccessor.first, Pred, BB);
    }
    if (DTU && !SuccsOfPred.contains(NewSuccessor.first))
      Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
  }

  Builder.SetInsertPoint(PTI);
  // Convert pointer to int before we switch.
  if (CV->getType()->isPointerTy()) {
    assert(!DL.hasUnstableRepresentation(CV->getType()) &&
           "Should not end up here with unstable pointers");
    CV =
        Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr");
  }

  // Now that the successors are updated, create the new Switch instruction.
  SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size());
  NewSI->setDebugLoc(PTI->getDebugLoc());
  for (ValueEqualityComparisonCase &V : PredCases)
    NewSI->addCase(V.Value, V.Dest);

  if (PredHasWeights || SuccHasWeights)
    setFittedBranchWeights(*NewSI, Weights, /*IsExpected=*/false,
                           /*ElideAllZero=*/true);

  // Okay, last check. If BB is still a successor of PSI, then we must
  // have an infinite loop case. If so, add an infinitely looping block
  // to handle the case to preserve the behavior of the code.
  BasicBlock *InfLoopBlock = nullptr;
  for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
    if (NewSI->getSuccessor(i) == BB) {
      if (!InfLoopBlock) {
        // Insert it at the end of the function, because it's either code,
        // or it won't matter if it's hot. :)
        InfLoopBlock =
            BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
        BranchInst::Create(InfLoopBlock, InfLoopBlock);
        if (DTU)
          Updates.push_back(
              {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
      }
      NewSI->setSuccessor(i, InfLoopBlock);
    }

  if (DTU) {
    if (InfLoopBlock)
      Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});

    Updates.push_back({DominatorTree::Delete, Pred, BB});

    DTU->applyUpdates(Updates);
  }

  ++NumFoldValueComparisonIntoPredecessors;
  return true;
}
1458
1459/// The specified terminator is a value equality comparison instruction
1460/// (either a switch or a branch on "X == c").
1461/// See if any of the predecessors of the terminator block are value comparisons
1462/// on the same value. If so, and if safe to do so, fold them together.
1463bool SimplifyCFGOpt::foldValueComparisonIntoPredecessors(Instruction *TI,
1464 IRBuilder<> &Builder) {
1465 BasicBlock *BB = TI->getParent();
1466 Value *CV = isValueEqualityComparison(TI); // CondVal
1467 assert(CV && "Not a comparison?");
1468
1469 bool Changed = false;
1470
1471 SmallSetVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
1472 while (!Preds.empty()) {
1473 BasicBlock *Pred = Preds.pop_back_val();
1474 Instruction *PTI = Pred->getTerminator();
1475
1476 // Don't try to fold into itself.
1477 if (Pred == BB)
1478 continue;
1479
1480 // See if the predecessor is a comparison with the same value.
1481 Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
1482 if (PCV != CV)
1483 continue;
1484
1485 SmallSetVector<BasicBlock *, 4> FailBlocks;
1486 if (!safeToMergeTerminators(TI, PTI, &FailBlocks)) {
1487 for (auto *Succ : FailBlocks) {
1488 if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU))
1489 return false;
1490 }
1491 }
1492
1493 performValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
1494 Changed = true;
1495 }
1496 return Changed;
1497}
1498
1499// If we would need to insert a select that uses the value of this invoke
1500// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would
1501// need to do this), we can't hoist the invoke, as there is nowhere to put the
1502// select in this case.
1504 Instruction *I1, Instruction *I2) {
1505 for (BasicBlock *Succ : successors(BB1)) {
1506 for (const PHINode &PN : Succ->phis()) {
1507 Value *BB1V = PN.getIncomingValueForBlock(BB1);
1508 Value *BB2V = PN.getIncomingValueForBlock(BB2);
1509 if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
1510 return false;
1511 }
1512 }
1513 }
1514 return true;
1515}
1516
1517// Get interesting characteristics of instructions that
1518// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of
1519// instructions can be reordered across.
1525
1527 unsigned Flags = 0;
1528 if (I->mayReadFromMemory())
1529 Flags |= SkipReadMem;
1530 // We can't arbitrarily move around allocas, e.g. moving allocas (especially
1531 // inalloca) across stacksave/stackrestore boundaries.
1532 if (I->mayHaveSideEffects() || isa<AllocaInst>(I))
1533 Flags |= SkipSideEffect;
1535 Flags |= SkipImplicitControlFlow;
1536 return Flags;
1537}
1538
1539// Returns true if it is safe to reorder an instruction across preceding
1540// instructions in a basic block.
1541static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
1542 // Don't reorder a store over a load.
1543 if ((Flags & SkipReadMem) && I->mayWriteToMemory())
1544 return false;
1545
1546 // If we have seen an instruction with side effects, it's unsafe to reorder an
1547 // instruction which reads memory or itself has side effects.
1548 if ((Flags & SkipSideEffect) &&
1549 (I->mayReadFromMemory() || I->mayHaveSideEffects() || isa<AllocaInst>(I)))
1550 return false;
1551
1552 // Reordering across an instruction which does not necessarily transfer
1553 // control to the next instruction is speculation.
1555 return false;
1556
1557 // Hoisting of llvm.deoptimize is only legal together with the next return
1558 // instruction, which this pass is not always able to do.
1559 if (auto *CB = dyn_cast<CallBase>(I))
1560 if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
1561 return false;
1562
1563 // It's also unsafe/illegal to hoist an instruction above its instruction
1564 // operands
1565 BasicBlock *BB = I->getParent();
1566 for (Value *Op : I->operands()) {
1567 if (auto *J = dyn_cast<Instruction>(Op))
1568 if (J->getParent() == BB)
1569 return false;
1570 }
1571
1572 return true;
1573}
1574
1575static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
1576
1577/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical
1578/// instructions \p I1 and \p I2 can and should be hoisted.
1580 const TargetTransformInfo &TTI) {
1581 // If we're going to hoist a call, make sure that the two instructions
1582 // we're commoning/hoisting are both marked with musttail, or neither of
1583 // them is marked as such. Otherwise, we might end up in a situation where
1584 // we hoist from a block where the terminator is a `ret` to a block where
1585 // the terminator is a `br`, and `musttail` calls expect to be followed by
1586 // a return.
1587 auto *C1 = dyn_cast<CallInst>(I1);
1588 auto *C2 = dyn_cast<CallInst>(I2);
1589 if (C1 && C2)
1590 if (C1->isMustTailCall() != C2->isMustTailCall())
1591 return false;
1592
1593 if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2))
1594 return false;
1595
1596 // If any of the two call sites has nomerge or convergent attribute, stop
1597 // hoisting.
1598 if (const auto *CB1 = dyn_cast<CallBase>(I1))
1599 if (CB1->cannotMerge() || CB1->isConvergent())
1600 return false;
1601 if (const auto *CB2 = dyn_cast<CallBase>(I2))
1602 if (CB2->cannotMerge() || CB2->isConvergent())
1603 return false;
1604
1605 return true;
1606}
1607
1608/// Hoists DbgVariableRecords from \p I1 and \p OtherInstrs that are identical
1609/// in lock-step to \p TI. This matches how dbg.* intrinsics are hoisting in
1610/// hoistCommonCodeFromSuccessors. e.g. The input:
1611/// I1 DVRs: { x, z },
1612/// OtherInsts: { I2 DVRs: { x, y, z } }
1613/// would result in hoisting only DbgVariableRecord x.
1615 Instruction *TI, Instruction *I1,
1616 SmallVectorImpl<Instruction *> &OtherInsts) {
1617 if (!I1->hasDbgRecords())
1618 return;
1619 using CurrentAndEndIt =
1620 std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
1621 // Vector of {Current, End} iterators.
1623 Itrs.reserve(OtherInsts.size() + 1);
1624 // Helper lambdas for lock-step checks:
1625 // Return true if this Current == End.
1626 auto atEnd = [](const CurrentAndEndIt &Pair) {
1627 return Pair.first == Pair.second;
1628 };
1629 // Return true if all Current are identical.
1630 auto allIdentical = [](const SmallVector<CurrentAndEndIt> &Itrs) {
1631 return all_of(make_first_range(ArrayRef(Itrs).drop_front()),
1633 return Itrs[0].first->isIdenticalToWhenDefined(*I);
1634 });
1635 };
1636
1637 // Collect the iterators.
1638 Itrs.push_back(
1639 {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
1640 for (Instruction *Other : OtherInsts) {
1641 if (!Other->hasDbgRecords())
1642 return;
1643 Itrs.push_back(
1644 {Other->getDbgRecordRange().begin(), Other->getDbgRecordRange().end()});
1645 }
1646
1647 // Iterate in lock-step until any of the DbgRecord lists are exausted. If
1648 // the lock-step DbgRecord are identical, hoist all of them to TI.
1649 // This replicates the dbg.* intrinsic behaviour in
1650 // hoistCommonCodeFromSuccessors.
1651 while (none_of(Itrs, atEnd)) {
1652 bool HoistDVRs = allIdentical(Itrs);
1653 for (CurrentAndEndIt &Pair : Itrs) {
1654 // Increment Current iterator now as we may be about to move the
1655 // DbgRecord.
1656 DbgRecord &DR = *Pair.first++;
1657 if (HoistDVRs) {
1658 DR.removeFromParent();
1659 TI->getParent()->insertDbgRecordBefore(&DR, TI->getIterator());
1660 }
1661 }
1662 }
1663}
1664
1666 const Instruction *I2) {
1667 if (I1->isIdenticalToWhenDefined(I2, /*IntersectAttrs=*/true))
1668 return true;
1669
1670 if (auto *Cmp1 = dyn_cast<CmpInst>(I1))
1671 if (auto *Cmp2 = dyn_cast<CmpInst>(I2))
1672 return Cmp1->getPredicate() == Cmp2->getSwappedPredicate() &&
1673 Cmp1->getOperand(0) == Cmp2->getOperand(1) &&
1674 Cmp1->getOperand(1) == Cmp2->getOperand(0);
1675
1676 if (I1->isCommutative() && I1->isSameOperationAs(I2)) {
1677 return I1->getOperand(0) == I2->getOperand(1) &&
1678 I1->getOperand(1) == I2->getOperand(0) &&
1679 equal(drop_begin(I1->operands(), 2), drop_begin(I2->operands(), 2));
1680 }
1681
1682 return false;
1683}
1684
1685/// If the target supports conditional faulting,
1686/// we look for the following pattern:
1687/// \code
1688/// BB:
1689/// ...
1690/// %cond = icmp ult %x, %y
1691/// br i1 %cond, label %TrueBB, label %FalseBB
1692/// FalseBB:
1693/// store i32 1, ptr %q, align 4
1694/// ...
1695/// TrueBB:
1696/// %maskedloadstore = load i32, ptr %b, align 4
1697/// store i32 %maskedloadstore, ptr %p, align 4
1698/// ...
1699/// \endcode
1700///
1701/// and transform it into:
1702///
1703/// \code
1704/// BB:
1705/// ...
1706/// %cond = icmp ult %x, %y
1707/// %maskedloadstore = cload i32, ptr %b, %cond
1708/// cstore i32 %maskedloadstore, ptr %p, %cond
1709/// cstore i32 1, ptr %q, ~%cond
1710/// br i1 %cond, label %TrueBB, label %FalseBB
1711/// FalseBB:
1712/// ...
1713/// TrueBB:
1714/// ...
1715/// \endcode
1716///
1717/// where cload/cstore are represented by llvm.masked.load/store intrinsics,
1718/// e.g.
1719///
1720/// \code
1721/// %vcond = bitcast i1 %cond to <1 x i1>
1722/// %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
1723/// (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
1724/// %maskedloadstore = bitcast <1 x i32> %v0 to i32
1725/// call void @llvm.masked.store.v1i32.p0
1726/// (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
1727/// %cond.not = xor i1 %cond, true
1728/// %vcond.not = bitcast i1 %cond.not to <1 x i>
1729/// call void @llvm.masked.store.v1i32.p0
1730/// (<1 x i32> <i32 1>, ptr %q, i32 4, <1x i1> %vcond.not)
1731/// \endcode
1732///
1733/// So we need to turn hoisted load/store into cload/cstore.
1734///
1735/// \param BI The branch instruction.
1736/// \param SpeculatedConditionalLoadsStores The load/store instructions that
1737/// will be speculated.
1738/// \param Invert indicates if speculates FalseBB. Only used in triangle CFG.
// NOTE(review): the lines below continue a function signature whose opening
// line (original file line 1739) is elided from this excerpt.
1740 BranchInst *BI,
1741 SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
1742 std::optional<bool> Invert, Instruction *Sel) {
1743 auto &Context = BI->getParent()->getContext();
// All masks are <1 x i1>: conditional faulting load/store is emitted for
// scalar types only (see the assert in the loop below), so a 1-element
// vector condition is sufficient.
1744 auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
1745 auto *Cond = BI->getOperand(0);
1746 // Construct the condition if needed.
1747 BasicBlock *BB = BI->getParent();
1748 Value *Mask = nullptr;
1749 Value *MaskFalse = nullptr;
1750 Value *MaskTrue = nullptr;
// Triangle CFG (Invert has a value): one mask, possibly the negated branch
// condition. Diamond CFG (no value): build both true/false masks up front
// and pick per-instruction below based on which successor it came from.
1751 if (Invert.has_value()) {
1752 IRBuilder<> Builder(Sel ? Sel : SpeculatedConditionalLoadsStores.back());
1753 Mask = Builder.CreateBitCast(
1754 *Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
1755 VCondTy);
1756 } else {
1757 IRBuilder<> Builder(BI);
1758 MaskFalse = Builder.CreateBitCast(
1759 Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy);
1760 MaskTrue = Builder.CreateBitCast(Cond, VCondTy);
1761 }
1762 auto PeekThroughBitcasts = [](Value *V) {
1763 while (auto *BitCast = dyn_cast<BitCastInst>(V))
1764 V = BitCast->getOperand(0);
1765 return V;
1766 };
1767 for (auto *I : SpeculatedConditionalLoadsStores) {
1768 IRBuilder<> Builder(Invert.has_value() ? I : BI);
1769 if (!Invert.has_value())
1770 Mask = I->getParent() == BI->getSuccessor(0) ? MaskTrue : MaskFalse;
1771 // We currently assume conditional faulting load/store is supported for
1772 // scalar types only when creating new instructions. This can be easily
1773 // extended for vector types in the future.
1774 assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
1775 auto *Op0 = I->getOperand(0);
1776 CallInst *MaskedLoadStore = nullptr;
1777 if (auto *LI = dyn_cast<LoadInst>(I)) {
1778 // Handle Load.
1779 auto *Ty = I->getType();
1780 PHINode *PN = nullptr;
1781 Value *PassThru = nullptr;
1782 if (Invert.has_value())
1783 for (User *U : I->users()) {
1784 if ((PN = dyn_cast<PHINode>(U))) {
// The PHI's other incoming value becomes the masked load's
// pass-through: when the condition is false the load yields that
// value instead of faulting.
1785 PassThru = Builder.CreateBitCast(
1786 PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
1787 FixedVectorType::get(Ty, 1));
1788 } else if (auto *Ins = cast<Instruction>(U);
1789 Sel && Ins->getParent() == BB) {
1790 // This happens when a store and/or a speculative instruction
1791 // between load and store were hoisted to the BB. Make sure the
1792 // masked load is inserted before its use.
1793 // We assume there's only one such use.
1794 Builder.SetInsertPoint(Ins);
1795 }
1796 }
1797 MaskedLoadStore = Builder.CreateMaskedLoad(
1798 FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
1799 Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty);
1800 if (PN)
1801 PN->setIncomingValue(PN->getBasicBlockIndex(BB), NewLoadStore);
1802 I->replaceAllUsesWith(NewLoadStore);
1803 } else {
1804 // Handle Store.
1805 auto *StoredVal = Builder.CreateBitCast(
1806 PeekThroughBitcasts(Op0), FixedVectorType::get(Op0->getType(), 1));
1807 MaskedLoadStore = Builder.CreateMaskedStore(
1808 StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
1809 }
1810 // For non-debug metadata, only !annotation, !range, !nonnull and !align are
1811 // kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
1812 //
1813 // !nonnull, !align : Not support pointer type, no need to keep.
1814 // !range: Load type is changed from scalar to vector, but the metadata on
1815 // vector specifies a per-element range, so the semantics stay the
1816 // same. Keep it.
1817 // !annotation: Not impact semantics. Keep it.
1818 if (const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range))
1819 MaskedLoadStore->addRangeRetAttr(getConstantRangeFromMetadata(*Ranges));
1820 I->dropUBImplyingAttrsAndUnknownMetadata({LLVMContext::MD_annotation});
1821 // FIXME: DIAssignID is not supported for masked store yet.
1822 // (Verifier::visitDIAssignIDMetadata)
// The MDKind parameter is deliberately ignored; matching is done on the
// node's own metadata kind.
1824 I->eraseMetadataIf([](unsigned MDKind, MDNode *Node) {
1825 return Node->getMetadataID() == Metadata::DIAssignIDKind;
1826 });
1827 MaskedLoadStore->copyMetadata(*I);
1828 I->eraseFromParent();
1829 }
1830}
1831
// NOTE(review): the line below is the tail of a predicate's parameter list;
// its opening signature line (original file line 1832) is elided from this
// excerpt. The predicate decides whether a load/store may be hoisted via
// conditional-faulting (masked) load/store.
1833 const TargetTransformInfo &TTI) {
1834 // Do not handle volatile or atomic loads/stores.
1835 bool IsStore = false;
1836 if (auto *L = dyn_cast<LoadInst>(I)) {
1837 if (!L->isSimple() || !HoistLoadsWithCondFaulting)
1838 return false;
1839 } else if (auto *S = dyn_cast<StoreInst>(I)) {
1840 if (!S->isSimple() || !HoistStoresWithCondFaulting)
1841 return false;
1842 IsStore = true;
1843 } else
1844 return false;
1845
1846 // llvm.masked.load/store use i32 for alignment while load/store use i64.
1847 // That's why we have the alignment limitation.
1848 // FIXME: Update the prototype of the intrinsics?
1849 return TTI.hasConditionalLoadStoreForType(getLoadStoreType(I), IsStore) &&
// NOTE(review): the second operand of this && (original file line 1850, the
// alignment bound mentioned above) is elided from this excerpt.
1851}
1852
1853/// Hoist any common code in the successor blocks up into the block. This
1854/// function guarantees that BB dominates all successors. If AllInstsEqOnly is
1855/// given, only perform hoisting in case all successors blocks contain matching
1856/// instructions only. In that case, all instructions can be hoisted and the
1857/// original branch will be replaced and selects for PHIs are added.
1858bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
1859 bool AllInstsEqOnly) {
1860 // This does very trivial matching, with limited scanning, to find identical
1861 // instructions in the two blocks. In particular, we don't want to get into
1862 // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
1863 // such, we currently just scan for obviously identical instructions in an
1864 // identical order, possibly separated by the same number of non-identical
1865 // instructions.
1866 BasicBlock *BB = TI->getParent();
1867 unsigned int SuccSize = succ_size(BB);
1868 if (SuccSize < 2)
1869 return false;
1870
1871 // If either of the blocks has its address taken, then we can't do this fold,
1872 // because the code we'd hoist would no longer run when we jump into the block
1873 // by its address.
1874 for (auto *Succ : successors(BB)) {
1875 if (Succ->hasAddressTaken())
1876 return false;
1877 if (Succ->getSinglePredecessor())
1878 continue;
1879 // If Succ has >1 predecessors, continue to check if the Succ contains only
1880 // one `unreachable` inst. Since executing `unreachable` inst is an UB, we
1881 // can relax the condition based on the assumption that the program would
1882 // never enter Succ and trigger such an UB.
1883 if (isa<UnreachableInst>(*Succ->begin()))
1884 continue;
1885 return false;
1886 }
1887 // The second of pair is a SkipFlags bitmask.
1888 using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
1889 SmallVector<SuccIterPair, 8> SuccIterPairs;
1890 for (auto *Succ : successors(BB)) {
1891 BasicBlock::iterator SuccItr = Succ->begin();
// Hoisting across PHIs is not handled here; bail out entirely.
1892 if (isa<PHINode>(*SuccItr))
1893 return false;
1894 SuccIterPairs.push_back(SuccIterPair(SuccItr, 0));
1895 }
1896
1897 if (AllInstsEqOnly) {
1898 // Check if all instructions in the successor blocks match. This allows
1899 // hoisting all instructions and removing the blocks we are hoisting from,
1900 // so does not add any new instructions.
// NOTE(review): the declaration of `Succs` (original file line 1901) is
// elided from this excerpt — presumably a container of successors(BB);
// confirm against the upstream source.
1902 // Check if sizes and terminators of all successors match.
1903 bool AllSame = none_of(Succs, [&Succs](BasicBlock *Succ) {
1904 Instruction *Term0 = Succs[0]->getTerminator();
1905 Instruction *Term = Succ->getTerminator();
1906 return !Term->isSameOperationAs(Term0) ||
1907 !equal(Term->operands(), Term0->operands()) ||
1908 Succs[0]->size() != Succ->size();
1909 });
1910 if (!AllSame)
1911 return false;
1912 if (AllSame) {
// Walk all successors in lockstep from their ends; all positions must
// hold pairwise-identical (up to commutativity) instructions.
1913 LockstepReverseIterator<true> LRI(Succs);
1914 while (LRI.isValid()) {
1915 Instruction *I0 = (*LRI)[0];
1916 if (any_of(*LRI, [I0](Instruction *I) {
1917 return !areIdenticalUpToCommutativity(I0, I);
1918 })) {
1919 return false;
1920 }
1921 --LRI;
1922 }
1923 }
1924 // Now we know that all instructions in all successors can be hoisted. Let
1925 // the loop below handle the hoisting.
1926 }
1927
1928 // Count how many instructions were not hoisted so far. There's a limit on how
1929 // many instructions we skip, serving as a compilation time control as well as
1930 // preventing excessive increase of life ranges.
1931 unsigned NumSkipped = 0;
1932 // If we find an unreachable instruction at the beginning of a basic block, we
1933 // can still hoist instructions from the rest of the basic blocks.
1934 if (SuccIterPairs.size() > 2) {
1935 erase_if(SuccIterPairs,
1936 [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
1937 if (SuccIterPairs.size() < 2)
1938 return false;
1939 }
1940
1941 bool Changed = false;
1942
1943 for (;;) {
1944 auto *SuccIterPairBegin = SuccIterPairs.begin();
1945 auto &BB1ItrPair = *SuccIterPairBegin++;
1946 auto OtherSuccIterPairRange =
1947 iterator_range(SuccIterPairBegin, SuccIterPairs.end());
1948 auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);
1949
1950 Instruction *I1 = &*BB1ItrPair.first;
1951
1952 bool AllInstsAreIdentical = true;
1953 bool HasTerminator = I1->isTerminator();
1954 for (auto &SuccIter : OtherSuccIterRange) {
1955 Instruction *I2 = &*SuccIter;
1956 HasTerminator |= I2->isTerminator();
1957 if (AllInstsAreIdentical && (!areIdenticalUpToCommutativity(I1, I2) ||
1958 MMRAMetadata(*I1) != MMRAMetadata(*I2)))
1959 AllInstsAreIdentical = false;
1960 }
1961
1962 SmallVector<Instruction *, 8> OtherInsts;
1963 for (auto &SuccIter : OtherSuccIterRange)
1964 OtherInsts.push_back(&*SuccIter);
1965
1966 // If we are hoisting the terminator instruction, don't move one (making a
1967 // broken BB), instead clone it, and remove BI.
1968 if (HasTerminator) {
1969 // Even if BB, which contains only one unreachable instruction, is ignored
1970 // at the beginning of the loop, we can hoist the terminator instruction.
1971 // If any instructions remain in the block, we cannot hoist terminators.
1972 if (NumSkipped || !AllInstsAreIdentical) {
1973 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1974 return Changed;
1975 }
1976
1977 return hoistSuccIdenticalTerminatorToSwitchOrIf(TI, I1, OtherInsts) ||
1978 Changed;
1979 }
1980
1981 if (AllInstsAreIdentical) {
1982 unsigned SkipFlagsBB1 = BB1ItrPair.second;
1983 AllInstsAreIdentical =
1984 isSafeToHoistInstr(I1, SkipFlagsBB1) &&
1985 all_of(OtherSuccIterPairRange, [=](const auto &Pair) {
1986 Instruction *I2 = &*Pair.first;
1987 unsigned SkipFlagsBB2 = Pair.second;
1988 // Even if the instructions are identical, it may not
1989 // be safe to hoist them if we have skipped over
1990 // instructions with side effects or their operands
1991 // weren't hoisted.
1992 return isSafeToHoistInstr(I2, SkipFlagsBB2) &&
// NOTE(review): the second operand of this && (original file
// line 1993) is elided from this excerpt.
1994 });
1995 }
1996
1997 if (AllInstsAreIdentical) {
1998 BB1ItrPair.first++;
1999 // For a normal instruction, we just move one to right before the
2000 // branch, then replace all uses of the other with the first. Finally,
2001 // we remove the now redundant second instruction.
2002 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
2003 // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
2004 // and leave any that were not hoisted behind (by calling moveBefore
2005 // rather than moveBeforePreserving).
2006 I1->moveBefore(TI->getIterator());
2007 for (auto &SuccIter : OtherSuccIterRange) {
2008 Instruction *I2 = &*SuccIter++;
2009 assert(I2 != I1);
2010 if (!I2->use_empty())
2011 I2->replaceAllUsesWith(I1);
// Keep only IR flags (nsw/nuw/fast-math, etc.) common to both.
2012 I1->andIRFlags(I2);
2013 if (auto *CB = dyn_cast<CallBase>(I1)) {
2014 bool Success = CB->tryIntersectAttributes(cast<CallBase>(I2));
2015 assert(Success && "We should not be trying to hoist callbases "
2016 "with non-intersectable attributes");
2017 // For NDEBUG Compile.
2018 (void)Success;
2019 }
2020
2021 combineMetadataForCSE(I1, I2, true);
2022 // I1 and I2 are being combined into a single instruction. Its debug
2023 // location is the merged locations of the original instructions.
2024 I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
2025 I2->eraseFromParent();
2026 }
2027 if (!Changed)
2028 NumHoistCommonCode += SuccIterPairs.size();
2029 Changed = true;
2030 NumHoistCommonInstrs += SuccIterPairs.size();
2031 } else {
2032 if (NumSkipped >= HoistCommonSkipLimit) {
2033 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
2034 return Changed;
2035 }
2036 // We are about to skip over a pair of non-identical instructions. Record
2037 // if any have characteristics that would prevent reordering instructions
2038 // across them.
2039 for (auto &SuccIterPair : SuccIterPairs) {
2040 Instruction *I = &*SuccIterPair.first++;
2041 SuccIterPair.second |= skippedInstrFlags(I);
2042 }
2043 ++NumSkipped;
2044 }
2045 }
2046}
2047
2048bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
2049 Instruction *TI, Instruction *I1,
2050 SmallVectorImpl<Instruction *> &OtherSuccTIs) {
2051
// BI is non-null only when TI is a conditional branch (the "if" case);
// otherwise TI is presumably a switch and the select-insertion below is
// skipped.
2052 auto *BI = dyn_cast<BranchInst>(TI);
2053
2054 bool Changed = false;
2055 BasicBlock *TIParent = TI->getParent();
2056 BasicBlock *BB1 = I1->getParent();
2057
2058 // Use only for an if statement.
2059 auto *I2 = *OtherSuccTIs.begin();
2060 auto *BB2 = I2->getParent();
2061 if (BI) {
2062 assert(OtherSuccTIs.size() == 1);
2063 assert(BI->getSuccessor(0) == I1->getParent());
2064 assert(BI->getSuccessor(1) == I2->getParent());
2065 }
2066
2067 // In the case of an if statement, we try to hoist an invoke.
2068 // FIXME: Can we define a safety predicate for CallBr?
2069 // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
2070 // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
2071 if (isa<InvokeInst>(I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
2072 return false;
2073
2074 // TODO: callbr hoisting currently disabled pending further study.
2075 if (isa<CallBrInst>(I1))
2076 return false;
2077
2078 for (BasicBlock *Succ : successors(BB1)) {
2079 for (PHINode &PN : Succ->phis()) {
2080 Value *BB1V = PN.getIncomingValueForBlock(BB1);
2081 for (Instruction *OtherSuccTI : OtherSuccTIs) {
2082 Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent());
2083 if (BB1V == BB2V)
2084 continue;
2085
2086 // In the case of an if statement, check for
2087 // passingValueIsAlwaysUndefined here because we would rather eliminate
2088 // undefined control flow than converting it to a select.
2089 if (!BI || passingValueIsAlwaysUndefined(BB1V, &PN) ||
// NOTE(review): the remaining operand(s) of this condition (original
// file line 2090) are elided from this excerpt.
2091 return false;
2092 }
2093 }
2094 }
2095
2096 // Hoist DbgVariableRecords attached to the terminator to match dbg.*
2097 // intrinsic hoisting behaviour in hoistCommonCodeFromSuccessors.
2098 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherSuccTIs);
2099 // Clone the terminator and hoist it into the pred, without any debug info.
2100 Instruction *NT = I1->clone();
2101 NT->insertInto(TIParent, TI->getIterator());
2102 if (!NT->getType()->isVoidTy()) {
2103 I1->replaceAllUsesWith(NT);
2104 for (Instruction *OtherSuccTI : OtherSuccTIs)
2105 OtherSuccTI->replaceAllUsesWith(NT);
2106 NT->takeName(I1);
2107 }
2108 Changed = true;
2109 NumHoistCommonInstrs += OtherSuccTIs.size() + 1;
2110
2111 // Ensure terminator gets a debug location, even an unknown one, in case
2112 // it involves inlinable calls.
// NOTE(review): the declaration of `Locs` (original file line 2113) is
// elided from this excerpt — presumably a SmallVector<DebugLoc>; confirm
// against the upstream source.
2114 Locs.push_back(I1->getDebugLoc());
2115 for (auto *OtherSuccTI : OtherSuccTIs)
2116 Locs.push_back(OtherSuccTI->getDebugLoc());
2117 NT->setDebugLoc(DebugLoc::getMergedLocations(Locs));
2118
2119 // PHIs created below will adopt NT's merged DebugLoc.
2120 IRBuilder<NoFolder> Builder(NT);
2121
2122 // In the case of an if statement, hoisting one of the terminators from our
2123 // successor is a great thing. Unfortunately, the successors of the if/else
2124 // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2
2125 // must agree for all PHI nodes, so we insert select instruction to compute
2126 // the final result.
2127 if (BI) {
// Cache one select per distinct (BB1V, BB2V) pair so agreeing PHIs share it.
2128 std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
2129 for (BasicBlock *Succ : successors(BB1)) {
2130 for (PHINode &PN : Succ->phis()) {
2131 Value *BB1V = PN.getIncomingValueForBlock(BB1);
2132 Value *BB2V = PN.getIncomingValueForBlock(BB2);
2133 if (BB1V == BB2V)
2134 continue;
2135
2136 // These values do not agree. Insert a select instruction before NT
2137 // that determines the right value.
2138 SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
2139 if (!SI) {
2140 // Propagate fast-math-flags from phi node to its replacement select.
// NOTE(review): the call that creates the select (original file line
// 2141) is elided from this excerpt; the lines below are its
// arguments.
2142 BI->getCondition(), BB1V, BB2V,
2143 isa<FPMathOperator>(PN) ? &PN : nullptr,
2144 BB1V->getName() + "." + BB2V->getName(), BI));
2145 }
2146
2147 // Make the PHI node use the select for all incoming values for BB1/BB2
2148 for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
2149 if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
2150 PN.setIncomingValue(i, SI);
2151 }
2152 }
2153 }
2154
// NOTE(review): the declaration of `Updates` (original file line 2155) is
// elided from this excerpt — presumably a SmallVector of
// DominatorTree::UpdateType; confirm against the upstream source.
2156
2157 // Update any PHI nodes in our new successors.
2158 for (BasicBlock *Succ : successors(BB1)) {
2159 addPredecessorToBlock(Succ, TIParent, BB1);
2160 if (DTU)
2161 Updates.push_back({DominatorTree::Insert, TIParent, Succ});
2162 }
2163
2164 if (DTU)
2165 for (BasicBlock *Succ : successors(TI))
2166 Updates.push_back({DominatorTree::Delete, TIParent, Succ});
2167
2169 if (DTU)
2170 DTU->applyUpdates(Updates);
2171 return Changed;
2172}
2173
2174// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
2175// into variables.
// NOTE(review): the opening signature line of this predicate (original file
// line 2176) is elided from this excerpt; the line below is the tail of its
// parameter list. It answers whether replacing operand OpIdx of I with a
// variable (i.e. a PHI) is acceptable cost-wise.
2177 int OpIdx) {
2178 // Divide/Remainder by constant is typically much cheaper than by variable.
2179 if (I->isIntDivRem())
2180 return OpIdx != 1;
// Intrinsic operands are kept constant; everything else may be variable.
2181 return !isa<IntrinsicInst>(I);
2182}
2183
2184// All instructions in Insts belong to different blocks that all unconditionally
2185// branch to a common successor. Analyze each instruction and return true if it
2186// would be possible to sink them into their successor, creating one common
2187// instruction instead. For every value that would be required to be provided by
2188// PHI node (because an operand varies in each input block), add to PHIOperands.
// NOTE(review): the opening signature lines of this predicate (original file
// lines 2189-2190) are elided from this excerpt; the line below is the tail
// of its parameter list.
2191 DenseMap<const Use *, SmallVector<Value *, 4>> &PHIOperands) {
2192 // Prune out obviously bad instructions to move. Each instruction must have
2193 // the same number of uses, and we check later that the uses are consistent.
2194 std::optional<unsigned> NumUses;
2195 for (auto *I : Insts) {
2196 // These instructions may change or break semantics if moved.
2197 if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
2198 I->getType()->isTokenTy())
2199 return false;
2200
2201 // Do not try to sink an instruction in an infinite loop - it can cause
2202 // this algorithm to infinite loop.
2203 if (I->getParent()->getSingleSuccessor() == I->getParent())
2204 return false;
2205
2206 // Conservatively return false if I is an inline-asm instruction. Sinking
2207 // and merging inline-asm instructions can potentially create arguments
2208 // that cannot satisfy the inline-asm constraints.
2209 // If the instruction has nomerge or convergent attribute, return false.
2210 if (const auto *C = dyn_cast<CallBase>(I))
2211 if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
2212 return false;
2213
2214 if (!NumUses)
2215 NumUses = I->getNumUses();
2216 else if (NumUses != I->getNumUses())
2217 return false;
2218 }
2219
2220 const Instruction *I0 = Insts.front();
2221 const auto I0MMRA = MMRAMetadata(*I0);
2222 for (auto *I : Insts) {
2223 if (!I->isSameOperationAs(I0, Instruction::CompareUsingIntersectedAttrs))
2224 return false;
2225
2226 // Treat MMRAs conservatively. This pass can be quite aggressive and
2227 // could drop a lot of MMRAs otherwise.
2228 if (MMRAMetadata(*I) != I0MMRA)
2229 return false;
2230 }
2231
2232 // Uses must be consistent: If I0 is used in a phi node in the sink target,
2233 // then the other phi operands must match the instructions from Insts. This
2234 // also has to hold true for any phi nodes that would be created as a result
2235 // of sinking. Both of these cases are represented by PhiOperands.
2236 for (const Use &U : I0->uses()) {
2237 auto It = PHIOperands.find(&U);
2238 if (It == PHIOperands.end())
2239 // There may be uses in other blocks when sinking into a loop header.
2240 return false;
2241 if (!equal(Insts, It->second))
2242 return false;
2243 }
2244
2245 // For calls to be sinkable, they must all be indirect, or have same callee.
2246 // I.e. if we have two direct calls to different callees, we don't want to
2247 // turn that into an indirect call. Likewise, if we have an indirect call,
2248 // and a direct call, we don't actually want to have a single indirect call.
2249 if (isa<CallBase>(I0)) {
2250 auto IsIndirectCall = [](const Instruction *I) {
2251 return cast<CallBase>(I)->isIndirectCall();
2252 };
2253 bool HaveIndirectCalls = any_of(Insts, IsIndirectCall);
2254 bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall);
2255 if (HaveIndirectCalls) {
2256 if (!AllCallsAreIndirect)
2257 return false;
2258 } else {
2259 // All callees must be identical.
2260 Value *Callee = nullptr;
2261 for (const Instruction *I : Insts) {
2262 Value *CurrCallee = cast<CallBase>(I)->getCalledOperand();
2263 if (!Callee)
2264 Callee = CurrCallee;
2265 else if (Callee != CurrCallee)
2266 return false;
2267 }
2268 }
2269 }
2270
2271 for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
2272 Value *Op = I0->getOperand(OI);
2273 auto SameAsI0 = [&I0, OI](const Instruction *I) {
2274 assert(I->getNumOperands() == I0->getNumOperands());
2275 return I->getOperand(OI) == I0->getOperand(OI);
2276 };
// Operand OI differs across Insts, so sinking would need a PHI for it.
2277 if (!all_of(Insts, SameAsI0)) {
// NOTE(review): the guarding condition here (original file lines
// 2278-2279) is elided from this excerpt; the `return false` below
// belongs to that elided condition.
2280 // We can't create a PHI from this GEP.
2281 return false;
2282 auto &Ops = PHIOperands[&I0->getOperandUse(OI)];
2283 for (auto *I : Insts)
2284 Ops.push_back(I->getOperand(OI));
2285 }
2286 }
2287 return true;
2288}
2289
2290// Assuming canSinkInstructions(Blocks) has returned true, sink the last
2291// instruction of every block in Blocks to their common successor, commoning
2292// into one instruction.
// NOTE(review): the opening signature line of this function (original file
// line 2293) is elided from this excerpt.
2294 auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
2295
2296 // canSinkInstructions returning true guarantees that every block has at
2297 // least one non-terminator instruction.
// NOTE(review): the declaration of `Insts` (original file line 2298) is
// elided from this excerpt — presumably a SmallVector<Instruction *>;
// confirm against the upstream source.
2299 for (auto *BB : Blocks) {
2300 Instruction *I = BB->getTerminator();
2301 I = I->getPrevNode();
2302 Insts.push_back(I);
2303 }
2304
2305 // We don't need to do any more checking here; canSinkInstructions should
2306 // have done it all for us.
2307 SmallVector<Value*, 4> NewOperands;
2308 Instruction *I0 = Insts.front();
2309 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
2310 // This check is different to that in canSinkInstructions. There, we
2311 // cared about the global view once simplifycfg (and instcombine) have
2312 // completed - it takes into account PHIs that become trivially
2313 // simplifiable. However here we need a more local view; if an operand
2314 // differs we create a PHI and rely on instcombine to clean up the very
2315 // small mess we may make.
2316 bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
2317 return I->getOperand(O) != I0->getOperand(O);
2318 });
2319 if (!NeedPHI) {
2320 NewOperands.push_back(I0->getOperand(O));
2321 continue;
2322 }
2323
2324 // Create a new PHI in the successor block and populate it.
2325 auto *Op = I0->getOperand(O);
2326 assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
2327 auto *PN =
2328 PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
2329 PN->insertBefore(BBEnd->begin());
2330 for (auto *I : Insts)
2331 PN->addIncoming(I->getOperand(O), I->getParent());
2332 NewOperands.push_back(PN);
2333 }
2334
2335 // Arbitrarily use I0 as the new "common" instruction; remap its operands
2336 // and move it to the start of the successor block.
2337 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
2338 I0->getOperandUse(O).set(NewOperands[O]);
2339
2340 I0->moveBefore(*BBEnd, BBEnd->getFirstInsertionPt());
2341
2342 // Update metadata and IR flags, and merge debug locations.
2343 for (auto *I : Insts)
2344 if (I != I0) {
2345 // The debug location for the "common" instruction is the merged locations
2346 // of all the commoned instructions. We start with the original location
2347 // of the "common" instruction and iteratively merge each location in the
2348 // loop below.
2349 // This is an N-way merge, which will be inefficient if I0 is a CallInst.
2350 // However, as N-way merge for CallInst is rare, so we use simplified API
2351 // instead of using complex API for N-way merge.
2352 I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
2353 combineMetadataForCSE(I0, I, true);
2354 I0->andIRFlags(I);
2355 if (auto *CB = dyn_cast<CallBase>(I0)) {
2356 bool Success = CB->tryIntersectAttributes(cast<CallBase>(I));
2357 assert(Success && "We should not be trying to sink callbases "
2358 "with non-intersectable attributes");
2359 // For NDEBUG Compile.
2360 (void)Success;
2361 }
2362 }
2363
2364 for (User *U : make_early_inc_range(I0->users())) {
2365 // canSinkLastInstruction checked that all instructions are only used by
2366 // phi nodes in a way that allows replacing the phi node with the common
2367 // instruction.
2368 auto *PN = cast<PHINode>(U);
2369 PN->replaceAllUsesWith(I0);
2370 PN->eraseFromParent();
2371 }
2372
2373 // Finally nuke all instructions apart from the common instruction.
2374 for (auto *I : Insts) {
2375 if (I == I0)
2376 continue;
2377 // The remaining uses are debug users, replace those with the common inst.
2378 // In most (all?) cases this just introduces a use-before-def.
2379 assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
2380 I->replaceAllUsesWith(I0);
2381 I->eraseFromParent();
2382 }
2383}
2384
2385/// Check whether BB's predecessors end with unconditional branches. If it is
2386/// true, sink any common code from the predecessors to BB.
// NOTE(review): the opening signature line of this function (original file
// line 2387) is elided from this excerpt; the line below is the tail of its
// parameter list.
2388 DomTreeUpdater *DTU) {
2389 // We support two situations:
2390 // (1) all incoming arcs are unconditional
2391 // (2) there are non-unconditional incoming arcs
2392 //
2393 // (2) is very common in switch defaults and
2394 // else-if patterns;
2395 //
2396 // if (a) f(1);
2397 // else if (b) f(2);
2398 //
2399 // produces:
2400 //
2401 // [if]
2402 // / \
2403 // [f(1)] [if]
2404 // | | \
2405 // | | |
2406 // | [f(2)]|
2407 // \ | /
2408 // [ end ]
2409 //
2410 // [end] has two unconditional predecessor arcs and one conditional. The
2411 // conditional refers to the implicit empty 'else' arc. This conditional
2412 // arc can also be caused by an empty default block in a switch.
2413 //
2414 // In this case, we attempt to sink code from all *unconditional* arcs.
2415 // If we can sink instructions from these arcs (determined during the scan
2416 // phase below) we insert a common successor for all unconditional arcs and
2417 // connect that to [end], to enable sinking:
2418 //
2419 // [if]
2420 // / \
2421 // [x(1)] [if]
2422 // | | \
2423 // | | \
2424 // | [x(2)] |
2425 // \ / |
2426 // [sink.split] |
2427 // \ /
2428 // [ end ]
2429 //
2430 SmallVector<BasicBlock*,4> UnconditionalPreds;
2431 bool HaveNonUnconditionalPredecessors = false;
2432 for (auto *PredBB : predecessors(BB)) {
2433 auto *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
2434 if (PredBr && PredBr->isUnconditional())
2435 UnconditionalPreds.push_back(PredBB);
2436 else
2437 HaveNonUnconditionalPredecessors = true;
2438 }
2439 if (UnconditionalPreds.size() < 2)
2440 return false;
2441
2442 // We take a two-step approach to tail sinking. First we scan from the end of
2443 // each block upwards in lockstep. If the n'th instruction from the end of each
2444 // block can be sunk, those instructions are added to ValuesToSink and we
2445 // carry on. If we can sink an instruction but need to PHI-merge some operands
2446 // (because they're not identical in each instruction) we add these to
2447 // PHIOperands.
2448 // We prepopulate PHIOperands with the phis that already exist in BB.
// NOTE(review): the declarations of `PHIOperands` (original file line 2449)
// and `IncomingVals` (line 2451) are elided from this excerpt; confirm their
// exact types against the upstream source.
2450 for (PHINode &PN : BB->phis()) {
2452 for (const Use &U : PN.incoming_values())
2453 IncomingVals.insert({PN.getIncomingBlock(U), &U});
2454 auto &Ops = PHIOperands[IncomingVals[UnconditionalPreds[0]]];
2455 for (BasicBlock *Pred : UnconditionalPreds)
2456 Ops.push_back(*IncomingVals[Pred]);
2457 }
2458
2459 int ScanIdx = 0;
2460 SmallPtrSet<Value*,4> InstructionsToSink;
2461 LockstepReverseIterator<true> LRI(UnconditionalPreds);
2462 while (LRI.isValid() &&
2463 canSinkInstructions(*LRI, PHIOperands)) {
2464 LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
2465 << "\n");
2466 InstructionsToSink.insert_range(*LRI);
2467 ++ScanIdx;
2468 --LRI;
2469 }
2470
2471 // If no instructions can be sunk, early-return.
2472 if (ScanIdx == 0)
2473 return false;
2474
2475 bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);
2476
// Profitability checks are skipped when BB leads to a deopt/unreachable:
// code size there is all that matters, so sink as much as is legal.
2477 if (!followedByDeoptOrUnreachable) {
2478 // Check whether this is the pointer operand of a load/store.
2479 auto IsMemOperand = [](Use &U) {
2480 auto *I = cast<Instruction>(U.getUser());
2481 if (isa<LoadInst>(I))
2482 return U.getOperandNo() == LoadInst::getPointerOperandIndex();
2483 if (isa<StoreInst>(I))
2484 return U.getOperandNo() == StoreInst::getPointerOperandIndex();
2485 return false;
2486 };
2487
2488 // Okay, we *could* sink last ScanIdx instructions. But how many can we
2489 // actually sink before encountering instruction that is unprofitable to
2490 // sink?
2491 auto ProfitableToSinkInstruction = [&](LockstepReverseIterator<true> &LRI) {
2492 unsigned NumPHIInsts = 0;
2493 for (Use &U : (*LRI)[0]->operands()) {
2494 auto It = PHIOperands.find(&U);
2495 if (It != PHIOperands.end() && !all_of(It->second, [&](Value *V) {
2496 return InstructionsToSink.contains(V);
2497 })) {
2498 ++NumPHIInsts;
2499 // Do not separate a load/store from the gep producing the address.
2500 // The gep can likely be folded into the load/store as an addressing
2501 // mode. Additionally, a load of a gep is easier to analyze than a
2502 // load of a phi.
2503 if (IsMemOperand(U) &&
2504 any_of(It->second, [](Value *V) { return isa<GEPOperator>(V); }))
2505 return false;
2506 // FIXME: this check is overly optimistic. We may end up not sinking
2507 // said instruction, due to the very same profitability check.
2508 // See @creating_too_many_phis in sink-common-code.ll.
2509 }
2510 }
2511 LLVM_DEBUG(dbgs() << "SINK: #phi insts: " << NumPHIInsts << "\n");
2512 return NumPHIInsts <= 1;
2513 };
2514
2515 // We've determined that we are going to sink last ScanIdx instructions,
2516 // and recorded them in InstructionsToSink. Now, some instructions may be
2517 // unprofitable to sink. But that determination depends on the instructions
2518 // that we are going to sink.
2519
2520 // First, forward scan: find the first instruction unprofitable to sink,
2521 // recording all the ones that are profitable to sink.
2522 // FIXME: would it be better, after we detect that not all are profitable.
2523 // to either record the profitable ones, or erase the unprofitable ones?
2524 // Maybe we need to choose (at runtime) the one that will touch least
2525 // instrs?
2526 LRI.reset();
2527 int Idx = 0;
2528 SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
2529 while (Idx < ScanIdx) {
2530 if (!ProfitableToSinkInstruction(LRI)) {
2531 // Too many PHIs would be created.
2532 LLVM_DEBUG(
2533 dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
2534 break;
2535 }
2536 InstructionsProfitableToSink.insert_range(*LRI);
2537 --LRI;
2538 ++Idx;
2539 }
2540
2541 // If no instructions can be sunk, early-return.
2542 if (Idx == 0)
2543 return false;
2544
2545 // Did we determine that (only) some instructions are unprofitable to sink?
2546 if (Idx < ScanIdx) {
2547 // Okay, some instructions are unprofitable.
2548 ScanIdx = Idx;
2549 InstructionsToSink = InstructionsProfitableToSink;
2550
2551 // But, that may make other instructions unprofitable, too.
2552 // So, do a backward scan, do any earlier instructions become
2553 // unprofitable?
2554 assert(
2555 !ProfitableToSinkInstruction(LRI) &&
2556 "We already know that the last instruction is unprofitable to sink");
2557 ++LRI;
2558 --Idx;
2559 while (Idx >= 0) {
2560 // If we detect that an instruction becomes unprofitable to sink,
2561 // all earlier instructions won't be sunk either,
2562 // so preemptively keep InstructionsProfitableToSink in sync.
2563 // FIXME: is this the most performant approach?
2564 for (auto *I : *LRI)
2565 InstructionsProfitableToSink.erase(I);
2566 if (!ProfitableToSinkInstruction(LRI)) {
2567 // Everything starting with this instruction won't be sunk.
2568 ScanIdx = Idx;
2569 InstructionsToSink = InstructionsProfitableToSink;
2570 }
2571 ++LRI;
2572 --Idx;
2573 }
2574 }
2575
2576 // If no instructions can be sunk, early-return.
2577 if (ScanIdx == 0)
2578 return false;
2579 }
2580
2581 bool Changed = false;
2582
2583 if (HaveNonUnconditionalPredecessors) {
2584 if (!followedByDeoptOrUnreachable) {
2585 // It is always legal to sink common instructions from unconditional
2586 // predecessors. However, if not all predecessors are unconditional,
2587 // this transformation might be pessimizing. So as a rule of thumb,
2588 // don't do it unless we'd sink at least one non-speculatable instruction.
2589 // See https://bugs.llvm.org/show_bug.cgi?id=30244
2590 LRI.reset();
2591 int Idx = 0;
2592 bool Profitable = false;
2593 while (Idx < ScanIdx) {
2594 if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
2595 Profitable = true;
2596 break;
2597 }
2598 --LRI;
2599 ++Idx;
2600 }
2601 if (!Profitable)
2602 return false;
2603 }
2604
2605 LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
2606 // We have a conditional edge and we're going to sink some instructions.
2607 // Insert a new block postdominating all blocks we're going to sink from.
2608 if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
2609 // Edges couldn't be split.
2610 return false;
2611 Changed = true;
2612 }
2613
2614 // Now that we've analyzed all potential sinking candidates, perform the
2615 // actual sink. We iteratively sink the last non-terminator of the source
2616 // blocks into their common successor unless doing so would require too
2617 // many PHI instructions to be generated (currently only one PHI is allowed
2618 // per sunk instruction).
2619 //
2620 // We can use InstructionsToSink to discount values needing PHI-merging that will
2621 // actually be sunk in a later iteration. This allows us to be more
2622 // aggressive in what we sink. This does allow a false positive where we
2623 // sink presuming a later value will also be sunk, but stop half way through
2624 // and never actually sink it which means we produce more PHIs than intended.
2625 // This is unlikely in practice though.
2626 int SinkIdx = 0;
2627 for (; SinkIdx != ScanIdx; ++SinkIdx) {
2628 LLVM_DEBUG(dbgs() << "SINK: Sink: "
2629 << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
2630 << "\n");
2631
2632 // Because we've sunk every instruction in turn, the current instruction to
2633 // sink is always at index 0.
2634 LRI.reset();
2635
2636 sinkLastInstruction(UnconditionalPreds);
2637 NumSinkCommonInstrs++;
2638 Changed = true;
2639 }
2640 if (SinkIdx != 0)
2641 ++NumSinkCommonCode;
2642 return Changed;
2643}
2644
2645namespace {
2646
2647struct CompatibleSets {
2648 using SetTy = SmallVector<InvokeInst *, 2>;
2649
2651
2652 static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);
2653
2654 SetTy &getCompatibleSet(InvokeInst *II);
2655
2656 void insert(InvokeInst *II);
2657};
2658
2659CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
2660 // Perform a linear scan over all the existing sets, see if the new `invoke`
2661 // is compatible with any particular set. Since we know that all the `invokes`
2662 // within a set are compatible, only check the first `invoke` in each set.
2663 // WARNING: at worst, this has quadratic complexity.
2664 for (CompatibleSets::SetTy &Set : Sets) {
2665 if (CompatibleSets::shouldBelongToSameSet({Set.front(), II}))
2666 return Set;
2667 }
2668
2669 // Otherwise, we either had no sets yet, or this invoke forms a new set.
2670 return Sets.emplace_back();
2671}
2672
2673void CompatibleSets::insert(InvokeInst *II) {
2674 getCompatibleSet(II).emplace_back(II);
2675}
2676
2677bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
2678 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2679
2680 // Can we theoretically merge these `invoke`s?
2681 auto IsIllegalToMerge = [](InvokeInst *II) {
2682 return II->cannotMerge() || II->isInlineAsm();
2683 };
2684 if (any_of(Invokes, IsIllegalToMerge))
2685 return false;
2686
2687 // Either both `invoke`s must be direct,
2688 // or both `invoke`s must be indirect.
2689 auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
2690 bool HaveIndirectCalls = any_of(Invokes, IsIndirectCall);
2691 bool AllCallsAreIndirect = all_of(Invokes, IsIndirectCall);
2692 if (HaveIndirectCalls) {
2693 if (!AllCallsAreIndirect)
2694 return false;
2695 } else {
2696 // All callees must be identical.
2697 Value *Callee = nullptr;
2698 for (InvokeInst *II : Invokes) {
2699 Value *CurrCallee = II->getCalledOperand();
2700 assert(CurrCallee && "There is always a called operand.");
2701 if (!Callee)
2702 Callee = CurrCallee;
2703 else if (Callee != CurrCallee)
2704 return false;
2705 }
2706 }
2707
2708 // Either both `invoke`s must not have a normal destination,
2709 // or both `invoke`s must have a normal destination,
2710 auto HasNormalDest = [](InvokeInst *II) {
2711 return !isa<UnreachableInst>(II->getNormalDest()->getFirstNonPHIOrDbg());
2712 };
2713 if (any_of(Invokes, HasNormalDest)) {
2714 // Do not merge `invoke` that does not have a normal destination with one
2715 // that does have a normal destination, even though doing so would be legal.
2716 if (!all_of(Invokes, HasNormalDest))
2717 return false;
2718
2719 // All normal destinations must be identical.
2720 BasicBlock *NormalBB = nullptr;
2721 for (InvokeInst *II : Invokes) {
2722 BasicBlock *CurrNormalBB = II->getNormalDest();
2723 assert(CurrNormalBB && "There is always a 'continue to' basic block.");
2724 if (!NormalBB)
2725 NormalBB = CurrNormalBB;
2726 else if (NormalBB != CurrNormalBB)
2727 return false;
2728 }
2729
2730 // In the normal destination, the incoming values for these two `invoke`s
2731 // must be compatible.
2732 SmallPtrSet<Value *, 16> EquivalenceSet(llvm::from_range, Invokes);
2734 NormalBB, {Invokes[0]->getParent(), Invokes[1]->getParent()},
2735 &EquivalenceSet))
2736 return false;
2737 }
2738
2739#ifndef NDEBUG
2740 // All unwind destinations must be identical.
2741 // We know that because we have started from said unwind destination.
2742 BasicBlock *UnwindBB = nullptr;
2743 for (InvokeInst *II : Invokes) {
2744 BasicBlock *CurrUnwindBB = II->getUnwindDest();
2745 assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
2746 if (!UnwindBB)
2747 UnwindBB = CurrUnwindBB;
2748 else
2749 assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
2750 }
2751#endif
2752
2753 // In the unwind destination, the incoming values for these two `invoke`s
2754 // must be compatible.
2756 Invokes.front()->getUnwindDest(),
2757 {Invokes[0]->getParent(), Invokes[1]->getParent()}))
2758 return false;
2759
2760 // Ignoring arguments, these `invoke`s must be identical,
2761 // including operand bundles.
2762 const InvokeInst *II0 = Invokes.front();
2763 for (auto *II : Invokes.drop_front())
2764 if (!II->isSameOperationAs(II0, Instruction::CompareUsingIntersectedAttrs))
2765 return false;
2766
2767 // Can we theoretically form the data operands for the merged `invoke`?
2768 auto IsIllegalToMergeArguments = [](auto Ops) {
2769 Use &U0 = std::get<0>(Ops);
2770 Use &U1 = std::get<1>(Ops);
2771 if (U0 == U1)
2772 return false;
2774 U0.getOperandNo());
2775 };
2776 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2777 if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()),
2778 IsIllegalToMergeArguments))
2779 return false;
2780
2781 return true;
2782}
2783
2784} // namespace
2785
2786// Merge all invokes in the provided set, all of which are compatible
2787// as per the `CompatibleSets::shouldBelongToSameSet()`.
2789 DomTreeUpdater *DTU) {
2790 assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");
2791
2793 if (DTU)
2794 Updates.reserve(2 + 3 * Invokes.size());
2795
2796 bool HasNormalDest =
2797 !isa<UnreachableInst>(Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());
2798
2799 // Clone one of the invokes into a new basic block.
2800 // Since they are all compatible, it doesn't matter which invoke is cloned.
2801 InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
2802 InvokeInst *II0 = Invokes.front();
2803 BasicBlock *II0BB = II0->getParent();
2804 BasicBlock *InsertBeforeBlock =
2805 II0->getParent()->getIterator()->getNextNode();
2806 Function *Func = II0BB->getParent();
2807 LLVMContext &Ctx = II0->getContext();
2808
2809 BasicBlock *MergedInvokeBB = BasicBlock::Create(
2810 Ctx, II0BB->getName() + ".invoke", Func, InsertBeforeBlock);
2811
2812 auto *MergedInvoke = cast<InvokeInst>(II0->clone());
2813 // NOTE: all invokes have the same attributes, so no handling needed.
2814 MergedInvoke->insertInto(MergedInvokeBB, MergedInvokeBB->end());
2815
2816 if (!HasNormalDest) {
2817 // This set does not have a normal destination,
2818 // so just form a new block with unreachable terminator.
2819 BasicBlock *MergedNormalDest = BasicBlock::Create(
2820 Ctx, II0BB->getName() + ".cont", Func, InsertBeforeBlock);
2821 auto *UI = new UnreachableInst(Ctx, MergedNormalDest);
2822 UI->setDebugLoc(DebugLoc::getTemporary());
2823 MergedInvoke->setNormalDest(MergedNormalDest);
2824 }
2825
2826 // The unwind destination, however, remainds identical for all invokes here.
2827
2828 return MergedInvoke;
2829 }();
2830
2831 if (DTU) {
2832 // Predecessor blocks that contained these invokes will now branch to
2833 // the new block that contains the merged invoke, ...
2834 for (InvokeInst *II : Invokes)
2835 Updates.push_back(
2836 {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});
2837
2838 // ... which has the new `unreachable` block as normal destination,
2839 // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
2840 for (BasicBlock *SuccBBOfMergedInvoke : successors(MergedInvoke))
2841 Updates.push_back({DominatorTree::Insert, MergedInvoke->getParent(),
2842 SuccBBOfMergedInvoke});
2843
2844 // Since predecessor blocks now unconditionally branch to a new block,
2845 // they no longer branch to their original successors.
2846 for (InvokeInst *II : Invokes)
2847 for (BasicBlock *SuccOfPredBB : successors(II->getParent()))
2848 Updates.push_back(
2849 {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
2850 }
2851
2852 bool IsIndirectCall = Invokes[0]->isIndirectCall();
2853
2854 // Form the merged operands for the merged invoke.
2855 for (Use &U : MergedInvoke->operands()) {
2856 // Only PHI together the indirect callees and data operands.
2857 if (MergedInvoke->isCallee(&U)) {
2858 if (!IsIndirectCall)
2859 continue;
2860 } else if (!MergedInvoke->isDataOperand(&U))
2861 continue;
2862
2863 // Don't create trivial PHI's with all-identical incoming values.
2864 bool NeedPHI = any_of(Invokes, [&U](InvokeInst *II) {
2865 return II->getOperand(U.getOperandNo()) != U.get();
2866 });
2867 if (!NeedPHI)
2868 continue;
2869
2870 // Form a PHI out of all the data ops under this index.
2872 U->getType(), /*NumReservedValues=*/Invokes.size(), "", MergedInvoke->getIterator());
2873 for (InvokeInst *II : Invokes)
2874 PN->addIncoming(II->getOperand(U.getOperandNo()), II->getParent());
2875
2876 U.set(PN);
2877 }
2878
2879 // We've ensured that each PHI node has compatible (identical) incoming values
2880 // when coming from each of the `invoke`s in the current merge set,
2881 // so update the PHI nodes accordingly.
2882 for (BasicBlock *Succ : successors(MergedInvoke))
2883 addPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
2884 /*ExistPred=*/Invokes.front()->getParent());
2885
2886 // And finally, replace the original `invoke`s with an unconditional branch
2887 // to the block with the merged `invoke`. Also, give that merged `invoke`
2888 // the merged debugloc of all the original `invoke`s.
2889 DILocation *MergedDebugLoc = nullptr;
2890 for (InvokeInst *II : Invokes) {
2891 // Compute the debug location common to all the original `invoke`s.
2892 if (!MergedDebugLoc)
2893 MergedDebugLoc = II->getDebugLoc();
2894 else
2895 MergedDebugLoc =
2896 DebugLoc::getMergedLocation(MergedDebugLoc, II->getDebugLoc());
2897
2898 // And replace the old `invoke` with an unconditionally branch
2899 // to the block with the merged `invoke`.
2900 for (BasicBlock *OrigSuccBB : successors(II->getParent()))
2901 OrigSuccBB->removePredecessor(II->getParent());
2902 auto *BI = BranchInst::Create(MergedInvoke->getParent(), II->getParent());
2903 // The unconditional branch is part of the replacement for the original
2904 // invoke, so should use its DebugLoc.
2905 BI->setDebugLoc(II->getDebugLoc());
2906 bool Success = MergedInvoke->tryIntersectAttributes(II);
2907 assert(Success && "Merged invokes with incompatible attributes");
2908 // For NDEBUG Compile
2909 (void)Success;
2910 II->replaceAllUsesWith(MergedInvoke);
2911 II->eraseFromParent();
2912 ++NumInvokesMerged;
2913 }
2914 MergedInvoke->setDebugLoc(MergedDebugLoc);
2915 ++NumInvokeSetsFormed;
2916
2917 if (DTU)
2918 DTU->applyUpdates(Updates);
2919}
2920
2921/// If this block is a `landingpad` exception handling block, categorize all
2922/// the predecessor `invoke`s into sets, with all `invoke`s in each set
2923/// being "mergeable" together, and then merge invokes in each set together.
2924///
2925/// This is a weird mix of hoisting and sinking. Visually, it goes from:
2926/// [...] [...]
2927/// | |
2928/// [invoke0] [invoke1]
2929/// / \ / \
2930/// [cont0] [landingpad] [cont1]
2931/// to:
2932/// [...] [...]
2933/// \ /
2934/// [invoke]
2935/// / \
2936/// [cont] [landingpad]
2937///
2938/// But of course we can only do that if the invokes share the `landingpad`,
2939/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
2940/// and the invoked functions are "compatible".
2943 return false;
2944
2945 bool Changed = false;
2946
2947 // FIXME: generalize to all exception handling blocks?
2948 if (!BB->isLandingPad())
2949 return Changed;
2950
2951 CompatibleSets Grouper;
2952
2953 // Record all the predecessors of this `landingpad`. As per verifier,
2954 // the only allowed predecessor is the unwind edge of an `invoke`.
2955 // We want to group "compatible" `invokes` into the same set to be merged.
2956 for (BasicBlock *PredBB : predecessors(BB))
2957 Grouper.insert(cast<InvokeInst>(PredBB->getTerminator()));
2958
2959 // And now, merge `invoke`s that were grouped togeter.
2960 for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
2961 if (Invokes.size() < 2)
2962 continue;
2963 Changed = true;
2964 mergeCompatibleInvokesImpl(Invokes, DTU);
2965 }
2966
2967 return Changed;
2968}
2969
2970namespace {
2971/// Track ephemeral values, which should be ignored for cost-modelling
2972/// purposes. Requires walking instructions in reverse order.
2973class EphemeralValueTracker {
2974 SmallPtrSet<const Instruction *, 32> EphValues;
2975
2976 bool isEphemeral(const Instruction *I) {
2977 if (isa<AssumeInst>(I))
2978 return true;
2979 return !I->mayHaveSideEffects() && !I->isTerminator() &&
2980 all_of(I->users(), [&](const User *U) {
2981 return EphValues.count(cast<Instruction>(U));
2982 });
2983 }
2984
2985public:
2986 bool track(const Instruction *I) {
2987 if (isEphemeral(I)) {
2988 EphValues.insert(I);
2989 return true;
2990 }
2991 return false;
2992 }
2993
2994 bool contains(const Instruction *I) const { return EphValues.contains(I); }
2995};
2996} // namespace
2997
2998/// Determine if we can hoist sink a sole store instruction out of a
2999/// conditional block.
3000///
3001/// We are looking for code like the following:
3002/// BrBB:
3003/// store i32 %add, i32* %arrayidx2
3004/// ... // No other stores or function calls (we could be calling a memory
3005/// ... // function).
3006/// %cmp = icmp ult %x, %y
3007/// br i1 %cmp, label %EndBB, label %ThenBB
3008/// ThenBB:
3009/// store i32 %add5, i32* %arrayidx2
3010/// br label EndBB
3011/// EndBB:
3012/// ...
3013/// We are going to transform this into:
3014/// BrBB:
3015/// store i32 %add, i32* %arrayidx2
3016/// ... //
3017/// %cmp = icmp ult %x, %y
3018/// %add.add5 = select i1 %cmp, i32 %add, %add5
3019/// store i32 %add.add5, i32* %arrayidx2
3020/// ...
3021///
3022/// \return The pointer to the value of the previous store if the store can be
3023/// hoisted into the predecessor block. 0 otherwise.
3025 BasicBlock *StoreBB, BasicBlock *EndBB) {
3026 StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
3027 if (!StoreToHoist)
3028 return nullptr;
3029
3030 // Volatile or atomic.
3031 if (!StoreToHoist->isSimple())
3032 return nullptr;
3033
3034 Value *StorePtr = StoreToHoist->getPointerOperand();
3035 Type *StoreTy = StoreToHoist->getValueOperand()->getType();
3036
3037 // Look for a store to the same pointer in BrBB.
3038 unsigned MaxNumInstToLookAt = 9;
3039 // Skip pseudo probe intrinsic calls which are not really killing any memory
3040 // accesses.
3041 for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug(true))) {
3042 if (!MaxNumInstToLookAt)
3043 break;
3044 --MaxNumInstToLookAt;
3045
3046 // Could be calling an instruction that affects memory like free().
3047 if (CurI.mayWriteToMemory() && !isa<StoreInst>(CurI))
3048 return nullptr;
3049
3050 if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
3051 // Found the previous store to same location and type. Make sure it is
3052 // simple, to avoid introducing a spurious non-atomic write after an
3053 // atomic write.
3054 if (SI->getPointerOperand() == StorePtr &&
3055 SI->getValueOperand()->getType() == StoreTy && SI->isSimple() &&
3056 SI->getAlign() >= StoreToHoist->getAlign())
3057 // Found the previous store, return its value operand.
3058 return SI->getValueOperand();
3059 return nullptr; // Unknown store.
3060 }
3061
3062 if (auto *LI = dyn_cast<LoadInst>(&CurI)) {
3063 if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
3064 LI->isSimple() && LI->getAlign() >= StoreToHoist->getAlign()) {
3065 Value *Obj = getUnderlyingObject(StorePtr);
3066 bool ExplicitlyDereferenceableOnly;
3067 if (isWritableObject(Obj, ExplicitlyDereferenceableOnly) &&
3069 PointerMayBeCaptured(Obj, /*ReturnCaptures=*/false,
3071 (!ExplicitlyDereferenceableOnly ||
3072 isDereferenceablePointer(StorePtr, StoreTy,
3073 LI->getDataLayout()))) {
3074 // Found a previous load, return it.
3075 return LI;
3076 }
3077 }
3078 // The load didn't work out, but we may still find a store.
3079 }
3080 }
3081
3082 return nullptr;
3083}
3084
3085/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
3086/// converted to selects.
3088 BasicBlock *EndBB,
3089 unsigned &SpeculatedInstructions,
3090 InstructionCost &Cost,
3091 const TargetTransformInfo &TTI) {
3093 BB->getParent()->hasMinSize()
3096
3097 bool HaveRewritablePHIs = false;
3098 for (PHINode &PN : EndBB->phis()) {
3099 Value *OrigV = PN.getIncomingValueForBlock(BB);
3100 Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
3101
3102 // FIXME: Try to remove some of the duplication with
3103 // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial.
3104 if (ThenV == OrigV)
3105 continue;
3106
3107 Cost += TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(),
3108 CmpInst::makeCmpResultType(PN.getType()),
3110
3111 // Don't convert to selects if we could remove undefined behavior instead.
3112 if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
3114 return false;
3115
3116 HaveRewritablePHIs = true;
3117 ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
3118 ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
3119 if (!OrigCE && !ThenCE)
3120 continue; // Known cheap (FIXME: Maybe not true for aggregates).
3121
3122 InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0;
3123 InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0;
3124 InstructionCost MaxCost =
3126 if (OrigCost + ThenCost > MaxCost)
3127 return false;
3128
3129 // Account for the cost of an unfolded ConstantExpr which could end up
3130 // getting expanded into Instructions.
3131 // FIXME: This doesn't account for how many operations are combined in the
3132 // constant expression.
3133 ++SpeculatedInstructions;
3134 if (SpeculatedInstructions > 1)
3135 return false;
3136 }
3137
3138 return HaveRewritablePHIs;
3139}
3140
3142 std::optional<bool> Invert,
3143 const TargetTransformInfo &TTI) {
3144 // If the branch is non-unpredictable, and is predicted to *not* branch to
3145 // the `then` block, then avoid speculating it.
3146 if (BI->getMetadata(LLVMContext::MD_unpredictable))
3147 return true;
3148
3149 uint64_t TWeight, FWeight;
3150 if (!extractBranchWeights(*BI, TWeight, FWeight) || (TWeight + FWeight) == 0)
3151 return true;
3152
3153 if (!Invert.has_value())
3154 return false;
3155
3156 uint64_t EndWeight = *Invert ? TWeight : FWeight;
3157 BranchProbability BIEndProb =
3158 BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight);
3159 BranchProbability Likely = TTI.getPredictableBranchThreshold();
3160 return BIEndProb < Likely;
3161}
3162
3163/// Speculate a conditional basic block flattening the CFG.
3164///
3165/// Note that this is a very risky transform currently. Speculating
3166/// instructions like this is most often not desirable. Instead, there is an MI
3167/// pass which can do it with full awareness of the resource constraints.
3168/// However, some cases are "obvious" and we should do directly. An example of
3169/// this is speculating a single, reasonably cheap instruction.
3170///
3171/// There is only one distinct advantage to flattening the CFG at the IR level:
3172/// it makes very common but simplistic optimizations such as are common in
3173/// instcombine and the DAG combiner more powerful by removing CFG edges and
3174/// modeling their effects with easier to reason about SSA value graphs.
3175///
3176///
3177/// An illustration of this transform is turning this IR:
3178/// \code
3179/// BB:
3180/// %cmp = icmp ult %x, %y
3181/// br i1 %cmp, label %EndBB, label %ThenBB
3182/// ThenBB:
3183/// %sub = sub %x, %y
3184/// br label BB2
3185/// EndBB:
3186/// %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
3187/// ...
3188/// \endcode
3189///
3190/// Into this IR:
3191/// \code
3192/// BB:
3193/// %cmp = icmp ult %x, %y
3194/// %sub = sub %x, %y
3195/// %cond = select i1 %cmp, 0, %sub
3196/// ...
3197/// \endcode
3198///
3199/// \returns true if the conditional block is removed.
3200bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
3201 BasicBlock *ThenBB) {
3202 if (!Options.SpeculateBlocks)
3203 return false;
3204
3205 // Be conservative for now. FP select instruction can often be expensive.
3206 Value *BrCond = BI->getCondition();
3207 if (isa<FCmpInst>(BrCond))
3208 return false;
3209
3210 BasicBlock *BB = BI->getParent();
3211 BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
3212 InstructionCost Budget =
3214
3215 // If ThenBB is actually on the false edge of the conditional branch, remember
3216 // to swap the select operands later.
3217 bool Invert = false;
3218 if (ThenBB != BI->getSuccessor(0)) {
3219 assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
3220 Invert = true;
3221 }
3222 assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
3223
3224 if (!isProfitableToSpeculate(BI, Invert, TTI))
3225 return false;
3226
3227 // Keep a count of how many times instructions are used within ThenBB when
3228 // they are candidates for sinking into ThenBB. Specifically:
3229 // - They are defined in BB, and
3230 // - They have no side effects, and
3231 // - All of their uses are in ThenBB.
3232 SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
3233
3234 SmallVector<Instruction *, 4> SpeculatedPseudoProbes;
3235
3236 unsigned SpeculatedInstructions = 0;
3237 bool HoistLoadsStores = Options.HoistLoadsStoresWithCondFaulting;
3238 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
3239 Value *SpeculatedStoreValue = nullptr;
3240 StoreInst *SpeculatedStore = nullptr;
3241 EphemeralValueTracker EphTracker;
3242 for (Instruction &I : reverse(drop_end(*ThenBB))) {
3243 // Skip pseudo probes. The consequence is we lose track of the branch
3244 // probability for ThenBB, which is fine since the optimization here takes
3245 // place regardless of the branch probability.
3246 if (isa<PseudoProbeInst>(I)) {
3247 // The probe should be deleted so that it will not be over-counted when
3248 // the samples collected on the non-conditional path are counted towards
3249 // the conditional path. We leave it for the counts inference algorithm to
3250 // figure out a proper count for an unknown probe.
3251 SpeculatedPseudoProbes.push_back(&I);
3252 continue;
3253 }
3254
3255 // Ignore ephemeral values, they will be dropped by the transform.
3256 if (EphTracker.track(&I))
3257 continue;
3258
3259 // Only speculatively execute a single instruction (not counting the
3260 // terminator) for now.
3261 bool IsSafeCheapLoadStore = HoistLoadsStores &&
3263 SpeculatedConditionalLoadsStores.size() <
3265 // Not count load/store into cost if target supports conditional faulting
3266 // b/c it's cheap to speculate it.
3267 if (IsSafeCheapLoadStore)
3268 SpeculatedConditionalLoadsStores.push_back(&I);
3269 else
3270 ++SpeculatedInstructions;
3271
3272 if (SpeculatedInstructions > 1)
3273 return false;
3274
3275 // Don't hoist the instruction if it's unsafe or expensive.
3276 if (!IsSafeCheapLoadStore &&
3278 !(HoistCondStores && !SpeculatedStoreValue &&
3279 (SpeculatedStoreValue =
3280 isSafeToSpeculateStore(&I, BB, ThenBB, EndBB))))
3281 return false;
3282 if (!IsSafeCheapLoadStore && !SpeculatedStoreValue &&
3285 return false;
3286
3287 // Store the store speculation candidate.
3288 if (!SpeculatedStore && SpeculatedStoreValue)
3289 SpeculatedStore = cast<StoreInst>(&I);
3290
3291 // Do not hoist the instruction if any of its operands are defined but not
3292 // used in BB. The transformation will prevent the operand from
3293 // being sunk into the use block.
3294 for (Use &Op : I.operands()) {
3296 if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
3297 continue; // Not a candidate for sinking.
3298
3299 ++SinkCandidateUseCounts[OpI];
3300 }
3301 }
3302
3303 // Consider any sink candidates which are only used in ThenBB as costs for
3304 // speculation. Note, while we iterate over a DenseMap here, we are summing
3305 // and so iteration order isn't significant.
3306 for (const auto &[Inst, Count] : SinkCandidateUseCounts)
3307 if (Inst->hasNUses(Count)) {
3308 ++SpeculatedInstructions;
3309 if (SpeculatedInstructions > 1)
3310 return false;
3311 }
3312
3313 // Check that we can insert the selects and that it's not too expensive to do
3314 // so.
3315 bool Convert =
3316 SpeculatedStore != nullptr || !SpeculatedConditionalLoadsStores.empty();
3318 Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
3319 SpeculatedInstructions, Cost, TTI);
3320 if (!Convert || Cost > Budget)
3321 return false;
3322
3323 // If we get here, we can hoist the instruction and if-convert.
3324 LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
3325
3326 Instruction *Sel = nullptr;
3327 // Insert a select of the value of the speculated store.
3328 if (SpeculatedStoreValue) {
3329 IRBuilder<NoFolder> Builder(BI);
3330 Value *OrigV = SpeculatedStore->getValueOperand();
3331 Value *TrueV = SpeculatedStore->getValueOperand();
3332 Value *FalseV = SpeculatedStoreValue;
3333 if (Invert)
3334 std::swap(TrueV, FalseV);
3335 Value *S = Builder.CreateSelect(
3336 BrCond, TrueV, FalseV, "spec.store.select", BI);
3337 Sel = cast<Instruction>(S);
3338 SpeculatedStore->setOperand(0, S);
3339 SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
3340 SpeculatedStore->getDebugLoc());
3341 // The value stored is still conditional, but the store itself is now
3342 // unconditonally executed, so we must be sure that any linked dbg.assign
3343 // intrinsics are tracking the new stored value (the result of the
3344 // select). If we don't, and the store were to be removed by another pass
3345 // (e.g. DSE), then we'd eventually end up emitting a location describing
3346 // the conditional value, unconditionally.
3347 //
3348 // === Before this transformation ===
3349 // pred:
3350 // store %one, %x.dest, !DIAssignID !1
3351 // dbg.assign %one, "x", ..., !1, ...
3352 // br %cond if.then
3353 //
3354 // if.then:
3355 // store %two, %x.dest, !DIAssignID !2
3356 // dbg.assign %two, "x", ..., !2, ...
3357 //
3358 // === After this transformation ===
3359 // pred:
3360 // store %one, %x.dest, !DIAssignID !1
3361 // dbg.assign %one, "x", ..., !1
3362 /// ...
3363 // %merge = select %cond, %two, %one
3364 // store %merge, %x.dest, !DIAssignID !2
3365 // dbg.assign %merge, "x", ..., !2
3366 for (DbgVariableRecord *DbgAssign :
3367 at::getDVRAssignmentMarkers(SpeculatedStore))
3368 if (llvm::is_contained(DbgAssign->location_ops(), OrigV))
3369 DbgAssign->replaceVariableLocationOp(OrigV, S);
3370 }
3371
3372 // Metadata can be dependent on the condition we are hoisting above.
3373 // Strip all UB-implying metadata on the instruction. Drop the debug loc
3374 // to avoid making it appear as if the condition is a constant, which would
3375 // be misleading while debugging.
3376 // Similarly strip attributes that maybe dependent on condition we are
3377 // hoisting above.
3378 for (auto &I : make_early_inc_range(*ThenBB)) {
3379 if (!SpeculatedStoreValue || &I != SpeculatedStore) {
3380 I.dropLocation();
3381 }
3382 I.dropUBImplyingAttrsAndMetadata();
3383
3384 // Drop ephemeral values.
3385 if (EphTracker.contains(&I)) {
3386 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
3387 I.eraseFromParent();
3388 }
3389 }
3390
3391 // Hoist the instructions.
3392 // Drop DbgVariableRecords attached to these instructions.
3393 for (auto &It : *ThenBB)
3394 for (DbgRecord &DR : make_early_inc_range(It.getDbgRecordRange()))
3395 // Drop all records except assign-kind DbgVariableRecords (dbg.assign
3396 // equivalent).
3397 if (DbgVariableRecord *DVR = dyn_cast<DbgVariableRecord>(&DR);
3398 !DVR || !DVR->isDbgAssign())
3399 It.dropOneDbgRecord(&DR);
3400 BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
3401 std::prev(ThenBB->end()));
3402
3403 if (!SpeculatedConditionalLoadsStores.empty())
3404 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert,
3405 Sel);
3406
3407 // Insert selects and rewrite the PHI operands.
3408 IRBuilder<NoFolder> Builder(BI);
3409 for (PHINode &PN : EndBB->phis()) {
3410 unsigned OrigI = PN.getBasicBlockIndex(BB);
3411 unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
3412 Value *OrigV = PN.getIncomingValue(OrigI);
3413 Value *ThenV = PN.getIncomingValue(ThenI);
3414
3415 // Skip PHIs which are trivial.
3416 if (OrigV == ThenV)
3417 continue;
3418
3419 // Create a select whose true value is the speculatively executed value and
3420 // false value is the pre-existing value. Swap them if the branch
3421 // destinations were inverted.
3422 Value *TrueV = ThenV, *FalseV = OrigV;
3423 if (Invert)
3424 std::swap(TrueV, FalseV);
3425 Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
3426 PN.setIncomingValue(OrigI, V);
3427 PN.setIncomingValue(ThenI, V);
3428 }
3429
3430 // Remove speculated pseudo probes.
3431 for (Instruction *I : SpeculatedPseudoProbes)
3432 I->eraseFromParent();
3433
3434 ++NumSpeculations;
3435 return true;
3436}
3437
3439
3440// Return false if number of blocks searched is too much.
3441static bool findReaching(BasicBlock *BB, BasicBlock *DefBB,
3442 BlocksSet &ReachesNonLocalUses) {
3443 if (BB == DefBB)
3444 return true;
3445 if (!ReachesNonLocalUses.insert(BB).second)
3446 return true;
3447
3448 if (ReachesNonLocalUses.size() > MaxJumpThreadingLiveBlocks)
3449 return false;
3450 for (BasicBlock *Pred : predecessors(BB))
3451 if (!findReaching(Pred, DefBB, ReachesNonLocalUses))
3452 return false;
3453 return true;
3454}
3455
/// Return true if we can thread a branch across this block.
/// On success, \p NonLocalUseBlocks is populated with every block outside
/// \p BB that uses a value defined inside \p BB.
static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB,
                                               BlocksSet &NonLocalUseBlocks) {
  int Size = 0;
  EphemeralValueTracker EphTracker;

  // Walk the loop in reverse so that we can identify ephemeral values properly
  // (values only feeding assumes).
  for (Instruction &I : reverse(BB->instructionsWithoutDebug(false))) {
    // Can't fold blocks that contain noduplicate or convergent calls.
    if (CallInst *CI = dyn_cast<CallInst>(&I))
      if (CI->cannotDuplicate() || CI->isConvergent())
        return false;

    // Ignore ephemeral values which are deleted during codegen.
    // We will delete Phis while threading, so Phis should not be accounted in
    // block's size.
    if (!EphTracker.track(&I) && !isa<PHINode>(I)) {
      if (Size++ > MaxSmallBlockSize)
        return false; // Don't clone large BB's.
    }

    // Record blocks with non-local uses of values defined in the current basic
    // block.
    for (User *U : I.users()) {
      Instruction *UI = cast<Instruction>(U);
      BasicBlock *UsedInBB = UI->getParent();
      if (UsedInBB == BB) {
        // A same-block PHI use would be fed by the threaded value; bail out.
        if (isa<PHINode>(UI))
          return false;
      } else
        NonLocalUseBlocks.insert(UsedInBB);
    }

    // Looks ok, continue checking.
  }

  return true;
}
3495
/// Return the constant i1 value of \p V on the CFG edge From->To if the edge
/// pins it (i.e. \p From ends in a conditional branch on V whose two
/// successors are distinct); return nullptr otherwise.
static ConstantInt *getKnownValueOnEdge(Value *V, BasicBlock *From,
                                        BasicBlock *To) {
  // Don't look past the block defining the value, we might get the value from
  // a previous loop iteration.
  auto *I = dyn_cast<Instruction>(V);
  if (I && I->getParent() == To)
    return nullptr;

  // We know the value if the From block branches on it.
  auto *BI = dyn_cast<BranchInst>(From->getTerminator());
  if (BI && BI->isConditional() && BI->getCondition() == V &&
      BI->getSuccessor(0) != BI->getSuccessor(1))
    return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext())
                                     : ConstantInt::getFalse(BI->getContext());

  return nullptr;
}
3513
/// If we have a conditional branch on something for which we know the constant
/// value in predecessors (e.g. a phi node in the current block), thread edges
/// from the predecessor to their ultimate destination.
///
/// Returns true if a change was made, false if nothing could be done, and
/// std::nullopt when one constant was threaded and the caller should invoke
/// this again to try the remaining constants.
static std::optional<bool>
foldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,
                                            const DataLayout &DL,
                                            AssumptionCache *AC) {
  // Map each known constant condition value to the predecessors on whose
  // incoming edges the condition takes that value.
  SmallMapVector<ConstantInt *, SmallSetVector<BasicBlock *, 2>, 2> KnownValues;
  BasicBlock *BB = BI->getParent();
  Value *Cond = BI->getCondition();
  PHINode *PN = dyn_cast<PHINode>(Cond);
  if (PN && PN->getParent() == BB) {
    // Degenerate case of a single entry PHI.
    if (PN->getNumIncomingValues() == 1) {
      FoldSingleEntryPHINodes(PN->getParent());
      return true;
    }

    for (Use &U : PN->incoming_values())
      if (auto *CB = dyn_cast<ConstantInt>(U))
        KnownValues[CB].insert(PN->getIncomingBlock(U));
  } else {
    // No phi: see if any predecessor's terminator pins the condition's value.
    for (BasicBlock *Pred : predecessors(BB)) {
      if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB))
        KnownValues[CB].insert(Pred);
    }
  }

  if (KnownValues.empty())
    return false;

  // Now we know that this block has multiple preds and two succs.
  // Check that the block is small enough and record which non-local blocks use
  // values defined in the block.

  BlocksSet NonLocalUseBlocks;
  BlocksSet ReachesNonLocalUseBlocks;
  if (!blockIsSimpleEnoughToThreadThrough(BB, NonLocalUseBlocks))
    return false;

  // Jump-threading can only be done to destinations where no values defined
  // in BB are live.

  // Quickly check if both destinations have uses. If so, jump-threading cannot
  // be done.
  if (NonLocalUseBlocks.contains(BI->getSuccessor(0)) &&
      NonLocalUseBlocks.contains(BI->getSuccessor(1)))
    return false;

  // Search backward from NonLocalUseBlocks to find which blocks
  // reach non-local uses.
  for (BasicBlock *UseBB : NonLocalUseBlocks)
    // Give up if too many blocks are searched.
    if (!findReaching(UseBB, BB, ReachesNonLocalUseBlocks))
      return false;

  for (const auto &Pair : KnownValues) {
    ConstantInt *CB = Pair.first;
    ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
    // A true condition selects successor 0, a false one successor 1.
    BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());

    // Okay, we now know that all edges from PredBB should be revectored to
    // branch to RealDest.
    if (RealDest == BB)
      continue; // Skip self loops.

    // Skip if the predecessor's terminator is an indirect branch.
    if (any_of(PredBBs, [](BasicBlock *PredBB) {
          return isa<IndirectBrInst>(PredBB->getTerminator());
        }))
      continue;

    // Only revector to RealDest if no values defined in BB are live.
    if (ReachesNonLocalUseBlocks.contains(RealDest))
      continue;

    LLVM_DEBUG({
      dbgs() << "Condition " << *Cond << " in " << BB->getName()
             << " has value " << *Pair.first << " in predecessors:\n";
      for (const BasicBlock *PredBB : Pair.second)
        dbgs() << "  " << PredBB->getName() << "\n";
      dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
    });

    // Split the predecessors we are threading into a new edge block. We'll
    // clone the instructions into this block, and then redirect it to RealDest.
    BasicBlock *EdgeBB = SplitBlockPredecessors(BB, PredBBs, ".critedge", DTU);

    // TODO: These just exist to reduce test diff, we can drop them if we like.
    EdgeBB->setName(RealDest->getName() + ".critedge");
    EdgeBB->moveBefore(RealDest);

    // Update PHI nodes.
    addPredecessorToBlock(RealDest, EdgeBB, BB);

    // BB may have instructions that are being threaded over. Clone these
    // instructions into EdgeBB. We know that there will be no uses of the
    // cloned instructions outside of EdgeBB.
    BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
    ValueToValueMapTy TranslateMap; // Track translated values.
    // The branch condition is known to be CB inside EdgeBB.
    TranslateMap[Cond] = CB;

    // RemoveDIs: track instructions that we optimise away while folding, so
    // that we can copy DbgVariableRecords from them later.
    BasicBlock::iterator SrcDbgCursor = BB->begin();
    for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
      if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
        // PHIs are not cloned; they resolve to the value incoming from EdgeBB.
        TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB);
        continue;
      }
      // Clone the instruction.
      Instruction *N = BBI->clone();
      // Insert the new instruction into its new home.
      N->insertInto(EdgeBB, InsertPt);

      if (BBI->hasName())
        N->setName(BBI->getName() + ".c");

      // Update operands due to translation.
      // Key Instructions: Remap all the atom groups.
      if (const DebugLoc &DL = BBI->getDebugLoc())
        mapAtomInstance(DL, TranslateMap);
      RemapInstruction(N, TranslateMap,
                       RF_IgnoreMissingLocals | RF_NoModuleLevelChanges);

      // Check for trivial simplification.
      if (Value *V = simplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
        if (!BBI->use_empty())
          TranslateMap[&*BBI] = V;
        if (!N->mayHaveSideEffects()) {
          N->eraseFromParent(); // Instruction folded away, don't need actual
                                // inst
          N = nullptr;
        }
      } else {
        if (!BBI->use_empty())
          TranslateMap[&*BBI] = N;
      }
      if (N) {
        // Copy all debug-info attached to instructions from the last we
        // successfully clone, up to this instruction (they might have been
        // folded away).
        for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
          N->cloneDebugInfoFrom(&*SrcDbgCursor);
        SrcDbgCursor = std::next(BBI);
        // Clone debug-info on this instruction too.
        N->cloneDebugInfoFrom(&*BBI);

        // Register the new instruction with the assumption cache if necessary.
        if (auto *Assume = dyn_cast<AssumeInst>(N))
          if (AC)
            AC->registerAssumption(Assume);
      }
    }

    // Carry over debug records attached between the last cloned instruction
    // and the branch itself.
    for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
      InsertPt->cloneDebugInfoFrom(&*SrcDbgCursor);
    InsertPt->cloneDebugInfoFrom(BI);

    // Redirect EdgeBB straight to RealDest, bypassing BB.
    BB->removePredecessor(EdgeBB);
    BranchInst *EdgeBI = cast<BranchInst>(EdgeBB->getTerminator());
    EdgeBI->setSuccessor(0, RealDest);
    EdgeBI->setDebugLoc(BI->getDebugLoc());

    if (DTU) {
      SmallVector<DominatorTree::UpdateType, 2> Updates;
      Updates.push_back({DominatorTree::Delete, EdgeBB, BB});
      Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
      DTU->applyUpdates(Updates);
    }

    // For simplicity, we created a separate basic block for the edge. Merge
    // it back into the predecessor if possible. This not only avoids
    // unnecessary SimplifyCFG iterations, but also makes sure that we don't
    // bypass the check for trivial cycles above.
    MergeBlockIntoPredecessor(EdgeBB, DTU);

    // Signal repeat, simplifying any other constants.
    return std::nullopt;
  }

  return false;
}
3697
/// Driver for foldCondBranchOnValueKnownInPredecessorImpl: repeat the fold
/// until it reports it made no further progress.
bool SimplifyCFGOpt::foldCondBranchOnValueKnownInPredecessor(BranchInst *BI) {
  // Note: If BB is a loop header then there is a risk that threading introduces
  // a non-canonical loop by moving a back edge. So we avoid this optimization
  // for loop headers if NeedCanonicalLoop is set.
  if (Options.NeedCanonicalLoop && is_contained(LoopHeaders, BI->getParent()))
    return false;

  std::optional<bool> Result;
  bool EverChanged = false;
  do {
    // Note that None means "we changed things, but recurse further."
    Result =
        foldCondBranchOnValueKnownInPredecessorImpl(BI, DTU, DL, Options.AC);
    EverChanged |= Result == std::nullopt || *Result;
  } while (Result == std::nullopt);
  return EverChanged;
}
3715
/// Given a BB that starts with the specified two-entry PHI node,
/// see if we can eliminate it. On success every PHI in \p BB is replaced by a
/// select on the dominating branch's condition and the diamond/triangle is
/// flattened into a single unconditional edge.
static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
                                DomTreeUpdater *DTU, AssumptionCache *AC,
                                const DataLayout &DL,
                                bool SpeculateUnpredictables) {
  // Ok, this is a two entry PHI node.  Check to see if this is a simple "if
  // statement", which has a very simple dominance structure.  Basically, we
  // are trying to find the condition that is being branched on, which
  // subsequently causes this merge to happen.  We really want control
  // dependence information for this check, but simplifycfg can't keep it up
  // to date, and this catches most of the cases we care about anyway.
  BasicBlock *BB = PN->getParent();

  BasicBlock *IfTrue, *IfFalse;
  BranchInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
  if (!DomBI)
    return false;
  Value *IfCond = DomBI->getCondition();
  // Don't bother if the branch will be constant folded trivially.
  if (isa<ConstantInt>(IfCond))
    return false;

  BasicBlock *DomBlock = DomBI->getParent();
  // Collect the side block(s) of the diamond/triangle (those that fall
  // through unconditionally to BB).
  SmallVector<BasicBlock *, 2> IfBlocks;
  llvm::copy_if(
      PN->blocks(), std::back_inserter(IfBlocks), [](BasicBlock *IfBlock) {
        return cast<BranchInst>(IfBlock->getTerminator())->isUnconditional();
      });
  assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
         "Will have either one or two blocks to speculate.");

  // If the branch is non-unpredictable, see if we either predictably jump to
  // the merge bb (if we have only a single 'then' block), or if we predictably
  // jump to one specific 'then' block (if we have two of them).
  // It isn't beneficial to speculatively execute the code
  // from the block that we know is predictably not entered.
  bool IsUnpredictable = DomBI->getMetadata(LLVMContext::MD_unpredictable);
  if (!IsUnpredictable) {
    uint64_t TWeight, FWeight;
    if (extractBranchWeights(*DomBI, TWeight, FWeight) &&
        (TWeight + FWeight) != 0) {
      BranchProbability BITrueProb =
          BranchProbability::getBranchProbability(TWeight, TWeight + FWeight);
      BranchProbability Likely = TTI.getPredictableBranchThreshold();
      BranchProbability BIFalseProb = BITrueProb.getCompl();
      if (IfBlocks.size() == 1) {
        BranchProbability BIBBProb =
            DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb;
        if (BIBBProb >= Likely)
          return false;
      } else {
        if (BITrueProb >= Likely || BIFalseProb >= Likely)
          return false;
      }
    }
  }

  // Don't try to fold an unreachable block. For example, the phi node itself
  // can't be the candidate if-condition for a select that we want to form.
  if (auto *IfCondPhiInst = dyn_cast<PHINode>(IfCond))
    if (IfCondPhiInst->getParent() == BB)
      return false;

  // Okay, we found that we can merge this two-entry phi node into a select.
  // Doing so would require us to fold *all* two entry phi nodes in this block.
  // At some point this becomes non-profitable (particularly if the target
  // doesn't support cmov's).  Only do this transformation if there are two or
  // fewer PHI nodes in this block.
  unsigned NumPhis = 0;
  for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
    if (NumPhis > 2)
      return false;

  // Loop over the PHI's seeing if we can promote them all to select
  // instructions.  While we are at it, keep track of the instructions
  // that need to be moved to the dominating block.
  SmallPtrSet<Instruction *, 4> AggressiveInsts;
  SmallPtrSet<Instruction *, 2> ZeroCostInstructions;
  InstructionCost Cost = 0;
  InstructionCost Budget =
      TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
  // A known-unpredictable branch justifies spending more on speculation.
  if (SpeculateUnpredictables && IsUnpredictable)
    Budget += TTI.getBranchMispredictPenalty();

  bool Changed = false;
  for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
    PHINode *PN = cast<PHINode>(II++);
    if (Value *V = simplifyInstruction(PN, {DL, PN})) {
      PN->replaceAllUsesWith(V);
      PN->eraseFromParent();
      Changed = true;
      continue;
    }

    if (!dominatesMergePoint(PN->getIncomingValue(0), BB, DomBI,
                             AggressiveInsts, Cost, Budget, TTI, AC,
                             ZeroCostInstructions) ||
        !dominatesMergePoint(PN->getIncomingValue(1), BB, DomBI,
                             AggressiveInsts, Cost, Budget, TTI, AC,
                             ZeroCostInstructions))
      return Changed;
  }

  // If we folded the first phi, PN dangles at this point.  Refresh it.  If
  // we ran out of PHIs then we simplified them all.
  PN = dyn_cast<PHINode>(BB->begin());
  if (!PN)
    return true;

  // Return true if at least one of these is a 'not', and another is either
  // a 'not' too, or a constant.
  auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) {
    if (!match(V0, m_Not(m_Value())))
      std::swap(V0, V1);
    auto Invertible = m_CombineOr(m_Not(m_Value()), m_AnyIntegralConstant());
    return match(V0, m_Not(m_Value())) && match(V1, Invertible);
  };

  // Don't fold i1 branches on PHIs which contain binary operators or
  // (possibly inverted) select form of or/ands,  unless one of
  // the incoming values is an 'not' and another one is freely invertible.
  // These can often be turned into switches and other things.
  auto IsBinOpOrAnd = [](Value *V) {
    return match(
        V, m_CombineOr(m_BinOp(), m_CombineOr(m_LogicalAnd(), m_LogicalOr())));
  };
  if (PN->getType()->isIntegerTy(1) &&
      (IsBinOpOrAnd(PN->getIncomingValue(0)) ||
       IsBinOpOrAnd(PN->getIncomingValue(1)) || IsBinOpOrAnd(IfCond)) &&
      !CanHoistNotFromBothValues(PN->getIncomingValue(0),
                                 PN->getIncomingValue(1)))
    return Changed;

  // If all PHI nodes are promotable, check to make sure that all instructions
  // in the predecessor blocks can be promoted as well. If not, we won't be able
  // to get rid of the control flow, so it's not worth promoting to select
  // instructions.
  for (BasicBlock *IfBlock : IfBlocks)
    for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
      if (!AggressiveInsts.count(&*I) && !I->isDebugOrPseudoInst()) {
        // This is not an aggressive instruction that we can promote.
        // Because of this, we won't be able to get rid of the control flow, so
        // the xform is not worth it.
        return Changed;
      }

  // If either of the blocks has it's address taken, we can't do this fold.
  if (any_of(IfBlocks,
             [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
    return Changed;

  LLVM_DEBUG(dbgs() << "FOUND IF CONDITION!  " << *IfCond;
             if (IsUnpredictable) dbgs() << " (unpredictable)";
             dbgs() << "  T: " << IfTrue->getName()
                    << "  F: " << IfFalse->getName() << "\n");

  // If we can still promote the PHI nodes after this gauntlet of tests,
  // do all of the PHI's now.

  // Move all 'aggressive' instructions, which are defined in the
  // conditional parts of the if's up to the dominating block.
  for (BasicBlock *IfBlock : IfBlocks)
    hoistAllInstructionsInto(DomBlock, DomBI, IfBlock);

  IRBuilder<NoFolder> Builder(DomBI);
  // Propagate fast-math-flags from phi nodes to replacement selects.
  while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
    // Change the PHI node into a select instruction.
    Value *TrueVal = PN->getIncomingValueForBlock(IfTrue);
    Value *FalseVal = PN->getIncomingValueForBlock(IfFalse);

    Value *Sel = Builder.CreateSelectFMF(IfCond, TrueVal, FalseVal,
                                         isa<FPMathOperator>(PN) ? PN : nullptr,
                                         "", DomBI);
    PN->replaceAllUsesWith(Sel);
    Sel->takeName(PN);
    PN->eraseFromParent();
  }

  // At this point, all IfBlocks are empty, so our if statement
  // has been flattened.  Change DomBlock to jump directly to our new block to
  // avoid other simplifycfg's kicking in on the diamond.
  Builder.CreateBr(BB);

  SmallVector<DominatorTree::UpdateType, 3> Updates;
  if (DTU) {
    Updates.push_back({DominatorTree::Insert, DomBlock, BB});
    for (auto *Successor : successors(DomBlock))
      Updates.push_back({DominatorTree::Delete, DomBlock, Successor});
  }

  DomBI->eraseFromParent();
  if (DTU)
    DTU->applyUpdates(Updates);

  return true;
}
3914
/// Create an and/or of \p LHS and \p RHS. Uses a plain binary operator when
/// RHS cannot introduce poison that LHS would not already imply; otherwise
/// falls back to the select-based "logical" form, which does not propagate
/// poison from the unevaluated operand.
static Value *createLogicalOp(IRBuilderBase &Builder,
                              Instruction::BinaryOps Opc, Value *LHS,
                              Value *RHS, const Twine &Name = "") {
  // Try to relax logical op to binary op.
  if (impliesPoison(RHS, LHS))
    return Builder.CreateBinOp(Opc, LHS, RHS, Name);
  if (Opc == Instruction::And)
    return Builder.CreateLogicalAnd(LHS, RHS, Name);
  if (Opc == Instruction::Or)
    return Builder.CreateLogicalOr(LHS, RHS, Name);
  llvm_unreachable("Invalid logical opcode");
}
3927
/// Return true if either PBI or BI has branch weight available, and store
/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
/// not have branch weight, use 1:1 as its weight.
static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
                                   uint64_t &PredTrueWeight,
                                   uint64_t &PredFalseWeight,
                                   uint64_t &SuccTrueWeight,
                                   uint64_t &SuccFalseWeight) {
  bool PredHasWeights =
      extractBranchWeights(*PBI, PredTrueWeight, PredFalseWeight);
  bool SuccHasWeights =
      extractBranchWeights(*BI, SuccTrueWeight, SuccFalseWeight);
  if (PredHasWeights || SuccHasWeights) {
    // Substitute a neutral 1:1 weight for whichever branch lacks profile data.
    if (!PredHasWeights)
      PredTrueWeight = PredFalseWeight = 1;
    if (!SuccHasWeights)
      SuccTrueWeight = SuccFalseWeight = 1;
    return true;
  } else {
    return false;
  }
}
3950
/// Determine if the two branches share a common destination and deduce a glue
/// that joins the branches' conditions to arrive at the common destination if
/// that would be profitable.
///
/// The returned tuple is (common successor, binary opcode combining the two
/// conditions, whether PBI's condition must be inverted first). Returns
/// std::nullopt when no successor is shared or PBI is predictable enough that
/// speculating BI's condition is not worthwhile.
static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI,
                                          const TargetTransformInfo *TTI) {
  assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
         "Both blocks must end with a conditional branches.");
  assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) &&
         "PredBB must be a predecessor of BB.");

  // We have the potential to fold the conditions together, but if the
  // predecessor branch is predictable, we may not want to merge them.
  uint64_t PTWeight, PFWeight;
  BranchProbability PBITrueProb, Likely;
  if (TTI && !PBI->getMetadata(LLVMContext::MD_unpredictable) &&
      extractBranchWeights(*PBI, PTWeight, PFWeight) &&
      (PTWeight + PFWeight) != 0) {
    PBITrueProb =
        BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
    Likely = TTI->getPredictableBranchThreshold();
  }

  if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
    // Speculate the 2nd condition unless the 1st is probably true.
    if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
      return {{BI->getSuccessor(0), Instruction::Or, false}};
  } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
    // Speculate the 2nd condition unless the 1st is probably false.
    if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
      return {{BI->getSuccessor(1), Instruction::And, false}};
  } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
    // Speculate the 2nd condition unless the 1st is probably true.
    if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
      return {{BI->getSuccessor(1), Instruction::And, true}};
  } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
    // Speculate the 2nd condition unless the 1st is probably false.
    if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
      return {{BI->getSuccessor(0), Instruction::Or, true}};
  }
  return std::nullopt;
}
3993
/// Fold the conditional branch \p BI into its predecessor's conditional
/// branch \p PBI, combining the two conditions with and/or. Callers must have
/// already verified (via shouldFoldCondBranchesToCommonDestination) that a
/// folding recipe exists — the optional is dereferenced unconditionally below.
static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
                                             DomTreeUpdater *DTU,
                                             MemorySSAUpdater *MSSAU,
                                             const TargetTransformInfo *TTI) {
  BasicBlock *BB = BI->getParent();
  BasicBlock *PredBlock = PBI->getParent();

  // Determine if the two branches share a common destination.
  BasicBlock *CommonSucc;
  Instruction::BinaryOps Opc;
  bool InvertPredCond;
  std::tie(CommonSucc, Opc, InvertPredCond) =
      *shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI);

  LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);

  IRBuilder<> Builder(PBI);
  // The builder is used to create instructions to eliminate the branch in BB.
  // If BB's terminator has !annotation metadata, add it to the new
  // instructions.
  Builder.CollectMetadataToCopy(BB->getTerminator(),
                                {LLVMContext::MD_annotation});

  // If we need to invert the condition in the pred block to match, do so now.
  if (InvertPredCond) {
    InvertBranch(PBI, Builder);
  }

  BasicBlock *UniqueSucc =
      PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1);

  // Before cloning instructions, notify the successor basic block that it
  // is about to have a new predecessor. This will update PHI nodes,
  // which will allow us to update live-out uses of bonus instructions.
  addPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU);

  // Try to update branch weights.
  uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
  SmallVector<uint64_t, 2> MDWeights;
  if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
                             SuccTrueWeight, SuccFalseWeight)) {

    if (PBI->getSuccessor(0) == BB) {
      // PBI: br i1 %x, BB, FalseDest
      // BI:  br i1 %y, UniqueSucc, FalseDest
      // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
      MDWeights.push_back(PredTrueWeight * SuccTrueWeight);
      // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
      //               TrueWeight for PBI * FalseWeight for BI.
      // We assume that total weights of a BranchInst can fit into 32 bits.
      // Therefore, we will not have overflow using 64-bit arithmetic.
      MDWeights.push_back(PredFalseWeight * (SuccFalseWeight + SuccTrueWeight) +
                          PredTrueWeight * SuccFalseWeight);
    } else {
      // PBI: br i1 %x, TrueDest, BB
      // BI:  br i1 %y, TrueDest, UniqueSucc
      // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
      //              FalseWeight for PBI * TrueWeight for BI.
      MDWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
                          PredFalseWeight * SuccTrueWeight);
      // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
      MDWeights.push_back(PredFalseWeight * SuccFalseWeight);
    }

    setFittedBranchWeights(*PBI, MDWeights, /*IsExpected=*/false,
                           /*ElideAllZero=*/true);

    // TODO: If BB is reachable from all paths through PredBlock, then we
    // could replace PBI's branch probabilities with BI's.
  } else
    PBI->setMetadata(LLVMContext::MD_prof, nullptr);

  // Now, update the CFG.
  PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc);

  if (DTU)
    DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
                       {DominatorTree::Delete, PredBlock, BB}});

  // If BI was a loop latch, it may have had associated loop metadata.
  // We need to copy it to the new latch, that is, PBI.
  if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
    PBI->setMetadata(LLVMContext::MD_loop, LoopMD);

  ValueToValueMapTy VMap; // maps original values to cloned values
  cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BB, PredBlock, VMap);

  Module *M = BB->getModule();

  // Remap the debug records cloned onto the predecessor's terminator.
  PredBlock->getTerminator()->cloneDebugInfoFrom(BB->getTerminator());
  for (DbgVariableRecord &DVR :
       filterDbgVars(PredBlock->getTerminator()->getDbgRecordRange())) {
    RemapDbgRecord(M, &DVR, VMap,
                   RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
  }

  // Now that the Cond was cloned into the predecessor basic block,
  // or/and the two conditions together.
  Value *BICond = VMap[BI->getCondition()];
  PBI->setCondition(
      createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond"));
  // If the combined condition became a select (logical and/or), give it the
  // same weights we computed for the branch.
  if (auto *SI = dyn_cast<SelectInst>(PBI->getCondition()))
    if (!MDWeights.empty()) {
      assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
      setFittedBranchWeights(*SI, {MDWeights[0], MDWeights[1]},
                             /*IsExpected=*/false, /*ElideAllZero=*/true);
    }

  ++NumFoldBranchToCommonDest;
  return true;
}
4106
4107/// Return if an instruction's type or any of its operands' types are a vector
4108/// type.
4109static bool isVectorOp(Instruction &I) {
4110 return I.getType()->isVectorTy() || any_of(I.operands(), [](Use &U) {
4111 return U->getType()->isVectorTy();
4112 });
4113}
4114
/// If this basic block is simple enough, and if a predecessor branches to us
/// and one of our successors, fold the block into the predecessor and use
/// logical operations to pick the right destination.
bool llvm::foldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
                                  MemorySSAUpdater *MSSAU,
                                  const TargetTransformInfo *TTI,
                                  unsigned BonusInstThreshold) {
  // If this block ends with an unconditional branch,
  // let speculativelyExecuteBB() deal with it.
  if (!BI->isConditional())
    return false;

  BasicBlock *BB = BI->getParent();
  TargetTransformInfo::TargetCostKind CostKind =
      BB->getParent()->hasMinSize() ? TargetTransformInfo::TCK_CodeSize
                                    : TargetTransformInfo::TCK_SizeAndLatency;

  Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());

  // The condition must be a single-use cmp/binop/select/trunc defined here so
  // it can be cloned into each predecessor and combined with its condition.
  if (!Cond || !isa<CmpInst, BinaryOperator, SelectInst, TruncInst>(Cond) ||
      Cond->getParent() != BB || !Cond->hasOneUse())
    return false;

  // Finally, don't infinitely unroll conditional loops.
  if (is_contained(successors(BB), BB))
    return false;

  // With which predecessors will we want to deal with?
  SmallVector<BasicBlock *, 8> Preds;
  for (BasicBlock *PredBlock : predecessors(BB)) {
    BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());

    // Check that we have two conditional branches.  If there is a PHI node in
    // the common successor, verify that the same value flows in from both
    // blocks.
    if (!PBI || PBI->isUnconditional() || !safeToMergeTerminators(BI, PBI))
      continue;

    // Determine if the two branches share a common destination.
    BasicBlock *CommonSucc;
    Instruction::BinaryOps Opc;
    bool InvertPredCond;
    if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
      std::tie(CommonSucc, Opc, InvertPredCond) = *Recipe;
    else
      continue;

    // Check the cost of inserting the necessary logic before performing the
    // transformation.
    if (TTI) {
      Type *Ty = BI->getCondition()->getType();
      InstructionCost Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind);
      // An inversion that cannot be folded into an existing single-use
      // compare costs an extra xor.
      if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
                             !isa<CmpInst>(PBI->getCondition())))
        Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);

      if (Cost > BranchFoldThreshold)
        continue;
    }

    // Ok, we do want to deal with this predecessor. Record it.
    Preds.emplace_back(PredBlock);
  }

  // If there aren't any predecessors into which we can fold,
  // don't bother checking the cost.
  if (Preds.empty())
    return false;

  // Only allow this transformation if computing the condition doesn't involve
  // too many instructions and these involved instructions can be executed
  // unconditionally. We denote all involved instructions except the condition
  // as "bonus instructions", and only allow this transformation when the
  // number of the bonus instructions we'll need to create when cloning into
  // each predecessor does not exceed a certain threshold.
  unsigned NumBonusInsts = 0;
  bool SawVectorOp = false;
  const unsigned PredCount = Preds.size();
  for (Instruction &I : *BB) {
    // Don't check the branch condition comparison itself.
    if (&I == Cond)
      continue;
    // Ignore the terminator.
    if (isa<BranchInst>(I))
      continue;
    // I must be safe to execute unconditionally.
    if (!isSafeToSpeculativelyExecute(&I))
      return false;
    SawVectorOp |= isVectorOp(I);

    // Account for the cost of duplicating this instruction into each
    // predecessor. Ignore free instructions.
    if (!TTI || TTI->getInstructionCost(&I, CostKind) !=
                    TargetTransformInfo::TCC_Free)
      NumBonusInsts += PredCount;

    // Early exits once we reach the limit.
    if (NumBonusInsts >
        BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
      return false;

    // A use is "block-closed-SSA" if it is a PHI incoming from BB or a later
    // use inside BB itself; only such uses can be rewritten after cloning.
    auto IsBCSSAUse = [BB, &I](Use &U) {
      auto *UI = cast<Instruction>(U.getUser());
      if (auto *PN = dyn_cast<PHINode>(UI))
        return PN->getIncomingBlock(U) == BB;
      return UI->getParent() == BB && I.comesBefore(UI);
    };

    // Does this instruction require rewriting of uses?
    if (!all_of(I.uses(), IsBCSSAUse))
      return false;
  }
  if (NumBonusInsts >
      BonusInstThreshold *
          (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
    return false;

  // Ok, we have the budget. Perform the transformation.
  for (BasicBlock *PredBlock : Preds) {
    auto *PBI = cast<BranchInst>(PredBlock->getTerminator());
    // Note: returning here folds only the first recorded predecessor per
    // invocation; presumably remaining candidates are picked up when the
    // caller revisits this block.
    return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
  }
  return false;
}
4240
// If there is only one store in BB1 and BB2, return it, otherwise return
// nullptr. Either block pointer may be null, in which case that block is
// skipped.
static StoreInst *findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2) {
  StoreInst *S = nullptr;
  for (auto *BB : {BB1, BB2}) {
    if (!BB)
      continue;
    for (auto &I : *BB)
      if (auto *SI = dyn_cast<StoreInst>(&I)) {
        if (S)
          // Multiple stores seen.
          return nullptr;
        else
          S = SI;
      }
  }
  return S;
}
4259
/// Make \p V, defined in \p BB, referencable in BB's single successor by
/// reusing an existing PHI there or creating a new one. See the block comment
/// below for the role of \p AlternativeV.
static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB,
                                              Value *AlternativeV = nullptr) {
  // PHI is going to be a PHI node that allows the value V that is defined in
  // BB to be referenced in BB's only successor.
  //
  // If AlternativeV is nullptr, the only value we care about in PHI is V. It
  // doesn't matter to us what the other operand is (it'll never get used). We
  // could just create a new PHI with an undef incoming value, but that could
  // increase register pressure if EarlyCSE/InstCombine can't fold it with some
  // other PHI. So here we directly look for some PHI in BB's successor with V
  // as an incoming operand. If we find one, we use it, else we create a new
  // one.
  //
  // If AlternativeV is not nullptr, we care about both incoming values in PHI.
  // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV]
  // where OtherBB is the single other predecessor of BB's only successor.
  PHINode *PHI = nullptr;
  BasicBlock *Succ = BB->getSingleSuccessor();

  for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
    if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
      PHI = cast<PHINode>(I);
      if (!AlternativeV)
        break;

      // Both incoming values matter: the other predecessor's incoming value
      // must be exactly AlternativeV, otherwise keep searching.
      assert(Succ->hasNPredecessors(2));
      auto PredI = pred_begin(Succ);
      BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
      if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
        break;
      PHI = nullptr;
    }
  if (PHI)
    return PHI;

  // If V is not an instruction defined in BB, just return it.
  if (!AlternativeV &&
      (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
    return V;

  // No reusable PHI found: create a fresh one.
  PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge");
  PHI->insertBefore(Succ->begin());
  PHI->addIncoming(V, BB);
  for (BasicBlock *PredBB : predecessors(Succ))
    if (PredBB != BB)
      PHI->addIncoming(
          AlternativeV ? AlternativeV : PoisonValue::get(V->getType()), PredBB);
  return PHI;
}
4309
    BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
    BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
    DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
  // For every pointer, there must be exactly two stores, one coming from
  // PTB or PFB, and the other from QTB or QFB. We don't support more than one
  // store (to any address) in PTB,PFB or QTB,QFB.
  // FIXME: We could relax this restriction with a bit more work and performance
  // testing.
  StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
  StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
  if (!PStore || !QStore)
    return false;

  // Now check the stores are compatible.
  // Only unordered (non-atomic/non-volatile-ordered) stores of values with
  // identical types can be merged into a single predicated store.
  if (!QStore->isUnordered() || !PStore->isUnordered() ||
      PStore->getValueOperand()->getType() !=
          QStore->getValueOperand()->getType())
    return false;

  // Check that sinking the store won't cause program behavior changes. Sinking
  // the store out of the Q blocks won't change any behavior as we're sinking
  // from a block to its unconditional successor. But we're moving a store from
  // the P blocks down through the middle block (QBI) and past both QFB and QTB.
  // So we need to check that there are no aliasing loads or stores in
  // QBI, QTB and QFB. We also need to check there are no conflicting memory
  // operations between PStore and the end of its parent block.
  //
  // The ideal way to do this is to query AliasAnalysis, but we don't
  // preserve AA currently so that is dangerous. Be super safe and just
  // check there are no other memory operations at all.
  for (auto &I : *QFB->getSinglePredecessor())
    if (I.mayReadOrWriteMemory())
      return false;
  for (auto &I : *QFB)
    if (&I != QStore && I.mayReadOrWriteMemory())
      return false;
  if (QTB)
    for (auto &I : *QTB)
      if (&I != QStore && I.mayReadOrWriteMemory())
        return false;
  for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
       I != E; ++I)
    if (&*I != PStore && I->mayReadOrWriteMemory())
      return false;

  // If we're not in aggressive mode, we only optimize if we have some
  // confidence that by optimizing we'll allow P and/or Q to be if-converted.
  auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
    if (!BB)
      return true;
    // Heuristic: if the block can be if-converted/phi-folded and the
    // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
    // thread this store.
    InstructionCost Cost = 0;
    InstructionCost Budget =
    for (auto &I : BB->instructionsWithoutDebug(false)) {
      // Consider terminator instruction to be free.
      if (I.isTerminator())
        continue;
      // If this is one of the stores that we want to speculate out of this BB,
      // then don't count its cost, consider it to be free.
      if (auto *S = dyn_cast<StoreInst>(&I))
        if (llvm::find(FreeStores, S))
          continue;
      // Else, we have a white-list of instructions that we are okay
      // speculating.
        return false; // Not in white-list - not worthwhile folding.
      // And finally, if this is a non-free instruction that we are okay
      // speculating, ensure that we consider the speculation budget.
      Cost +=
          TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
      if (Cost > Budget)
        return false; // Eagerly refuse to fold as soon as we're out of budget.
    }
    assert(Cost <= Budget &&
           "When we run out of budget we will eagerly return from within the "
           "per-instruction loop.");
    return true;
  };

  const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
      (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
       !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
    return false;

  // If PostBB has more than two predecessors, we need to split it so we can
  // sink the store.
  if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
    // We know that QFB's only successor is PostBB. And QFB has a single
    // predecessor. If QTB exists, then its only successor is also PostBB.
    // If QTB does not exist, then QFB's only predecessor has a conditional
    // branch to QFB and PostBB.
    BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
    BasicBlock *NewBB =
        SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU);
    if (!NewBB)
      return false;
    PostBB = NewBB;
  }

  // OK, we're going to sink the stores to PostBB. The store has to be
  // conditional though, so first create the predicate.
  BranchInst *PBranch =
  BranchInst *QBranch =
  Value *PCond = PBranch->getCondition();
  Value *QCond = QBranch->getCondition();

      PStore->getParent());
      QStore->getParent(), PPHI);

  BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt();
  IRBuilder<> QB(PostBB, PostBBFirst);
  QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc());

  // Account for the triangle canonicalization done by the caller: if a store
  // actually lives in the false block, its guarding condition must be
  // inverted.
  InvertPCond ^= (PStore->getParent() != PTB);
  InvertQCond ^= (QStore->getParent() != QTB);
  Value *PPred = InvertPCond ? QB.CreateNot(PCond) : PCond;
  Value *QPred = InvertQCond ? QB.CreateNot(QCond) : QCond;

  // The merged store must execute if either of the original stores would have.
  Value *CombinedPred = QB.CreateOr(PPred, QPred);

  BasicBlock::iterator InsertPt = QB.GetInsertPoint();
  auto *T = SplitBlockAndInsertIfThen(CombinedPred, InsertPt,
                                      /*Unreachable=*/false,
                                      /*BranchWeights=*/nullptr, DTU);
  if (hasBranchWeightMD(*PBranch) && hasBranchWeightMD(*QBranch) &&
    SmallVector<uint32_t, 2> PWeights, QWeights;
    extractBranchWeights(*PBranch, PWeights);
    extractBranchWeights(*QBranch, QWeights);
    // Swap weights where we inverted a condition so that index 0 still
    // corresponds to the taken (store-executing) edge.
    if (InvertPCond)
      std::swap(PWeights[0], PWeights[1]);
    if (InvertQCond)
      std::swap(QWeights[0], QWeights[1]);
    auto CombinedWeights = getDisjunctionWeights(PWeights, QWeights);
                      {CombinedWeights[0], CombinedWeights[1]},
                      /*IsExpected=*/false, /*ElideAllZero=*/true);
  }

  QB.SetInsertPoint(T);
  StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
  SI->setAAMetadata(PStore->getAAMetadata().merge(QStore->getAAMetadata()));
  // Choose the minimum alignment. If we could prove both stores execute, we
  // could use biggest one. In this case, though, we only know that one of the
  // stores executes. And we don't know it's safe to take the alignment from a
  // store that doesn't execute.
  SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign()));

  QStore->eraseFromParent();
  PStore->eraseFromParent();

  return true;
}
4471
                                   DomTreeUpdater *DTU, const DataLayout &DL,
                                   const TargetTransformInfo &TTI) {
  // The intention here is to find diamonds or triangles (see below) where each
  // conditional block contains a store to the same address. Both of these
  // stores are conditional, so they can't be unconditionally sunk. But it may
  // be profitable to speculatively sink the stores into one merged store at the
  // end, and predicate the merged store on the union of the two conditions of
  // PBI and QBI.
  //
  // This can reduce the number of stores executed if both of the conditions are
  // true, and can allow the blocks to become small enough to be if-converted.
  // This optimization will also chain, so that ladders of test-and-set
  // sequences can be if-converted away.
  //
  // We only deal with simple diamonds or triangles:
  //
  //     PBI       or      PBI        or a combination of the two
  //    /   \               | \
  //   PTB  PFB             |  PFB
  //    \   /               | /
  //     QBI                QBI
  //    /  \                | \
  //   QTB  QFB             |  QFB
  //    \  /                | /
  //    PostBB            PostBB
  //
  // We model triangles as a type of diamond with a nullptr "true" block.
  // Triangles are canonicalized so that the fallthrough edge is represented by
  // a true condition, as in the diagram above.
  BasicBlock *PTB = PBI->getSuccessor(0);
  BasicBlock *PFB = PBI->getSuccessor(1);
  BasicBlock *QTB = QBI->getSuccessor(0);
  BasicBlock *QFB = QBI->getSuccessor(1);
  BasicBlock *PostBB = QFB->getSingleSuccessor();

  // Make sure we have a good guess for PostBB. If QTB's only successor is
  // QFB, then QFB is a better PostBB.
  if (QTB->getSingleSuccessor() == QFB)
    PostBB = QFB;

  // If we couldn't find a good PostBB, stop.
  if (!PostBB)
    return false;

  bool InvertPCond = false, InvertQCond = false;
  // Canonicalize fallthroughs to the true branches.
  if (PFB == QBI->getParent()) {
    std::swap(PFB, PTB);
    InvertPCond = true;
  }
  if (QFB == PostBB) {
    std::swap(QFB, QTB);
    InvertQCond = true;
  }

  // From this point on we can assume PTB or QTB may be fallthroughs but PFB
  // and QFB may not. Model fallthroughs as a nullptr block.
  if (PTB == QBI->getParent())
    PTB = nullptr;
  if (QTB == PostBB)
    QTB = nullptr;

  // Legality bailouts. We must have at least the non-fallthrough blocks and
  // the post-dominating block, and the non-fallthroughs must only have one
  // predecessor.
  auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
    return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
  };
  if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
      !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
    return false;
  if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
      (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
    return false;
  if (!QBI->getParent()->hasNUses(2))
    return false;

  // OK, this is a sequence of two diamonds or triangles.
  // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
  SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
  for (auto *BB : {PTB, PFB}) {
    if (!BB)
      continue;
    for (auto &I : *BB)
        PStoreAddresses.insert(SI->getPointerOperand());
  }
  for (auto *BB : {QTB, QFB}) {
    if (!BB)
      continue;
    for (auto &I : *BB)
        QStoreAddresses.insert(SI->getPointerOperand());
  }

  set_intersect(PStoreAddresses, QStoreAddresses);
  // set_intersect mutates PStoreAddresses in place. Rename it here to make it
  // clear what it contains.
  auto &CommonAddresses = PStoreAddresses;

  // Attempt the merge for each address that is stored to on both sides.
  bool Changed = false;
  for (auto *Address : CommonAddresses)
    Changed |=
        mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
                                       InvertPCond, InvertQCond, DTU, DL, TTI);
  return Changed;
}
4580
/// If the previous block ended with a widenable branch, determine if reusing
/// the target block is profitable and legal. This will have the effect of
/// "widening" PBI, but doesn't require us to reason about hoisting safety.
                                       DomTreeUpdater *DTU) {
  // TODO: This can be generalized in two important ways:
  // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
  // values from the PBI edge.
  // 2) We can sink side effecting instructions into BI's fallthrough
  // successor provided they don't contribute to computation of
  // BI's condition.
  BasicBlock *IfTrueBB = PBI->getSuccessor(0);
  BasicBlock *IfFalseBB = PBI->getSuccessor(1);
  if (!isWidenableBranch(PBI) || IfTrueBB != BI->getParent() ||
      !BI->getParent()->getSinglePredecessor())
    return false;
  if (!IfFalseBB->phis().empty())
    return false; // TODO
  // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which
  // may undo the transform done here.
  // TODO: There might be a more fine-grained solution to this.
  if (!llvm::succ_empty(IfFalseBB))
    return false;
  // Use lambda to lazily compute expensive condition after cheap ones.
  auto NoSideEffects = [](BasicBlock &BB) {
    return llvm::none_of(BB, [](const Instruction &I) {
      return I.mayWriteToMemory() || I.mayHaveSideEffects();
    });
  };
  // The two ifs below are symmetric over BI's successors: retarget whichever
  // one ends in a deoptimize call to IfFalseBB (PBI's false successor).
  if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
      BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
      NoSideEffects(*BI->getParent())) {
    auto *OldSuccessor = BI->getSuccessor(1);
    OldSuccessor->removePredecessor(BI->getParent());
    BI->setSuccessor(1, IfFalseBB);
    if (DTU)
      DTU->applyUpdates(
          {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
           {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
    return true;
  }
  if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
      BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
      NoSideEffects(*BI->getParent())) {
    auto *OldSuccessor = BI->getSuccessor(0);
    OldSuccessor->removePredecessor(BI->getParent());
    BI->setSuccessor(0, IfFalseBB);
    if (DTU)
      DTU->applyUpdates(
          {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
           {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
    return true;
  }
  return false;
}
4636
/// If we have a conditional branch as a predecessor of another block,
/// this function tries to simplify it. We know
/// that PBI and BI are both conditional branches, and BI is in one of the
/// successor blocks of PBI - PBI branches to BI.
                                           DomTreeUpdater *DTU,
                                           const DataLayout &DL,
                                           const TargetTransformInfo &TTI) {
  assert(PBI->isConditional() && BI->isConditional());
  BasicBlock *BB = BI->getParent();

  // If this block ends with a branch instruction, and if there is a
  // predecessor that ends on a branch of the same condition, make
  // this conditional branch redundant.
  if (PBI->getCondition() == BI->getCondition() &&
      PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
    // Okay, the outcome of this conditional branch is statically
    // knowable. If this block had a single pred, handle specially, otherwise
    // foldCondBranchOnValueKnownInPredecessor() will handle it.
    if (BB->getSinglePredecessor()) {
      // Turn this into a branch on constant.
      bool CondIsTrue = PBI->getSuccessor(0) == BB;
      BI->setCondition(
          ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
      return true; // Nuke the branch on constant.
    }
  }

  // If the previous block ended with a widenable branch, determine if reusing
  // the target block is profitable and legal. This will have the effect of
  // "widening" PBI, but doesn't require us to reason about hoisting safety.
  if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
    return true;

  // If both branches are conditional and both contain stores to the same
  // address, remove the stores from the conditionals and create a conditional
  // merged store at the end.
  if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI))
    return true;

  // If this is a conditional branch in an empty block, and if any
  // predecessors are a conditional branch to one of our destinations,
  // fold the conditions into logical ops and one cond br.

  // Ignore dbg intrinsics.
  if (&*BB->instructionsWithoutDebug(false).begin() != BI)
    return false;

  // Work out which successor of PBI coincides with which successor of BI.
  // PBIOp/BIOp index the shared ("common") destination on each branch.
  int PBIOp, BIOp;
  if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
    PBIOp = 0;
    BIOp = 0;
  } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
    PBIOp = 0;
    BIOp = 1;
  } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
    PBIOp = 1;
    BIOp = 0;
  } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
    PBIOp = 1;
    BIOp = 1;
  } else {
    return false;
  }

  // Check to make sure that the other destination of this branch
  // isn't BB itself. If so, this is an infinite loop that will
  // keep getting unwound.
  if (PBI->getSuccessor(PBIOp) == BB)
    return false;

  // If predecessor's branch probability to BB is too low don't merge branches.
  SmallVector<uint32_t, 2> PredWeights;
  if (!PBI->getMetadata(LLVMContext::MD_unpredictable) &&
      extractBranchWeights(*PBI, PredWeights) &&
      (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {

        PredWeights[PBIOp],
        static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);

    BranchProbability Likely = TTI.getPredictableBranchThreshold();
    if (CommonDestProb >= Likely)
      return false;
  }

  // Do not perform this transformation if it would require
  // insertion of a large number of select instructions. For targets
  // without predication/cmovs, this is a big pessimization.

  BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
  BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
  unsigned NumPhis = 0;
  for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
       ++II, ++NumPhis) {
    if (NumPhis > 2) // Disable this xform.
      return false;
  }

  // Finally, if everything is ok, fold the branches to logical ops.
  BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);

  LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
                    << "AND: " << *BI->getParent());


  // If OtherDest *is* BB, then BB is a basic block with a single conditional
  // branch in it, where one edge (OtherDest) goes back to itself but the other
  // exits. We don't *know* that the program avoids the infinite loop
  // (even though that seems likely). If we do this xform naively, we'll end up
  // recursively unpeeling the loop. Since we know that (after the xform is
  // done) that the block *is* infinite if reached, we just make it an obviously
  // infinite loop with no cond branch.
  if (OtherDest == BB) {
    // Insert it at the end of the function, because it's either code,
    // or it won't matter if it's hot. :)
    BasicBlock *InfLoopBlock =
        BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
    BranchInst::Create(InfLoopBlock, InfLoopBlock);
    if (DTU)
      Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
    OtherDest = InfLoopBlock;
  }

  LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());

  // BI may have other predecessors. Because of this, we leave
  // it alone, but modify PBI.

  // Make sure we get to CommonDest on True&True directions.
  Value *PBICond = PBI->getCondition();
  IRBuilder<NoFolder> Builder(PBI);
  if (PBIOp)
    PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");

  Value *BICond = BI->getCondition();
  if (BIOp)
    BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");

  // Merge the conditions.
  Value *Cond =
      createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge");

  // Modify PBI to branch on the new condition to the new dests.
  PBI->setCondition(Cond);
  PBI->setSuccessor(0, CommonDest);
  PBI->setSuccessor(1, OtherDest);

  if (DTU) {
    Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
    Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});

    DTU->applyUpdates(Updates);
  }

  // Update branch weight for PBI.
  uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
  uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
  bool HasWeights =
      extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
                             SuccTrueWeight, SuccFalseWeight);
  if (HasWeights) {
    PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
    PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
    SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
    SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
    // The weight to CommonDest should be PredCommon * SuccTotal +
    //                                    PredOther * SuccCommon.
    // The weight to OtherDest should be PredOther * SuccOther.
    uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
                                  PredOther * SuccCommon,
                              PredOther * SuccOther};

    setFittedBranchWeights(*PBI, NewWeights, /*IsExpected=*/false,
                           /*ElideAllZero=*/true);
    // Cond may be a select instruction with the first operand set to "true", or
    // the second to "false" (see how createLogicalOp works for `and` and `or`)
    if (auto *SI = dyn_cast<SelectInst>(Cond)) {
      assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
      // The select is predicated on PBICond
      // The corresponding probabilities are what was referred to above as
      // PredCommon and PredOther.
      setFittedBranchWeights(*SI, {PredCommon, PredOther},
                             /*IsExpected=*/false, /*ElideAllZero=*/true);
    }
  }

  // OtherDest may have phi nodes. If so, add an entry from PBI's
  // block that are identical to the entries for BI's block.
  addPredecessorToBlock(OtherDest, PBI->getParent(), BB);

  // We know that the CommonDest already had an edge from PBI to
  // it. If it has PHIs though, the PHIs may have different
  // entries for BB and PBI's BB. If so, insert a select to make
  // them agree.
  for (PHINode &PN : CommonDest->phis()) {
    Value *BIV = PN.getIncomingValueForBlock(BB);
    unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
    Value *PBIV = PN.getIncomingValue(PBBIdx);
    if (BIV != PBIV) {
      // Insert a select in PBI to pick the right value.
          Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
      PN.setIncomingValue(PBBIdx, NV);
      // The select has the same condition as PBI, in the same BB. The
      // probabilities don't change.
      if (HasWeights) {
        uint64_t TrueWeight = PBIOp ? PredFalseWeight : PredTrueWeight;
        uint64_t FalseWeight = PBIOp ? PredTrueWeight : PredFalseWeight;
        setFittedBranchWeights(*NV, {TrueWeight, FalseWeight},
                               /*IsExpected=*/false, /*ElideAllZero=*/true);
      }
    }
  }

  LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
  LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());

  // This basic block is probably dead. We know it has at least
  // one fewer predecessor.
  return true;
}
4862
// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
// true or to FalseBB if Cond is false.
// Takes care of updating the successors and removing the old terminator.
// Also makes sure not to introduce new successors by assuming that edges to
// non-successor TrueBBs and FalseBBs aren't reachable.
// TrueWeight/FalseWeight seed the branch_weights of the new conditional
// branch; they are ignored when the result is an unconditional branch.
bool SimplifyCFGOpt::simplifyTerminatorOnSelect(Instruction *OldTerm,
                                                Value *Cond, BasicBlock *TrueBB,
                                                BasicBlock *FalseBB,
                                                uint32_t TrueWeight,
                                                uint32_t FalseWeight) {
  auto *BB = OldTerm->getParent();
  // Remove any superfluous successor edges from the CFG.
  // First, figure out which successors to preserve.
  // If TrueBB and FalseBB are equal, only try to preserve one copy of that
  // successor.
  BasicBlock *KeepEdge1 = TrueBB;
  BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;

  // Successors that become unreachable from BB; collected for the DomTree
  // update at the end.
  SmallSetVector<BasicBlock *, 2> RemovedSuccessors;

  // Then remove the rest.
  for (BasicBlock *Succ : successors(OldTerm)) {
    // Make sure only to keep exactly one copy of each edge.
    if (Succ == KeepEdge1)
      KeepEdge1 = nullptr;
    else if (Succ == KeepEdge2)
      KeepEdge2 = nullptr;
    else {
      Succ->removePredecessor(BB,
                              /*KeepOneInputPHIs=*/true);

      if (Succ != TrueBB && Succ != FalseBB)
        RemovedSuccessors.insert(Succ);
    }
  }

  IRBuilder<> Builder(OldTerm);
  Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());

  // Insert an appropriate new terminator.
  if (!KeepEdge1 && !KeepEdge2) {
    if (TrueBB == FalseBB) {
      // We were only looking for one successor, and it was present.
      // Create an unconditional branch to it.
      Builder.CreateBr(TrueBB);
    } else {
      // We found both of the successors we were looking for.
      // Create a conditional branch sharing the condition of the select.
      BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
      setBranchWeights(*NewBI, {TrueWeight, FalseWeight},
                       /*IsExpected=*/false, /*ElideAllZero=*/true);
    }
  } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
    // Neither of the selected blocks were successors, so this
    // terminator must be unreachable.
    new UnreachableInst(OldTerm->getContext(), OldTerm->getIterator());
  } else {
    // One of the selected values was a successor, but the other wasn't.
    // Insert an unconditional branch to the one that was found;
    // the edge to the one that wasn't must be unreachable.
    if (!KeepEdge1) {
      // Only TrueBB was found.
      Builder.CreateBr(TrueBB);
    } else {
      // Only FalseBB was found.
      Builder.CreateBr(FalseBB);
    }
  }


  if (DTU) {
    SmallVector<DominatorTree::UpdateType, 2> Updates;
    Updates.reserve(RemovedSuccessors.size());
    for (auto *RemovedSuccessor : RemovedSuccessors)
      Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
    DTU->applyUpdates(Updates);
  }

  return true;
}
4944
4945// Replaces
4946// (switch (select cond, X, Y)) on constant X, Y
4947// with a branch - conditional if X and Y lead to distinct BBs,
4948// unconditional otherwise.
4949bool SimplifyCFGOpt::simplifySwitchOnSelect(SwitchInst *SI,
4950 SelectInst *Select) {
4951 // Check for constant integer values in the select.
4952 ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
4953 ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
4954 if (!TrueVal || !FalseVal)
4955 return false;
4956
4957 // Find the relevant condition and destinations.
4958 Value *Condition = Select->getCondition();
4959 BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
4960 BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();
4961
4962 // Get weight for TrueBB and FalseBB.
4963 uint32_t TrueWeight = 0, FalseWeight = 0;
4964 SmallVector<uint64_t, 8> Weights;
4965 bool HasWeights = hasBranchWeightMD(*SI);
4966 if (HasWeights) {
4967 getBranchWeights(SI, Weights);
4968 if (Weights.size() == 1 + SI->getNumCases()) {
4969 TrueWeight =
4970 (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
4971 FalseWeight =
4972 (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
4973 }
4974 }
4975
4976 // Perform the actual simplification.
4977 return simplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
4978 FalseWeight);
4979}
4980
// Replaces
//   (indirectbr (select cond, blockaddress(@fn, BlockA),
//                             blockaddress(@fn, BlockB)))
// with
//   (br cond, BlockA, BlockB).
bool SimplifyCFGOpt::simplifyIndirectBrOnSelect(IndirectBrInst *IBI,
                                                SelectInst *SI) {
  // Check that both operands of the select are block addresses.
  BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
  BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
  if (!TBA || !FBA)
    return false;

  // Extract the actual blocks.
  BasicBlock *TrueBB = TBA->getBasicBlock();
  BasicBlock *FalseBB = FBA->getBasicBlock();

  // The select's profile becomes the profile of the conditional branch that
  // replaces the indirect branch. Both weights stay zero when the select
  // carries no branch weight metadata.
  SmallVector<uint32_t> SelectBranchWeights(2);
    extractBranchWeights(*SI, SelectBranchWeights);
  // Perform the actual simplification.
  return simplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB,
                                    SelectBranchWeights[0],
                                    SelectBranchWeights[1]);
}
5008
/// This is called when we find an icmp instruction
/// (a seteq/setne with a constant) as the only instruction in a
/// block that ends with an uncond branch. We are looking for a very specific
/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
/// this case, we merge the first two "or's of icmp" into a switch, but then the
/// default value goes to an uncond block with a seteq in it, we get something
/// like:
///
///   switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
/// DEFAULT:
///   %tmp = icmp eq i8 %A, 92
///   br label %end
/// end:
///   ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
///
/// We prefer to split the edge to 'end' so that there is a true/false entry to
/// the PHI, merging the third icmp into the switch.
bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
    ICmpInst *ICI, IRBuilder<> &Builder) {
  // Delegate to the generalized icmp+select handler. Select == nullptr means
  // we assume that there is a hidden no-op select instruction of
  // `_ = select %icmp, true, false` after `%icmp = icmp ...`.
  return tryToSimplifyUncondBranchWithICmpSelectInIt(ICI, nullptr, Builder);
}
5032
5033/// Similar to tryToSimplifyUncondBranchWithICmpInIt, but handle a more generic
5034/// case. This is called when we find an icmp instruction (a seteq/setne with a
5035/// constant) and its following select instruction as the only TWO instructions
5036/// in a block that ends with an uncond branch. We are looking for a very
5037/// specific pattern that occurs when "
5038/// if (A == 1) return C1;
5039/// if (A == 2) return C2;
5040/// if (A < 3) return C3;
5041/// return C4;
5042/// " gets simplified. In this case, we merge the first two "branches of icmp"
5043/// into a switch, but then the default value goes to an uncond block with a lt
5044/// icmp and select in it, as InstCombine can not simplify "A < 3" as "A == 2".
5045/// After SimplifyCFG and other subsequent optimizations (e.g., SCCP), we might
5046/// get something like:
5047///
5048/// case1:
5049/// switch i8 %A, label %DEFAULT [ i8 0, label %end i8 1, label %case2 ]
5050/// case2:
5051/// br label %end
5052/// DEFAULT:
5053/// %tmp = icmp eq i8 %A, 2
5054/// %val = select i1 %tmp, i8 C3, i8 C4
5055/// br label %end
5056/// end:
5057/// _ = phi i8 [ C1, %case1 ], [ C2, %case2 ], [ %val, %DEFAULT ]
5058///
5059/// We prefer to split the edge to 'end' so that there are TWO entries of V3/V4
5060/// to the PHI, merging the icmp & select into the switch, as follows:
5061///
5062/// case1:
5063/// switch i8 %A, label %DEFAULT [
5064/// i8 0, label %end
5065/// i8 1, label %case2
5066/// i8 2, label %case3
5067/// ]
5068/// case2:
5069/// br label %end
5070/// case3:
5071/// br label %end
5072/// DEFAULT:
5073/// br label %end
5074/// end:
5075/// _ = phi i8 [ C1, %case1 ], [ C2, %case2 ], [ C3, %case2 ], [ C4, %DEFAULT]
5076bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpSelectInIt(
5077    ICmpInst *ICI, SelectInst *Select, IRBuilder<> &Builder) {
5078  BasicBlock *BB = ICI->getParent();
5079
5080  // If the block has any PHIs in it or the icmp/select has multiple uses, it is
5081  // too complex.
5082  /// TODO: support multi-phis in succ BB of select's BB.
5083  if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse() ||
5084      (Select && !Select->hasOneUse()))
5085    return false;
5086
5087  // The pattern we're looking for is where our only predecessor is a switch on
5088  // 'V' and this block is the default case for the switch. In this case we can
5089  // fold the compared value into the switch to simplify things.
5090  BasicBlock *Pred = BB->getSinglePredecessor();
5091  if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
5092    return false;
5093
5094  Value *IcmpCond;
5095  ConstantInt *NewCaseVal;
5096  CmpPredicate Predicate;
5097
5098  // Match icmp X, C
5099  if (!match(ICI,
5100             m_ICmp(Predicate, m_Value(IcmpCond), m_ConstantInt(NewCaseVal))))
5101    return false;
5102
5103  Value *SelectCond, *SelectTrueVal, *SelectFalseVal;
  // NOTE(review): 'User' records the single user of the icmp/select; its
  // declaration is not visible in this dump — confirm against upstream.
5105  if (!Select) {
5106    // If Select == nullptr, we can assume that there is a hidden no-op select
5107    // just after icmp
5108    SelectCond = ICI;
5109    SelectTrueVal = Builder.getTrue();
5110    SelectFalseVal = Builder.getFalse();
5111    User = ICI->user_back();
5112  } else {
5113    SelectCond = Select->getCondition();
5114    // Check if the select condition is the same as the icmp condition.
5115    if (SelectCond != ICI)
5116      return false;
5117    SelectTrueVal = Select->getTrueValue();
5118    SelectFalseVal = Select->getFalseValue();
5119    User = Select->user_back();
5120  }
5121
  // The single predecessor must be a switch over the same value the icmp
  // tests, otherwise the fold below does not apply.
5122  SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
5123  if (SI->getCondition() != IcmpCond)
5124    return false;
5125
5126  // If BB is reachable on a non-default case, then we simply know the value of
5127  // V in this block. Substitute it and constant fold the icmp instruction
5128  // away.
5129  if (SI->getDefaultDest() != BB) {
5130    ConstantInt *VVal = SI->findCaseDest(BB);
5131    assert(VVal && "Should have a unique destination value");
5132    ICI->setOperand(0, VVal);
5133
5134    if (Value *V = simplifyInstruction(ICI, {DL, ICI})) {
5135      ICI->replaceAllUsesWith(V);
5136      ICI->eraseFromParent();
5137    }
5138    // BB is now empty, so it is likely to simplify away.
5139    return requestResimplify();
5140  }
5141
5142  // Ok, the block is reachable from the default dest. If the constant we're
5143  // comparing exists in one of the other edges, then we can constant fold ICI
5144  // and zap it.
5145  if (SI->findCaseValue(NewCaseVal) != SI->case_default()) {
5146    Value *V;
5147    if (Predicate == ICmpInst::ICMP_EQ)
5149    else
5151
5152    ICI->replaceAllUsesWith(V);
5153    ICI->eraseFromParent();
5154    // BB is now empty, so it is likely to simplify away.
5155    return requestResimplify();
5156  }
5157
5158  // The use of the select has to be in the 'end' block, by the only PHI node in
5159  // the block.
5160  BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
5161  PHINode *PHIUse = dyn_cast<PHINode>(User);
5162  if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
5164    return false;
5165
5166  // If the icmp is a SETEQ, then the default dest gets SelectFalseVal, the new
5167  // edge gets SelectTrueVal in the PHI.
5168  Value *DefaultCst = SelectFalseVal;
5169  Value *NewCst = SelectTrueVal;
5170
5171  if (ICI->getPredicate() == ICmpInst::ICMP_NE)
5172    std::swap(DefaultCst, NewCst);
5173
5174  // Replace Select (which is used by the PHI for the default value) with
5175  // SelectFalseVal or SelectTrueVal depending on if ICI is EQ or NE.
5176  if (Select) {
5177    Select->replaceAllUsesWith(DefaultCst);
5178    Select->eraseFromParent();
5179  } else {
5180    ICI->replaceAllUsesWith(DefaultCst);
5181  }
5182  ICI->eraseFromParent();
5183
  // Deferred DominatorTree updates, applied together at the end.
5184  SmallVector<DominatorTree::UpdateType, 2> Updates;
5185
5186  // Okay, the switch goes to this block on a default value. Add an edge from
5187  // the switch to the merge point on the compared value.
5188  BasicBlock *NewBB =
5189      BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
5190  {
    // Halve the default successor's profile weight (rounding up) and give the
    // newly-added case the halved weight, so total mass is preserved.
5191    SwitchInstProfUpdateWrapper SIW(*SI);
5192    auto W0 = SIW.getSuccessorWeight(0);
5194    if (W0) {
5195      NewW = ((uint64_t(*W0) + 1) >> 1);
5196      SIW.setSuccessorWeight(0, *NewW);
5197    }
5198    SIW.addCase(NewCaseVal, NewBB, NewW);
5199    if (DTU)
5200      Updates.push_back({DominatorTree::Insert, Pred, NewBB});
5201  }
5202
5203  // NewBB branches to the phi block, add the uncond branch and the phi entry.
5204  Builder.SetInsertPoint(NewBB);
5205  Builder.SetCurrentDebugLocation(SI->getDebugLoc());
5206  Builder.CreateBr(SuccBlock);
5207  PHIUse->addIncoming(NewCst, NewBB);
5208  if (DTU) {
5209    Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
5210    DTU->applyUpdates(Updates);
5211  }
5212  return true;
5213}
5214
5215/// The specified branch is a conditional branch.
5216/// Check to see if it is branching on an or/and chain of icmp instructions, and
5217/// fold it into a switch instruction if so.
5218bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI,
5219                                               IRBuilder<> &Builder,
5220                                               const DataLayout &DL) {
5222  if (!Cond)
5223    return false;
5224
5225  // Change br (X == 0 | X == 1), T, F into a switch instruction.
5226  // If this is a bunch of seteq's or'd together, or if it's a bunch of
5227  // 'setne's and'ed together, collect them.
5228
5229  // Try to gather values from a chain of and/or to be turned into a switch
5230  ConstantComparesGatherer ConstantCompare(Cond, DL);
5231  // Unpack the result
5232  SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
5233  Value *CompVal = ConstantCompare.CompValue;
5234  unsigned UsedICmps = ConstantCompare.UsedICmps;
5235  Value *ExtraCase = ConstantCompare.Extra;
5236  bool TrueWhenEqual = ConstantCompare.IsEq;
5237
5238  // If we didn't have a multiply compared value, fail.
5239  if (!CompVal)
5240    return false;
5241
5242  // Avoid turning single icmps into a switch.
5243  if (UsedICmps <= 1)
5244    return false;
5245
5246  // There might be duplicate constants in the list, which the switch
5247  // instruction can't handle, remove them now.
5248  array_pod_sort(Values.begin(), Values.end(), constantIntSortPredicate);
5249  Values.erase(llvm::unique(Values), Values.end());
5250
5251  // If Extra was used, we require at least two switch values to do the
5252  // transformation. A switch with one value is just a conditional branch.
5253  if (ExtraCase && Values.size() < 2)
5254    return false;
5255
5256  SmallVector<uint32_t> BranchWeights;
5257  const bool HasProfile = !ProfcheckDisableMetadataFixes &&
5258                          extractBranchWeights(*BI, BranchWeights);
5259
5260  // Figure out which block is which destination.
5261  BasicBlock *DefaultBB = BI->getSuccessor(1);
5262  BasicBlock *EdgeBB = BI->getSuccessor(0);
5263  if (!TrueWhenEqual) {
5264    std::swap(DefaultBB, EdgeBB);
5265    if (HasProfile)
5266      std::swap(BranchWeights[0], BranchWeights[1]);
5267  }
5268
5269  BasicBlock *BB = BI->getParent();
5270
5271  LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
5272                    << " cases into SWITCH. BB is:\n"
5273                    << *BB);
5274
5275  SmallVector<DominatorTree::UpdateType, 2> Updates;
5276
5277  // If there are any extra values that couldn't be folded into the switch
5278  // then we evaluate them with an explicit branch first. Split the block
5279  // right before the condbr to handle it.
5280  if (ExtraCase) {
5281    BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr,
5282                                   /*MSSAU=*/nullptr, "switch.early.test");
5283
5284    // Remove the uncond branch added to the old block.
5285    Instruction *OldTI = BB->getTerminator();
5286    Builder.SetInsertPoint(OldTI);
5287
5288    // There can be an unintended UB if extra values are Poison. Before the
5289    // transformation, extra values may not be evaluated according to the
5290    // condition, and it will not raise UB. But after transformation, we are
5291    // evaluating extra values before checking the condition, and it will raise
5292    // UB. It can be solved by adding freeze instruction to extra values.
5293    AssumptionCache *AC = Options.AC;
5294
5295    if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
5296      ExtraCase = Builder.CreateFreeze(ExtraCase);
5297
5298    // We don't have any info about this condition.
5299    auto *Br = TrueWhenEqual ? Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB)
5300                             : Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
5302
5303    OldTI->eraseFromParent();
5304
5305    if (DTU)
5306      Updates.push_back({DominatorTree::Insert, BB, EdgeBB});
5307
5308    // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
5309    // for the edge we just added.
5310    addPredecessorToBlock(EdgeBB, BB, NewBB);
5311
5312    LLVM_DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
5313                      << "\nEXTRABB = " << *BB);
5314    BB = NewBB;
5315  }
5316
5317  Builder.SetInsertPoint(BI);
5318  // Convert pointer to int before we switch.
5319  if (CompVal->getType()->isPointerTy()) {
5320    assert(!DL.hasUnstableRepresentation(CompVal->getType()) &&
5321           "Should not end up here with unstable pointers");
5322    CompVal = Builder.CreatePtrToInt(
5323        CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
5324  }
5325
5326  // Check if we can represent the values as a contiguous range. If so, we use a
5327  // range check + conditional branch instead of a switch.
  // After the sort above, Values is ordered with front() as the largest unique
  // value; front() - back() == size() - 1 implies the values are contiguous.
5328  if (Values.front()->getValue() - Values.back()->getValue() ==
5329      Values.size() - 1) {
5330    ConstantRange RangeToCheck = ConstantRange::getNonEmpty(
5331        Values.back()->getValue(), Values.front()->getValue() + 1);
5332    APInt Offset, RHS;
5333    ICmpInst::Predicate Pred;
5334    RangeToCheck.getEquivalentICmp(Pred, RHS, Offset);
5335    Value *X = CompVal;
5336    if (!Offset.isZero())
5337      X = Builder.CreateAdd(X, ConstantInt::get(CompVal->getType(), Offset));
5338    Value *Cond =
5339        Builder.CreateICmp(Pred, X, ConstantInt::get(CompVal->getType(), RHS));
5340    BranchInst *NewBI = Builder.CreateCondBr(Cond, EdgeBB, DefaultBB);
5341    if (HasProfile)
5342      setBranchWeights(*NewBI, BranchWeights, /*IsExpected=*/false);
5343    // We don't need to update PHI nodes since we don't add any new edges.
5344  } else {
5345    // Create the new switch instruction now.
5346    SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
5347    if (HasProfile) {
5348      // We know the weight of the default case. We don't know the weight of the
5349      // other cases, but rather than completely lose profiling info, we split
5350      // the remaining probability equally over them.
5351      SmallVector<uint32_t> NewWeights(Values.size() + 1);
5352      NewWeights[0] = BranchWeights[1]; // this is the default, and we swapped
5353                                        // if TrueWhenEqual.
5354      for (auto &V : drop_begin(NewWeights))
5355        V = BranchWeights[0] / Values.size();
5356      setBranchWeights(*New, NewWeights, /*IsExpected=*/false);
5357    }
5358
5359    // Add all of the 'cases' to the switch instruction.
5360    for (ConstantInt *Val : Values)
5361      New->addCase(Val, EdgeBB);
5362
5363    // We added edges from PI to the EdgeBB. As such, if there were any
5364    // PHI nodes in EdgeBB, they need entries to be added corresponding to
5365    // the number of edges added.
    // One edge BB->EdgeBB already existed, so each PHI needs Values.size()-1
    // additional copies of its incoming value for BB.
5366    for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
5367      PHINode *PN = cast<PHINode>(BBI);
5368      Value *InVal = PN->getIncomingValueForBlock(BB);
5369      for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
5370        PN->addIncoming(InVal, BB);
5371    }
5372  }
5373
5374  // Erase the old branch instruction.
5376  if (DTU)
5377    DTU->applyUpdates(Updates);
5378
5379  LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
5380  return true;
5381}
5382
5383bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
5384 if (isa<PHINode>(RI->getValue()))
5385 return simplifyCommonResume(RI);
5386 else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHIIt()) &&
5387 RI->getValue() == &*RI->getParent()->getFirstNonPHIIt())
5388 // The resume must unwind the exception that caused control to branch here.
5389 return simplifySingleResume(RI);
5390
5391 return false;
5392}
5393
5394// Check if cleanup block is empty
// Returns true when the given instruction range contains only intrinsics
// that carry no runtime semantics for the cleanup itself: debug-info
// intrinsics and lifetime.end markers. Any other instruction makes the
// range non-empty.
5396  for (Instruction &I : R) {
5397    auto *II = dyn_cast<IntrinsicInst>(&I);
5398    if (!II)
5399      return false;
5400
5401    Intrinsic::ID IntrinsicID = II->getIntrinsicID();
5402    switch (IntrinsicID) {
5403    case Intrinsic::dbg_declare:
5404    case Intrinsic::dbg_value:
5405    case Intrinsic::dbg_label:
5406    case Intrinsic::lifetime_end:
5407      break;
5408    default:
      // Anything else is real work; the cleanup block is not empty.
5409      return false;
5410    }
5411  }
5412  return true;
5413}
5414
5415// Simplify resume that is shared by several landing pads (phi of landing pad).
// For each incoming landing-pad block that is "trivial" (its only successor is
// this resume block and it contains nothing but benign intrinsics), rewrite
// the invokes unwinding into it into plain calls and disconnect the block.
// Returns true if any such block was simplified.
5416bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
5417  BasicBlock *BB = RI->getParent();
5418
5419  // Check that there are no other instructions except for debug and lifetime
5420  // intrinsics between the phi's and resume instruction.
5421  if (!isCleanupBlockEmpty(make_range(RI->getParent()->getFirstNonPHIIt(),
5422                                      BB->getTerminator()->getIterator())))
5423    return false;
5424
5425  SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
5426  auto *PhiLPInst = cast<PHINode>(RI->getValue());
5427
5428  // Check incoming blocks to see if any of them are trivial.
5429  for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
5430       Idx++) {
5431    auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
5432    auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);
5433
5434    // If the block has other successors, we can not delete it because
5435    // it has other dependents.
5436    if (IncomingBB->getUniqueSuccessor() != BB)
5437      continue;
5438
5439    auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHIIt());
5440    // Not the landing pad that caused the control to branch here.
5441    if (IncomingValue != LandingPad)
5442      continue;
5443
5445        make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
5446      TrivialUnwindBlocks.insert(IncomingBB);
5447  }
5448
5449  // If no trivial unwind blocks, don't do any simplifications.
5450  if (TrivialUnwindBlocks.empty())
5451    return false;
5452
5453  // Turn all invokes that unwind here into calls.
5454  for (auto *TrivialBB : TrivialUnwindBlocks) {
5455    // Blocks that will be simplified should be removed from the phi node.
5456    // Note there could be multiple edges to the resume block, and we need
5457    // to remove them all.
5458    while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
5459      BB->removePredecessor(TrivialBB, true);
5460
5461    for (BasicBlock *Pred :
5463      removeUnwindEdge(Pred, DTU);
5464      ++NumInvokes;
5465    }
5466
5467    // In each SimplifyCFG run, only the current processed block can be erased.
5468    // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
5469    // of erasing TrivialBB, we only remove the branch to the common resume
5470    // block so that we can later erase the resume block since it has no
5471    // predecessors.
5472    TrivialBB->getTerminator()->eraseFromParent();
5473    new UnreachableInst(RI->getContext(), TrivialBB);
5474    if (DTU)
5475      DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}});
5476  }
5477
5478  // Delete the resume block if all its predecessors have been removed.
5479  if (pred_empty(BB))
5480    DeleteDeadBlock(BB, DTU);
5481
  // We reach here only when at least one trivial unwind block was found, so
  // the IR was changed.
5482  return !TrivialUnwindBlocks.empty();
5483}
5484
5485// Simplify resume that is only used by a single (non-phi) landing pad.
// The caller (simplifyResume) has already established that the resumed value
// is this block's own landingpad. If nothing but benign intrinsics sits
// between the landingpad and the resume, rewrite all invokes unwinding here
// into calls and delete the block. Returns true on success.
5486bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
5487  BasicBlock *BB = RI->getParent();
5488  auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHIIt());
5489  assert(RI->getValue() == LPInst &&
5490         "Resume must unwind the exception that caused control to here");
5491
5492  // Check that there are no other instructions except for debug intrinsics.
5494      make_range<Instruction *>(LPInst->getNextNode(), RI)))
5495    return false;
5496
5497  // Turn all invokes that unwind here into calls and delete the basic block.
5498  for (BasicBlock *Pred : llvm::make_early_inc_range(predecessors(BB))) {
5499    removeUnwindEdge(Pred, DTU);
5500    ++NumInvokes;
5501  }
5502
5503  // The landingpad is now unreachable. Zap it.
5504  DeleteDeadBlock(BB, DTU);
5505  return true;
5506}
5507
5509  // If this is a trivial cleanup pad that executes no instructions, it can be
5510  // eliminated. If the cleanup pad continues to the caller, any predecessor
5511  // that is an EH pad will be updated to continue to the caller and any
5512  // predecessor that terminates with an invoke instruction will have its invoke
5513  // instruction converted to a call instruction. If the cleanup pad being
5514  // simplified does not continue to the caller, each predecessor will be
5515  // updated to continue to the unwind destination of the cleanup pad being
5516  // simplified.
5517  BasicBlock *BB = RI->getParent();
5518  CleanupPadInst *CPInst = RI->getCleanupPad();
5519  if (CPInst->getParent() != BB)
5520    // This isn't an empty cleanup.
5521    return false;
5522
5523  // We cannot kill the pad if it has multiple uses. This typically arises
5524  // from unreachable basic blocks.
5525  if (!CPInst->hasOneUse())
5526    return false;
5527
5528  // Check that there are no other instructions except for benign intrinsics.
5530      make_range<Instruction *>(CPInst->getNextNode(), RI)))
5531    return false;
5532
5533  // If the cleanup return we are simplifying unwinds to the caller, this will
5534  // set UnwindDest to nullptr.
5535  BasicBlock *UnwindDest = RI->getUnwindDest();
5536
5537  // We're about to remove BB from the control flow. Before we do, sink any
5538  // PHINodes into the unwind destination. Doing this before changing the
5539  // control flow avoids some potentially slow checks, since we can currently
5540  // be certain that UnwindDest and BB have no common predecessors (since they
5541  // are both EH pads).
5542  if (UnwindDest) {
5543    // First, go through the PHI nodes in UnwindDest and update any nodes that
5544    // reference the block we are removing
5545    for (PHINode &DestPN : UnwindDest->phis()) {
5546      int Idx = DestPN.getBasicBlockIndex(BB);
5547      // Since BB unwinds to UnwindDest, it has to be in the PHI node.
5548      assert(Idx != -1);
5549      // This PHI node has an incoming value that corresponds to a control
5550      // path through the cleanup pad we are removing. If the incoming
5551      // value is in the cleanup pad, it must be a PHINode (because we
5552      // verified above that the block is otherwise empty). Otherwise, the
5553      // value is either a constant or a value that dominates the cleanup
5554      // pad being removed.
5555      //
5556      // Because BB and UnwindDest are both EH pads, all of their
5557      // predecessors must unwind to these blocks, and since no instruction
5558      // can have multiple unwind destinations, there will be no overlap in
5559      // incoming blocks between SrcPN and DestPN.
5560      Value *SrcVal = DestPN.getIncomingValue(Idx);
5561      PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);
5562
5563      bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
5564      for (auto *Pred : predecessors(BB)) {
5565        Value *Incoming =
5566            NeedPHITranslation ? SrcPN->getIncomingValueForBlock(Pred) : SrcVal;
5567        DestPN.addIncoming(Incoming, Pred);
5568      }
5569    }
5570
5571    // Sink any remaining PHI nodes directly into UnwindDest.
5572    BasicBlock::iterator InsertPt = UnwindDest->getFirstNonPHIIt();
5573    for (PHINode &PN : make_early_inc_range(BB->phis())) {
5574      if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
5575        // If the PHI node has no uses or all of its uses are in this basic
5576        // block (meaning they are debug or lifetime intrinsics), just leave
5577        // it. It will be erased when we erase BB below.
5578        continue;
5579
5580      // Otherwise, sink this PHI node into UnwindDest.
5581      // Any predecessors to UnwindDest which are not already represented
5582      // must be back edges which inherit the value from the path through
5583      // BB. In this case, the PHI value must reference itself.
5584      for (auto *pred : predecessors(UnwindDest))
5585        if (pred != BB)
5586          PN.addIncoming(&PN, pred);
5587      PN.moveBefore(InsertPt);
5588      // Also, add a dummy incoming value for the original BB itself,
5589      // so that the PHI is well-formed until we drop said predecessor.
5590      PN.addIncoming(PoisonValue::get(PN.getType()), BB);
5591    }
5592  }
5593
5594  std::vector<DominatorTree::UpdateType> Updates;
5595
5596  // We use make_early_inc_range here because we will remove all predecessors.
5598    if (UnwindDest == nullptr) {
      // Pending updates must be applied before removeUnwindEdge mutates the
      // CFG through the same DTU.
5599      if (DTU) {
5600        DTU->applyUpdates(Updates);
5601        Updates.clear();
5602      }
5603      removeUnwindEdge(PredBB, DTU);
5604      ++NumInvokes;
5605    } else {
      // Retarget the predecessor's terminator to skip BB and unwind straight
      // to UnwindDest.
5606      BB->removePredecessor(PredBB);
5607      Instruction *TI = PredBB->getTerminator();
5608      TI->replaceUsesOfWith(BB, UnwindDest);
5609      if (DTU) {
5610        Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
5611        Updates.push_back({DominatorTree::Delete, PredBB, BB});
5612      }
5613    }
5614  }
5615
5616  if (DTU)
5617    DTU->applyUpdates(Updates);
5618
5619  DeleteDeadBlock(BB, DTU);
5620
5621  return true;
5622}
5623
5624// Try to merge two cleanuppads together.
// If this cleanupret unwinds to another cleanuppad whose only predecessor is
// this block, the successor pad is redundant: fold it into the predecessor
// pad and replace the cleanupret with a plain branch. Returns true on change.
5626  // Skip any cleanuprets which unwind to caller, there is nothing to merge
5627  // with.
5628  BasicBlock *UnwindDest = RI->getUnwindDest();
5629  if (!UnwindDest)
5630    return false;
5631
5632  // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't
5633  // be safe to merge without code duplication.
5634  if (UnwindDest->getSinglePredecessor() != RI->getParent())
5635    return false;
5636
5637  // Verify that our cleanuppad's unwind destination is another cleanuppad.
5638  auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
5639  if (!SuccessorCleanupPad)
5640    return false;
5641
5642  CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
5643  // Replace any uses of the successor cleanupad with the predecessor pad
5644  // The only cleanuppad uses should be this cleanupret, it's cleanupret and
5645  // funclet bundle operands.
5646  SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
5647  // Remove the old cleanuppad.
5648  SuccessorCleanupPad->eraseFromParent();
5649  // Now, we simply replace the cleanupret with a branch to the unwind
5650  // destination.
5651  BranchInst::Create(UnwindDest, RI->getParent());
5652  RI->eraseFromParent();
5653
5654  return true;
5655}
5656
5657bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
5658 // It is possible to transiantly have an undef cleanuppad operand because we
5659 // have deleted some, but not all, dead blocks.
5660 // Eventually, this block will be deleted.
5661 if (isa<UndefValue>(RI->getOperand(0)))
5662 return false;
5663
5664 if (mergeCleanupPad(RI))
5665 return true;
5666
5667 if (removeEmptyCleanup(RI, DTU))
5668 return true;
5669
5670 return false;
5671}
5672
5673// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
// Removes instructions that are dead because control reaches 'unreachable',
// then rewrites every predecessor's terminator so the edge into this block
// disappears (branches become unreachable/assumes, switch cases are dropped,
// invoke/catchswitch/cleanupret unwind edges are removed). Deletes the block
// if it becomes predecessor-free. Returns true if anything changed.
5674bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
5675  BasicBlock *BB = UI->getParent();
5676
5677  bool Changed = false;
5678
5679  // Ensure that any debug-info records that used to occur after the Unreachable
5680  // are moved to in front of it -- otherwise they'll "dangle" at the end of
5681  // the block.
5683
5684  // Debug-info records on the unreachable inst itself should be deleted, as
5685  // below we delete everything past the final executable instruction.
5686  UI->dropDbgRecords();
5687
5688  // If there are any instructions immediately before the unreachable that can
5689  // be removed, do so.
5690  while (UI->getIterator() != BB->begin()) {
5692    --BBI;
5693
5695      break; // Can not drop any more instructions. We're done here.
5696    // Otherwise, this instruction can be freely erased,
5697    // even if it is not side-effect free.
5698
5699    // Note that deleting EH's here is in fact okay, although it involves a bit
5700    // of subtle reasoning. If this inst is an EH, all the predecessors of this
5701    // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
5702    // and we can therefore guarantee this block will be erased.
5703
5704    // If we're deleting this, we're deleting any subsequent debug info, so
5705    // delete DbgRecords.
5706    BBI->dropDbgRecords();
5707
5708    // Delete this instruction (any uses are guaranteed to be dead)
5709    BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
5710    BBI->eraseFromParent();
5711    Changed = true;
5712  }
5713
5714  // If the unreachable instruction is the first in the block, take a gander
5715  // at all of the predecessors of this instruction, and simplify them.
5716  if (&BB->front() != UI)
5717    return Changed;
5718
5719  std::vector<DominatorTree::UpdateType> Updates;
5720
5721  SmallSetVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
5722  for (BasicBlock *Predecessor : Preds) {
5723    Instruction *TI = Predecessor->getTerminator();
5724    IRBuilder<> Builder(TI);
5725    if (auto *BI = dyn_cast<BranchInst>(TI)) {
5726      // We could either have a proper unconditional branch,
5727      // or a degenerate conditional branch with matching destinations.
5728      if (all_of(BI->successors(),
5729                 [BB](auto *Successor) { return Successor == BB; })) {
5730        new UnreachableInst(TI->getContext(), TI->getIterator());
5731        TI->eraseFromParent();
5732        Changed = true;
5733      } else {
        // Only one successor reaches BB: record the condition value that
        // avoids BB as an assumption, then branch unconditionally to the
        // other successor.
5734        assert(BI->isConditional() && "Can't get here with an uncond branch.");
5735        Value* Cond = BI->getCondition();
5736        assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
5737               "The destinations are guaranteed to be different here.");
5738        CallInst *Assumption;
5739        if (BI->getSuccessor(0) == BB) {
5740          Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
5741          Builder.CreateBr(BI->getSuccessor(1));
5742        } else {
5743          assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
5744          Assumption = Builder.CreateAssumption(Cond);
5745          Builder.CreateBr(BI->getSuccessor(0));
5746        }
5747        if (Options.AC)
5748          Options.AC->registerAssumption(cast<AssumeInst>(Assumption));
5749
5751        Changed = true;
5752      }
5753      if (DTU)
5754        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5755    } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
      // Drop every switch case that targets BB, keeping profile metadata
      // consistent via the wrapper.
5756      SwitchInstProfUpdateWrapper SU(*SI);
5757      for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
5758        if (i->getCaseSuccessor() != BB) {
5759          ++i;
5760          continue;
5761        }
5762        BB->removePredecessor(SU->getParent());
5763        i = SU.removeCase(i);
5764        e = SU->case_end();
5765        Changed = true;
5766      }
5767      // Note that the default destination can't be removed!
5768      if (DTU && SI->getDefaultDest() != BB)
5769        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5770    } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
5771      if (II->getUnwindDest() == BB) {
5772        if (DTU) {
5773          DTU->applyUpdates(Updates);
5774          Updates.clear();
5775        }
5776        auto *CI = cast<CallInst>(removeUnwindEdge(TI->getParent(), DTU));
5777        if (!CI->doesNotThrow())
5778          CI->setDoesNotThrow();
5779        Changed = true;
5780      }
5781    } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
5782      if (CSI->getUnwindDest() == BB) {
5783        if (DTU) {
5784          DTU->applyUpdates(Updates);
5785          Updates.clear();
5786        }
5787        removeUnwindEdge(TI->getParent(), DTU);
5788        Changed = true;
5789        continue;
5790      }
5791
      // BB is one of the catchswitch handlers: remove it from the handler
      // list (adjusting the iterators in place).
5792      for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
5793                                             E = CSI->handler_end();
5794           I != E; ++I) {
5795        if (*I == BB) {
5796          CSI->removeHandler(I);
5797          --I;
5798          --E;
5799          Changed = true;
5800        }
5801      }
5802      if (DTU)
5803        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5804      if (CSI->getNumHandlers() == 0) {
5805        if (CSI->hasUnwindDest()) {
5806          // Redirect all predecessors of the block containing CatchSwitchInst
5807          // to instead branch to the CatchSwitchInst's unwind destination.
5808          if (DTU) {
5809            for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
5810              Updates.push_back({DominatorTree::Insert,
5811                                 PredecessorOfPredecessor,
5812                                 CSI->getUnwindDest()});
5813              Updates.push_back({DominatorTree::Delete,
5814                                 PredecessorOfPredecessor, Predecessor});
5815            }
5816          }
5817          Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
5818        } else {
5819          // Rewrite all preds to unwind to caller (or from invoke to call).
5820          if (DTU) {
5821            DTU->applyUpdates(Updates);
5822            Updates.clear();
5823          }
5824          SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor));
5825          for (BasicBlock *EHPred : EHPreds)
5826            removeUnwindEdge(EHPred, DTU);
5827        }
5828        // The catchswitch is no longer reachable.
5829        new UnreachableInst(CSI->getContext(), CSI->getIterator());
5830        CSI->eraseFromParent();
5831        Changed = true;
5832      }
5833    } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
5834      (void)CRI;
5835      assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
5836             "Expected to always have an unwind to BB.");
5837      if (DTU)
5838        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5839      new UnreachableInst(TI->getContext(), TI->getIterator());
5840      TI->eraseFromParent();
5841      Changed = true;
5842    }
5843  }
5844
5845  if (DTU)
5846    DTU->applyUpdates(Updates);
5847
5848  // If this block is now dead, remove it.
5849  if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
5850    DeleteDeadBlock(BB, DTU);
5851    return true;
5852  }
5853
5854  return Changed;
5855}
5856
5865
// Determine whether one of the two case sets forms a contiguous value range
// (possibly wrapping around the condition's unsigned range), and if so return
// the range endpoints together with the destinations. Cases are expected in
// descending order (front() is the max, back() the min).
5866static std::optional<ContiguousCasesResult>
5869                    BasicBlock *Dest, BasicBlock *OtherDest) {
5870  assert(Cases.size() >= 1);
5871
5873  const APInt &Min = Cases.back()->getValue();
5874  const APInt &Max = Cases.front()->getValue();
5875  APInt Offset = Max - Min;
5876  size_t ContiguousOffset = Cases.size() - 1;
5877  if (Offset == ContiguousOffset) {
    // Max - Min == number of cases - 1: the values are exactly [Min, Max].
5878    return ContiguousCasesResult{
5879        /*Min=*/Cases.back(),
5880        /*Max=*/Cases.front(),
5881        /*Dest=*/Dest,
5882        /*OtherDest=*/OtherDest,
5883        /*Cases=*/&Cases,
5884        /*OtherCases=*/&OtherCases,
5885    };
5886  }
5887  ConstantRange CR = computeConstantRange(Condition, /*ForSigned=*/false);
5888  // If this is a wrapping contiguous range, that is, [Min, OtherMin] +
5889  // [OtherMax, Max] (also [OtherMax, OtherMin]), [OtherMin+1, OtherMax-1] is a
5890  // contiguous range for the other destination. N.B. If CR is not a full range,
5891  // Max+1 is not equal to Min. It's not continuous in arithmetic.
5892  if (Max == CR.getUnsignedMax() && Min == CR.getUnsignedMin()) {
5893    assert(Cases.size() >= 2);
    // Find the single gap in the descending sequence; its bounds delimit the
    // complementary contiguous range.
5894    auto *It =
5895        std::adjacent_find(Cases.begin(), Cases.end(), [](auto L, auto R) {
5896          return L->getValue() != R->getValue() + 1;
5897        });
5898    if (It == Cases.end())
5899      return std::nullopt;
5900    auto [OtherMax, OtherMin] = std::make_pair(*It, *std::next(It));
5901    if ((Max - OtherMax->getValue()) + (OtherMin->getValue() - Min) ==
5902        Cases.size() - 2) {
5903      return ContiguousCasesResult{
5904          /*Min=*/cast<ConstantInt>(
5905              ConstantInt::get(OtherMin->getType(), OtherMin->getValue() + 1)),
5906          /*Max=*/
5908              ConstantInt::get(OtherMax->getType(), OtherMax->getValue() - 1)),
5909          /*Dest=*/OtherDest,
5910          /*OtherDest=*/Dest,
5911          /*Cases=*/&OtherCases,
5912          /*OtherCases=*/&Cases,
5913      };
5914    }
5915  }
5916  return std::nullopt;
5917}
5918
// Replace the switch's (dead) default destination with a fresh block holding
// only an 'unreachable', optionally detaching the original default block and
// keeping the DominatorTree in sync.
5920                                           DomTreeUpdater *DTU,
5921                                           bool RemoveOrigDefaultBlock = true) {
5922  LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
5923  auto *BB = Switch->getParent();
5924  auto *OrigDefaultBlock = Switch->getDefaultDest();
5925  if (RemoveOrigDefaultBlock)
5926    OrigDefaultBlock->removePredecessor(BB);
5927  BasicBlock *NewDefaultBlock = BasicBlock::Create(
5928      BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
5929      OrigDefaultBlock);
5930  auto *UI = new UnreachableInst(Switch->getContext(), NewDefaultBlock);
5932  Switch->setDefaultDest(&*NewDefaultBlock);
5933  if (DTU) {
5935    Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
    // Only delete the old edge if no remaining case still targets the
    // original default block.
5936    if (RemoveOrigDefaultBlock &&
5937        !is_contained(successors(BB), OrigDefaultBlock))
5938      Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
5939    DTU->applyUpdates(Updates);
5940  }
5941}
5942
/// Turn a switch into an integer range comparison and branch.
/// Switches with more than 2 destinations are ignored.
/// Switches with 1 destination are also ignored.
///
/// If one of the two destination case-sets forms a contiguous value range
/// [Min, Max], the switch is replaced by the equivalent unsigned range test
/// "(Cond - Min) u< (Max - Min + 1)" feeding a conditional branch.
/// Returns true if the switch was replaced.
bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
                                             IRBuilder<> &Builder) {
  assert(SI->getNumCases() > 1 && "Degenerate switch?");

  bool HasDefault = !SI->defaultDestUnreachable();

  auto *BB = SI->getParent();
  // Partition the cases into two sets with different destinations.
  BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
  BasicBlock *DestB = nullptr;
  SmallVector<ConstantInt *, 16> CasesA;
  SmallVector<ConstantInt *, 16> CasesB;

  for (auto Case : SI->cases()) {
    BasicBlock *Dest = Case.getCaseSuccessor();
    if (!DestA)
      DestA = Dest;
    if (Dest == DestA) {
      CasesA.push_back(Case.getCaseValue());
      continue;
    }
    if (!DestB)
      DestB = Dest;
    if (Dest == DestB) {
      CasesB.push_back(Case.getCaseValue());
      continue;
    }
    return false; // More than two destinations.
  }
  if (!DestB)
    return false; // All destinations are the same and the default is unreachable

  assert(DestA && DestB &&
         "Single-destination switch should have been folded.");
  assert(DestA != DestB);
  assert(DestB != SI->getDefaultDest());
  assert(!CasesB.empty() && "There must be non-default cases.");
  assert(!CasesA.empty() || HasDefault);

  // Figure out if one of the sets of cases form a contiguous range.
  std::optional<ContiguousCasesResult> ContiguousCases;

  // Only one icmp is needed when there is only one case.
  if (!HasDefault && CasesA.size() == 1)
    ContiguousCases = ContiguousCasesResult{
        /*Min=*/CasesA[0],
        /*Max=*/CasesA[0],
        /*Dest=*/DestA,
        /*OtherDest=*/DestB,
        /*Cases=*/&CasesA,
        /*OtherCases=*/&CasesB,
    };
  else if (CasesB.size() == 1)
    ContiguousCases = ContiguousCasesResult{
        /*Min=*/CasesB[0],
        /*Max=*/CasesB[0],
        /*Dest=*/DestB,
        /*OtherDest=*/DestA,
        /*Cases=*/&CasesB,
        /*OtherCases=*/&CasesA,
    };
  // Correctness: Cases to the default destination cannot be contiguous cases.
  else if (!HasDefault)
    ContiguousCases =
        findContiguousCases(SI->getCondition(), CasesA, CasesB, DestA, DestB);

  if (!ContiguousCases)
    ContiguousCases =
        findContiguousCases(SI->getCondition(), CasesB, CasesA, DestB, DestA);

  if (!ContiguousCases)
    return false;

  auto [Min, Max, Dest, OtherDest, Cases, OtherCases] = *ContiguousCases;

  // Start building the compare and branch.

  // Adding -Min rebases the contiguous range so it starts at zero.
  Constant *Offset = ConstantExpr::getNeg(Min);
  Constant *NumCases = ConstantInt::get(Offset->getType(),
                                        Max->getValue() - Min->getValue() + 1);
  BranchInst *NewBI;
  if (NumCases->isOneValue()) {
    // Single-value range: a plain equality compare suffices.
    assert(Max->getValue() == Min->getValue());
    Value *Cmp = Builder.CreateICmpEQ(SI->getCondition(), Min);
    NewBI = Builder.CreateCondBr(Cmp, Dest, OtherDest);
  }
  // If NumCases overflowed, then all possible values jump to the successor.
  else if (NumCases->isNullValue() && !Cases->empty()) {
    NewBI = Builder.CreateBr(Dest);
  } else {
    Value *Sub = SI->getCondition();
    if (!Offset->isNullValue())
      Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");
    Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
    NewBI = Builder.CreateCondBr(Cmp, Dest, OtherDest);
  }

  // Update weight for the newly-created conditional branch.
  if (hasBranchWeightMD(*SI) && NewBI->isConditional()) {
    SmallVector<uint64_t, 8> Weights;
    getBranchWeights(SI, Weights);
    if (Weights.size() == 1 + SI->getNumCases()) {
      uint64_t TrueWeight = 0;
      uint64_t FalseWeight = 0;
      for (size_t I = 0, E = Weights.size(); I != E; ++I) {
        if (SI->getSuccessor(I) == Dest)
          TrueWeight += Weights[I];
        else
          FalseWeight += Weights[I];
      }
      // Halve both weights together until they fit in 32 bits, preserving
      // their ratio.
      while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
        TrueWeight /= 2;
        FalseWeight /= 2;
      }
      setFittedBranchWeights(*NewBI, {TrueWeight, FalseWeight},
                             /*IsExpected=*/false, /*ElideAllZero=*/true);
    }
  }

  // Prune obsolete incoming values off the successors' PHI nodes.
  // The switch contributed one PHI entry per case edge; the new branch
  // contributes exactly one edge, so all but one entry must go.
  for (auto &PHI : make_early_inc_range(Dest->phis())) {
    unsigned PreviousEdges = Cases->size();
    if (Dest == SI->getDefaultDest())
      ++PreviousEdges;
    for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
      PHI.removeIncomingValue(SI->getParent());
  }
  for (auto &PHI : make_early_inc_range(OtherDest->phis())) {
    unsigned PreviousEdges = OtherCases->size();
    if (OtherDest == SI->getDefaultDest())
      ++PreviousEdges;
    unsigned E = PreviousEdges - 1;
    // Remove all incoming values from OtherDest if OtherDest is unreachable.
    if (NewBI->isUnconditional())
      ++E;
    for (unsigned I = 0; I != E; ++I)
      PHI.removeIncomingValue(SI->getParent());
  }

  // Clean up the default block - it may have phis or other instructions before
  // the unreachable terminator.
  if (!HasDefault)
    createUnreachableSwitchDefault(SI, DTU, /*RemoveOrigDefaultBlock*/ false);

  auto *UnreachableDefault = SI->getDefaultDest();

  // Drop the switch.
  SI->eraseFromParent();

  if (!HasDefault && DTU)
    DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}});

  return true;
}
6100
/// Compute masked bits for the condition of a switch
/// and use it to remove dead cases.
/// Also eliminates (or repurposes) the default destination when the live
/// cases provably cover every reachable condition value.
/// Returns true if the switch was changed.
static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU,
                                     AssumptionCache *AC,
                                     const DataLayout &DL) {
  Value *Cond = SI->getCondition();
  KnownBits Known = computeKnownBits(Cond, DL, AC, SI);
  SmallPtrSet<Constant *, 4> KnownValues;
  bool IsKnownValuesValid = collectPossibleValues(Cond, KnownValues, 4);

  // We can also eliminate cases by determining that their values are outside of
  // the limited range of the condition based on how many significant (non-sign)
  // bits are in the condition value.
  unsigned MaxSignificantBitsInCond =
      ComputeMaxSignificantBits(Cond, DL, AC, SI);

  // Gather dead cases.
  SmallVector<ConstantInt *, 8> DeadCases;
  SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
  SmallVector<BasicBlock *, 8> UniqueSuccessors;
  for (const auto &Case : SI->cases()) {
    auto *Successor = Case.getCaseSuccessor();
    if (DTU) {
      // Track per-successor case counts so we can tell which CFG edges
      // disappear entirely after removal.
      auto [It, Inserted] = NumPerSuccessorCases.try_emplace(Successor);
      if (Inserted)
        UniqueSuccessors.push_back(Successor);
      ++It->second;
    }
    ConstantInt *CaseC = Case.getCaseValue();
    const APInt &CaseVal = CaseC->getValue();
    // A case is dead if its value contradicts the known bits, exceeds the
    // significant-bit bound, or is absent from the collected value set.
    if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
        (CaseVal.getSignificantBits() > MaxSignificantBitsInCond) ||
        (IsKnownValuesValid && !KnownValues.contains(CaseC))) {
      DeadCases.push_back(CaseC);
      if (DTU)
        --NumPerSuccessorCases[Successor];
      LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
                        << " is dead.\n");
    } else if (IsKnownValuesValid)
      KnownValues.erase(CaseC);
  }

  // If we can prove that the cases must cover all possible values, the
  // default destination becomes dead and we can remove it. If we know some
  // of the bits in the value, we can use that to more precisely compute the
  // number of possible unique case values.
  bool HasDefault = !SI->defaultDestUnreachable();
  const unsigned NumUnknownBits =
      Known.getBitWidth() - (Known.Zero | Known.One).popcount();
  assert(NumUnknownBits <= Known.getBitWidth());
  if (HasDefault && DeadCases.empty()) {
    // If every remaining possible value is undef, the default is unreachable.
    if (IsKnownValuesValid && all_of(KnownValues, IsaPred<UndefValue>)) {
      createUnreachableSwitchDefault(SI, DTU);
      return true;
    }

    if (NumUnknownBits < 64 /* avoid overflow */) {
      uint64_t AllNumCases = 1ULL << NumUnknownBits;
      if (SI->getNumCases() == AllNumCases) {
        createUnreachableSwitchDefault(SI, DTU);
        return true;
      }
      // When only one case value is missing, replace default with that case.
      // Eliminating the default branch will provide more opportunities for
      // optimization, such as lookup tables.
      if (SI->getNumCases() == AllNumCases - 1) {
        assert(NumUnknownBits > 1 && "Should be canonicalized to a branch");
        IntegerType *CondTy = cast<IntegerType>(Cond->getType());
        if (CondTy->getIntegerBitWidth() > 64 ||
            !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
          return false;

        // The full power-of-two value set XORs to zero (NumUnknownBits > 1),
        // so XOR-ing the present case values yields the one missing value.
        uint64_t MissingCaseVal = 0;
        for (const auto &Case : SI->cases())
          MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue();
        auto *MissingCase = cast<ConstantInt>(
            ConstantInt::get(Cond->getType(), MissingCaseVal));
        SwitchInstProfUpdateWrapper SIW(*SI);
        SIW.addCase(MissingCase, SI->getDefaultDest(),
                    SIW.getSuccessorWeight(0));
        createUnreachableSwitchDefault(SI, DTU,
                                       /*RemoveOrigDefaultBlock*/ false);
        SIW.setSuccessorWeight(0, 0);
        return true;
      }
    }
  }

  if (DeadCases.empty())
    return false;

  SwitchInstProfUpdateWrapper SIW(*SI);
  for (ConstantInt *DeadCase : DeadCases) {
    SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
    assert(CaseI != SI->case_default() &&
           "Case was not found. Probably mistake in DeadCases forming.");
    // Prune unused values from PHI nodes.
    CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
    SIW.removeCase(CaseI);
  }

  if (DTU) {
    std::vector<DominatorTree::UpdateType> Updates;
    for (auto *Successor : UniqueSuccessors)
      if (NumPerSuccessorCases[Successor] == 0)
        Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor});
    DTU->applyUpdates(Updates);
  }

  return true;
}
6212
/// If BB would be eligible for simplification by
/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
/// by an unconditional branch), look at the phi node for BB in the successor
/// block and see if the incoming value is equal to CaseValue. If so, return
/// the phi node, and set PhiIndex to BB's index in the phi node.
static PHINode *findPHIForConditionForwarding(ConstantInt *CaseValue,
                                              BasicBlock *BB, int *PhiIndex) {
  if (&*BB->getFirstNonPHIIt() != BB->getTerminator())
    return nullptr; // BB must be empty to be a candidate for simplification.
  if (!BB->getSinglePredecessor())
    return nullptr; // BB must be dominated by the switch.

  BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
  if (!Branch || !Branch->isUnconditional())
    return nullptr; // Terminator must be unconditional branch.

  BasicBlock *Succ = Branch->getSuccessor(0);

  // Find a phi in the successor whose incoming value along the BB edge is
  // exactly CaseValue.
  for (PHINode &PHI : Succ->phis()) {
    int Idx = PHI.getBasicBlockIndex(BB);
    assert(Idx >= 0 && "PHI has no entry for predecessor?");

    Value *InValue = PHI.getIncomingValue(Idx);
    if (InValue != CaseValue)
      continue;

    *PhiIndex = Idx;
    return &PHI;
  }

  return nullptr;
}
6245
/// Try to forward the condition of a switch instruction to a phi node
/// dominated by the switch, if that would mean that some of the destination
/// blocks of the switch can be folded away. Return true if a change is made.
static bool forwardSwitchConditionToPHI(SwitchInst *SI) {
  using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;

  ForwardingNodesMap ForwardingNodes;
  BasicBlock *SwitchBlock = SI->getParent();
  bool Changed = false;
  for (const auto &Case : SI->cases()) {
    ConstantInt *CaseValue = Case.getCaseValue();
    BasicBlock *CaseDest = Case.getCaseSuccessor();

    // Replace phi operands in successor blocks that are using the constant case
    // value rather than the switch condition variable:
    // switchbb:
    // switch i32 %x, label %default [
    // i32 17, label %succ
    // ...
    // succ:
    // %r = phi i32 ... [ 17, %switchbb ] ...
    // -->
    // %r = phi i32 ... [ %x, %switchbb ] ...

    for (PHINode &Phi : CaseDest->phis()) {
      // This only works if there is exactly 1 incoming edge from the switch to
      // a phi. If there is >1, that means multiple cases of the switch map to 1
      // value in the phi, and that phi value is not the switch condition. Thus,
      // this transform would not make sense (the phi would be invalid because
      // a phi can't have different incoming values from the same block).
      int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
      if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
          count(Phi.blocks(), SwitchBlock) == 1) {
        Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
        Changed = true;
      }
    }

    // Collect phi nodes that are indirectly using this switch's case constants.
    int PhiIdx;
    if (auto *Phi = findPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
      ForwardingNodes[Phi].push_back(PhiIdx);
  }

  for (auto &ForwardingNode : ForwardingNodes) {
    PHINode *Phi = ForwardingNode.first;
    SmallVectorImpl<int> &Indexes = ForwardingNode.second;
    // Check if it helps to fold PHI.
    // Forward only if at least two slots fold, or the condition already
    // appears among the phi's incoming values.
    if (Indexes.size() < 2 && !llvm::is_contained(Phi->incoming_values(), SI->getCondition()))
      continue;

    for (int Index : Indexes)
      Phi->setIncomingValue(Index, SI->getCondition());
    Changed = true;
  }

  return Changed;
}
6304
/// Return true if the backend will be able to handle
/// initializing an array of constants like C.
static bool validLookupTableConstant(Constant *C, const TargetTransformInfo &TTI) {
  if (C->isThreadDependent())
    return false;
  if (C->isDLLImportDependent())
    return false;

  // Only simple constant kinds can live in a lookup table initializer.
  if (!isa<ConstantFP>(C) && !isa<ConstantInt>(C) &&
      !isa<ConstantPointerNull>(C) && !isa<GlobalValue>(C) &&
      !isa<UndefValue>(C) && !isa<ConstantExpr>(C))
    return false;

  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
    // Pointer casts and in-bounds GEPs will not prohibit the backend from
    // materializing the array of constants.
    Constant *StrippedC = cast<Constant>(CE->stripInBoundsConstantOffsets());
    if (StrippedC == C || !validLookupTableConstant(StrippedC, TTI))
      return false;
  }

  if (!TTI.shouldBuildLookupTablesForConstant(C))
    return false;

  return true;
}
6331
/// If V is a Constant, return it. Otherwise, try to look up
/// its constant value in ConstantPool, returning 0 if it's not there.
static Constant *
lookupConstant(Value *V,
               const SmallDenseMap<Value *, Constant *> &ConstantPool) {
  if (Constant *C = dyn_cast<Constant>(V))
    return C;
  // lookup() returns nullptr when V has no entry in the pool.
  return ConstantPool.lookup(V);
}
6341
/// Try to fold instruction I into a constant. This works for
/// simple instructions such as binary operations where both operands are
/// constant or can be replaced by constants from the ConstantPool. Returns the
/// resulting constant on success, 0 otherwise.
static Constant *
constantFold(Instruction *I, const DataLayout &DL,
             const SmallDenseMap<Value *, Constant *> &ConstantPool) {
  // A select folds to one of its arms when its condition folds to a known
  // i1 constant.
  if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
    Constant *A = lookupConstant(Select->getCondition(), ConstantPool);
    if (!A)
      return nullptr;
    if (A->isAllOnesValue())
      return lookupConstant(Select->getTrueValue(), ConstantPool);
    if (A->isNullValue())
      return lookupConstant(Select->getFalseValue(), ConstantPool);
    return nullptr;
  }

  // All operands must resolve to constants for the generic folder.
  SmallVector<Constant *, 4> COps;
  for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
    if (Constant *A = lookupConstant(I->getOperand(N), ConstantPool))
      COps.push_back(A);
    else
      return nullptr;
  }

  return ConstantFoldInstOperands(I, COps, DL);
}
6370
/// Try to determine the resulting constant values in phi nodes
/// at the common destination basic block, *CommonDest, for one of the case
/// destionations CaseDest corresponding to value CaseVal (0 for the default
/// case), of a switch instruction SI.
/// On success, fills Res with (phi, constant) pairs and returns true.
static bool
getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
               BasicBlock **CommonDest,
               SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
               const DataLayout &DL, const TargetTransformInfo &TTI) {
  // The block from which we enter the common destination.
  BasicBlock *Pred = SI->getParent();

  // If CaseDest is empty except for some side-effect free instructions through
  // which we can constant-propagate the CaseVal, continue to its successor.
  SmallDenseMap<Value *, Constant *> ConstantPool;
  ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
  for (Instruction &I : CaseDest->instructionsWithoutDebug(false)) {
    if (I.isTerminator()) {
      // If the terminator is a simple branch, continue to the next block.
      if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
        return false;
      Pred = CaseDest;
      CaseDest = I.getSuccessor(0);
    } else if (Constant *C = constantFold(&I, DL, ConstantPool)) {
      // Instruction is side-effect free and constant.

      // If the instruction has uses outside this block or a phi node slot for
      // the block, it is not safe to bypass the instruction since it would then
      // no longer dominate all its uses.
      for (auto &Use : I.uses()) {
        User *User = Use.getUser();
        if (Instruction *I = dyn_cast<Instruction>(User))
          if (I->getParent() == CaseDest)
            continue;
        if (PHINode *Phi = dyn_cast<PHINode>(User))
          if (Phi->getIncomingBlock(Use) == CaseDest)
            continue;
        return false;
      }

      ConstantPool.insert(std::make_pair(&I, C));
    } else {
      break;
    }
  }

  // If we did not have a CommonDest before, use the current one.
  if (!*CommonDest)
    *CommonDest = CaseDest;
  // If the destination isn't the common one, abort.
  if (CaseDest != *CommonDest)
    return false;

  // Get the values for this case from phi nodes in the destination block.
  for (PHINode &PHI : (*CommonDest)->phis()) {
    int Idx = PHI.getBasicBlockIndex(Pred);
    if (Idx == -1)
      continue;

    Constant *ConstVal =
        lookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
    if (!ConstVal)
      return false;

    // Be conservative about which kinds of constants we support.
    if (!validLookupTableConstant(ConstVal, TTI))
      return false;

    Res.push_back(std::make_pair(&PHI, ConstVal));
  }

  return Res.size() > 0;
}
6444
6445// Helper function used to add CaseVal to the list of cases that generate
6446// Result. Returns the updated number of cases that generate this result.
6447static size_t mapCaseToResult(ConstantInt *CaseVal,
6448 SwitchCaseResultVectorTy &UniqueResults,
6449 Constant *Result) {
6450 for (auto &I : UniqueResults) {
6451 if (I.first == Result) {
6452 I.second.push_back(CaseVal);
6453 return I.second.size();
6454 }
6455 }
6456 UniqueResults.push_back(
6457 std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
6458 return 1;
6459}
6460
// Helper function that initializes a map containing
// results for the PHI node of the common destination block for a switch
// instruction. Returns false if multiple PHI nodes have been found or if
// there is not a common destination block for the switch.
static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI,
                                  BasicBlock *&CommonDest,
                                  SwitchCaseResultVectorTy &UniqueResults,
                                  Constant *&DefaultResult,
                                  const DataLayout &DL,
                                  const TargetTransformInfo &TTI,
                                  uintptr_t MaxUniqueResults) {
  for (const auto &I : SI->cases()) {
    ConstantInt *CaseVal = I.getCaseValue();

    // Resulting value at phi nodes for this case value.
    SwitchCaseResultsTy Results;
    if (!getCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
                        DL, TTI))
      return false;

    // Only one value per case is permitted.
    if (Results.size() > 1)
      return false;

    // Add the case->result mapping to UniqueResults.
    const size_t NumCasesForResult =
        mapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);

    // Early out if there are too many cases for this result.
    if (NumCasesForResult > MaxSwitchCasesPerResult)
      return false;

    // Early out if there are too many unique results.
    if (UniqueResults.size() > MaxUniqueResults)
      return false;

    // Check the PHI consistency.
    if (!PHI)
      PHI = Results[0].first;
    else if (PHI != Results[0].first)
      return false;
  }
  // Find the default result value.
  SmallVector<std::pair<PHINode *, Constant *>, 1> DefaultResults;
  getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
                 DL, TTI);
  // If the default value is not found abort unless the default destination
  // is unreachable.
  DefaultResult =
      DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;

  return DefaultResult || SI->defaultDestUnreachable();
}
6514
// Helper function that checks if it is possible to transform a switch with only
// two cases (or two cases + default) that produces a result into a select.
// TODO: Handle switches with more than 2 cases that map to the same result.
// The branch weights correspond to the provided Condition (i.e. if Condition is
// modified from the original SwitchInst, the caller must adjust the weights)
// Returns the select chain's result Value, or nullptr if no fold applies.
static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
                                 Constant *DefaultResult, Value *Condition,
                                 IRBuilder<> &Builder, const DataLayout &DL,
                                 ArrayRef<uint32_t> BranchWeights) {
  // If we are selecting between only two cases transform into a simple
  // select or a two-way select if default is possible.
  // Example:
  // switch (a) { %0 = icmp eq i32 %a, 10
  // case 10: return 42; %1 = select i1 %0, i32 42, i32 4
  // case 20: return 2; ----> %2 = icmp eq i32 %a, 20
  // default: return 4; %3 = select i1 %2, i32 2, i32 %1
  // }

  const bool HasBranchWeights =
      !BranchWeights.empty() && !ProfcheckDisableMetadataFixes;

  if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
      ResultVector[1].second.size() == 1) {
    ConstantInt *FirstCase = ResultVector[0].second[0];
    ConstantInt *SecondCase = ResultVector[1].second[0];
    Value *SelectValue = ResultVector[1].first;
    if (DefaultResult) {
      Value *ValueCompare =
          Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
      SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
                                         DefaultResult, "switch.select");
      if (auto *SI = dyn_cast<SelectInst>(SelectValue);
          SI && HasBranchWeights) {
        // We start with 3 probabilities, where the numerator is the
        // corresponding BranchWeights[i], and the denominator is the sum over
        // BranchWeights. We want the probability and negative probability of
        // Condition == SecondCase.
        assert(BranchWeights.size() == 3);
        setBranchWeights(
            *SI, {BranchWeights[2], BranchWeights[0] + BranchWeights[1]},
            /*IsExpected=*/false, /*ElideAllZero=*/true);
      }
    }
    Value *ValueCompare =
        Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
    Value *Ret = Builder.CreateSelect(ValueCompare, ResultVector[0].first,
                                      SelectValue, "switch.select");
    if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
      // We may have had a DefaultResult. Base the position of the first and
      // second's branch weights accordingly. Also the proability that Condition
      // != FirstCase needs to take that into account.
      assert(BranchWeights.size() >= 2);
      size_t FirstCasePos = (Condition != nullptr);
      size_t SecondCasePos = FirstCasePos + 1;
      uint32_t DefaultCase = (Condition != nullptr) ? BranchWeights[0] : 0;
      setBranchWeights(*SI,
                       {BranchWeights[FirstCasePos],
                        DefaultCase + BranchWeights[SecondCasePos]},
                       /*IsExpected=*/false, /*ElideAllZero=*/true);
    }
    return Ret;
  }

  // Handle the degenerate case where two cases have the same result value.
  if (ResultVector.size() == 1 && DefaultResult) {
    ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
    unsigned CaseCount = CaseValues.size();
    // n bits group cases map to the same result:
    // case 0,4 -> Cond & 0b1..1011 == 0 ? result : default
    // case 0,2,4,6 -> Cond & 0b1..1001 == 0 ? result : default
    // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
    if (isPowerOf2_32(CaseCount)) {
      ConstantInt *MinCaseVal = CaseValues[0];
      // If there are bits that are set exclusively by CaseValues, we
      // can transform the switch into a select if the conjunction of
      // all the values uniquely identify CaseValues.
      APInt AndMask = APInt::getAllOnes(MinCaseVal->getBitWidth());

      // Find the minimum value and compute the and of all the case values.
      for (auto *Case : CaseValues) {
        if (Case->getValue().slt(MinCaseVal->getValue()))
          MinCaseVal = Case;
        AndMask &= Case->getValue();
      }
      KnownBits Known = computeKnownBits(Condition, DL);

      if (!AndMask.isZero() && Known.getMaxValue().uge(AndMask)) {
        // Compute the number of bits that are free to vary.
        unsigned FreeBits = Known.countMaxActiveBits() - AndMask.popcount();

        // Check if the number of values covered by the mask is equal
        // to the number of cases.
        if (FreeBits == Log2_32(CaseCount)) {
          Value *And = Builder.CreateAnd(Condition, AndMask);
          Value *Cmp = Builder.CreateICmpEQ(
              And, Constant::getIntegerValue(And->getType(), AndMask));
          Value *Ret =
              Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
          if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
            // We know there's a Default case. We base the resulting branch
            // weights off its probability.
            assert(BranchWeights.size() >= 2);
            setBranchWeights(
                *SI,
                {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
                /*IsExpected=*/false, /*ElideAllZero=*/true);
          }
          return Ret;
        }
      }

      // Mark the bits case number touched.
      APInt BitMask = APInt::getZero(MinCaseVal->getBitWidth());
      for (auto *Case : CaseValues)
        BitMask |= (Case->getValue() - MinCaseVal->getValue());

      // Check if cases with the same result can cover all number
      // in touched bits.
      if (BitMask.popcount() == Log2_32(CaseCount)) {
        if (!MinCaseVal->isNullValue())
          Condition = Builder.CreateSub(Condition, MinCaseVal);
        Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
        Value *Cmp = Builder.CreateICmpEQ(
            And, Constant::getNullValue(And->getType()), "switch.selectcmp");
        Value *Ret =
            Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
        if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
          assert(BranchWeights.size() >= 2);
          setBranchWeights(
              *SI,
              {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
              /*IsExpected=*/false, /*ElideAllZero=*/true);
        }
        return Ret;
      }
    }

    // Handle the degenerate case where two cases have the same value.
    if (CaseValues.size() == 2) {
      Value *Cmp1 = Builder.CreateICmpEQ(Condition, CaseValues[0],
                                         "switch.selectcmp.case1");
      Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1],
                                         "switch.selectcmp.case2");
      Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
      Value *Ret =
          Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
      if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
        assert(BranchWeights.size() >= 2);
        setBranchWeights(
            *SI, {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
            /*IsExpected=*/false, /*ElideAllZero=*/true);
      }
      return Ret;
    }
  }

  return nullptr;
}
6673
// Helper function to cleanup a switch instruction that has been converted into
// a select, fixing up PHI nodes and basic blocks.
static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI,
                                        Value *SelectValue,
                                        IRBuilder<> &Builder,
                                        DomTreeUpdater *DTU) {
  std::vector<DominatorTree::UpdateType> Updates;

  BasicBlock *SelectBB = SI->getParent();
  BasicBlock *DestBB = PHI->getParent();

  if (DTU && !is_contained(predecessors(DestBB), SelectBB))
    Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
  Builder.CreateBr(DestBB);

  // Remove the switch.

  // All former switch edges into DestBB collapse into the single new branch
  // edge, which carries SelectValue.
  PHI->removeIncomingValueIf(
      [&](unsigned Idx) { return PHI->getIncomingBlock(Idx) == SelectBB; });
  PHI->addIncoming(SelectValue, SelectBB);

  SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
  for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
    BasicBlock *Succ = SI->getSuccessor(i);

    if (Succ == DestBB)
      continue;
    Succ->removePredecessor(SelectBB);
    if (DTU && RemovedSuccessors.insert(Succ).second)
      Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
  }
  SI->eraseFromParent();
  if (DTU)
    DTU->applyUpdates(Updates);
}
6709
/// If a switch is only used to initialize one or more phi nodes in a common
/// successor block with only two different constant values, try to replace the
/// switch with a select. Returns true if the fold was made.
static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
                              DomTreeUpdater *DTU, const DataLayout &DL,
                              const TargetTransformInfo &TTI) {
  Value *const Cond = SI->getCondition();
  PHINode *PHI = nullptr;
  BasicBlock *CommonDest = nullptr;
  Constant *DefaultResult;
  SwitchCaseResultVectorTy UniqueResults;
  // Collect all the cases that will deliver the same value from the switch.
  if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
                             DL, TTI, /*MaxUniqueResults*/ 2))
    return false;

  assert(PHI != nullptr && "PHI for value select not found");
  Builder.SetInsertPoint(SI);
  SmallVector<uint32_t, 4> BranchWeights;
  if (!ProfcheckDisableMetadataFixes) {
    [[maybe_unused]] auto HasWeights =
        extractBranchWeights(getBranchWeightMDNode(*SI), BranchWeights);
    assert(!HasWeights == (BranchWeights.empty()));
  }
  // The switch has one weight per successor (default + cases), so a
  // non-empty weight vector must cover every unique result.
  assert(BranchWeights.empty() ||
         (BranchWeights.size() >=
          UniqueResults.size() + (DefaultResult != nullptr)));

  Value *SelectValue = foldSwitchToSelect(UniqueResults, DefaultResult, Cond,
                                          Builder, DL, BranchWeights);
  if (!SelectValue)
    return false;

  removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
  return true;
}
6746
namespace {

/// This class finds alternatives for switches to ultimately
/// replace the switch.
class SwitchReplacement {
public:
  /// Create a helper for optimizations to use as a switch replacement.
  /// Find a better representation for the content of Values,
  /// using DefaultValue to fill any holes in the table.
  SwitchReplacement(
      Module &M, uint64_t TableSize, ConstantInt *Offset,
      const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
      Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);

  /// Build instructions with Builder to retrieve values using Index
  /// and replace the switch.
  Value *replaceSwitch(Value *Index, IRBuilder<> &Builder, const DataLayout &DL,
                       Function *Func);

  /// Return true if a table with TableSize elements of
  /// type ElementType would fit in a target-legal register.
  static bool wouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
                                 Type *ElementType);

  /// Return the default value of the switch.
  Constant *getDefaultValue();

  /// Return true if the replacement is a lookup table.
  bool isLookupTable();

  /// Return true if the replacement is a bit map.
  bool isBitMap();

private:
  // Depending on the switch, there are different alternatives.
  // The constructor picks the cheapest representation that can hold Values.
  enum {
    // For switches where each case contains the same value, we just have to
    // store that single value and return it for each lookup.
    SingleValueKind,

    // For switches where there is a linear relationship between table index
    // and values. We calculate the result with a simple multiplication
    // and addition instead of a table lookup.
    LinearMapKind,

    // For small tables with integer elements, we can pack them into a bitmap
    // that fits into a target-legal register. Values are retrieved by
    // shift and mask operations.
    BitMapKind,

    // The table is stored as an array of values. Values are retrieved by load
    // instructions from the table.
    LookupTableKind
  } Kind;

  // The default value of the switch.
  Constant *DefaultValue;

  // The type of the output values.
  Type *ValueType;

  // For SingleValueKind, this is the single value.
  Constant *SingleValue = nullptr;

  // For BitMapKind, this is the bitmap.
  ConstantInt *BitMap = nullptr;
  IntegerType *BitMapElementTy = nullptr;

  // For LinearMapKind, these are the constants used to derive the value.
  ConstantInt *LinearOffset = nullptr;
  ConstantInt *LinearMultiplier = nullptr;
  bool LinearMapValWrapped = false;

  // For LookupTableKind, this is the table.
  Constant *Initializer = nullptr;
};

} // end anonymous namespace
6825
// Analyze the (case value, result constant) pairs of a switch and select the
// cheapest replacement strategy: a single constant, a linear function of the
// table index, a register-sized bitmap, or a constant-array lookup table.
// Offset is subtracted from each case value to form a zero-based table slot;
// DefaultValue (if non-null) fills any holes.
SwitchReplacement::SwitchReplacement(
    Module &M, uint64_t TableSize, ConstantInt *Offset,
    const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
    Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName)
    : DefaultValue(DefaultValue) {
  assert(Values.size() && "Can't build lookup table without values!");
  assert(TableSize >= Values.size() && "Can't fit values in table!");

  // If all values in the table are equal, this is that value.
  SingleValue = Values.begin()->second;

  ValueType = Values.begin()->second->getType();

  // Build up the table contents.
  SmallVector<Constant *, 64> TableContents(TableSize);
  for (const auto &[CaseVal, CaseRes] : Values) {
    assert(CaseRes->getType() == ValueType);

    // Table slots are zero-based relative to Offset.
    uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
    TableContents[Idx] = CaseRes;

    // Poison results do not disqualify the single-value candidate; a poison
    // candidate is upgraded to the first concrete result encountered.
    if (SingleValue && !isa<PoisonValue>(CaseRes) && CaseRes != SingleValue)
      SingleValue = isa<PoisonValue>(SingleValue) ? CaseRes : nullptr;
  }

  // Fill in any holes in the table with the default result.
  if (Values.size() < TableSize) {
    assert(DefaultValue &&
           "Need a default value to fill the lookup table holes.");
    assert(DefaultValue->getType() == ValueType);
    for (uint64_t I = 0; I < TableSize; ++I) {
      if (!TableContents[I])
        TableContents[I] = DefaultValue;
    }

    // If the default value is poison, all the holes are poison.
    bool DefaultValueIsPoison = isa<PoisonValue>(DefaultValue);

    if (DefaultValue != SingleValue && !DefaultValueIsPoison)
      SingleValue = nullptr;
  }

  // If each element in the table contains the same value, we only need to store
  // that single value.
  if (SingleValue) {
    Kind = SingleValueKind;
    return;
  }

  // Check if we can derive the value with a linear transformation from the
  // table index.
    bool LinearMappingPossible = true;
    APInt PrevVal;
    APInt DistToPrev;
    // When linear map is monotonic and signed overflow doesn't happen on
    // maximum index, we can attach nsw on Add and Mul.
    bool NonMonotonic = false;
    assert(TableSize >= 2 && "Should be a SingleValue table.");
    // Check if there is the same distance between two consecutive values.
    for (uint64_t I = 0; I < TableSize; ++I) {
      ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);

      if (!ConstVal && isa<PoisonValue>(TableContents[I])) {
        // This is a poison, so it's (probably) a lookup table hole.
        // To prevent any regressions from before we switched to using poison as
        // the default value, holes will fall back to using the first value.
        // This can be removed once we add proper handling for poisons in lookup
        // tables.
        ConstVal = dyn_cast<ConstantInt>(Values[0].second);
      }

      if (!ConstVal) {
        // This is an undef. We could deal with it, but undefs in lookup tables
        // are very seldom. It's probably not worth the additional complexity.
        LinearMappingPossible = false;
        break;
      }
      const APInt &Val = ConstVal->getValue();
      if (I != 0) {
        APInt Dist = Val - PrevVal;
        if (I == 1) {
          DistToPrev = Dist;
        } else if (Dist != DistToPrev) {
          LinearMappingPossible = false;
          break;
        }
        NonMonotonic |=
            Dist.isStrictlyPositive() ? Val.sle(PrevVal) : Val.sgt(PrevVal);
      }
      PrevVal = Val;
    }
    if (LinearMappingPossible) {
      LinearOffset = cast<ConstantInt>(TableContents[0]);
      LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
      // NOTE(review): this APInt `M` shadows the Module parameter `M`.
      APInt M = LinearMultiplier->getValue();
      bool MayWrap = true;
      // nsw is only valid if multiplying by the maximum index cannot wrap.
      if (isIntN(M.getBitWidth(), TableSize - 1))
        (void)M.smul_ov(APInt(M.getBitWidth(), TableSize - 1), MayWrap);
      LinearMapValWrapped = NonMonotonic || MayWrap;
      Kind = LinearMapKind;
      return;
    }
  }

  // If the type is integer and the table fits in a register, build a bitmap.
  if (wouldFitInRegister(DL, TableSize, ValueType)) {
    // Pack the table elements, lowest index in the least significant bits.
    APInt TableInt(TableSize * IT->getBitWidth(), 0);
    for (uint64_t I = TableSize; I > 0; --I) {
      TableInt <<= IT->getBitWidth();
      // Insert values into the bitmap. Undef values are set to zero.
      if (!isa<UndefValue>(TableContents[I - 1])) {
        ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
        TableInt |= Val->getValue().zext(TableInt.getBitWidth());
      }
    }
    BitMap = ConstantInt::get(M.getContext(), TableInt);
    BitMapElementTy = IT;
    Kind = BitMapKind;
    return;
  }

  // Store the table in an array.
  auto *TableTy = ArrayType::get(ValueType, TableSize);
  Initializer = ConstantArray::get(TableTy, TableContents);

  Kind = LookupTableKind;
}
6955
/// Emit IR at the Builder's insert point that maps \p Index to this
/// replacement's result value, using whichever strategy was selected by the
/// constructor, and return the resulting Value.
Value *SwitchReplacement::replaceSwitch(Value *Index, IRBuilder<> &Builder,
                                        const DataLayout &DL, Function *Func) {
  switch (Kind) {
  case SingleValueKind:
    // Every case produced the same constant; no code is needed.
    return SingleValue;
  case LinearMapKind: {
    ++NumLinearMaps;
    // Derive the result value from the input value.
    Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
                                          false, "switch.idx.cast");
    if (!LinearMultiplier->isOne())
      Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult",
                                 /*HasNUW = */ false,
                                 /*HasNSW = */ !LinearMapValWrapped);

    if (!LinearOffset->isZero())
      Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset",
                                 /*HasNUW = */ false,
                                 /*HasNSW = */ !LinearMapValWrapped);
    return Result;
  }
  case BitMapKind: {
    ++NumBitMaps;
    // Type of the bitmap (e.g. i59).
    IntegerType *MapTy = BitMap->getIntegerType();

    // Cast Index to the same type as the bitmap.
    // Note: The Index is <= the number of elements in the table, so
    // truncating it to the width of the bitmask is safe.
    Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");

    // Multiply the shift amount by the element width. NUW/NSW can always be
    // set, because wouldFitInRegister guarantees Index * ShiftAmt is in
    // BitMap's bit width.
    ShiftAmt = Builder.CreateMul(
        ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
        "switch.shiftamt",/*HasNUW =*/true,/*HasNSW =*/true);

    // Shift down.
    Value *DownShifted =
        Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
    // Mask off.
    return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
  }
  case LookupTableKind: {
    ++NumLookupTables;
    // Materialize the table as a private unnamed_addr constant global.
    auto *Table =
        new GlobalVariable(*Func->getParent(), Initializer->getType(),
                           /*isConstant=*/true, GlobalVariable::PrivateLinkage,
                           Initializer, "switch.table." + Func->getName());
    Table->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
    // Set the alignment to that of an array item. We will be only loading one
    // value out of it.
    Table->setAlignment(DL.getPrefTypeAlign(ValueType));
    Type *IndexTy = DL.getIndexType(Table->getType());
    auto *ArrayTy = cast<ArrayType>(Table->getValueType());

    if (Index->getType() != IndexTy) {
      unsigned OldBitWidth = Index->getType()->getIntegerBitWidth();
      Index = Builder.CreateZExtOrTrunc(Index, IndexTy);
      // The zext is non-negative when the table's last index fits in the
      // original index width minus the sign bit.
      if (auto *Zext = dyn_cast<ZExtInst>(Index))
        Zext->setNonNeg(
            isUIntN(OldBitWidth - 1, ArrayTy->getNumElements() - 1));
    }

    Value *GEPIndices[] = {ConstantInt::get(IndexTy, 0), Index};
    Value *GEP =
        Builder.CreateInBoundsGEP(ArrayTy, Table, GEPIndices, "switch.gep");
    return Builder.CreateLoad(ArrayTy->getElementType(), GEP, "switch.load");
  }
  }
  llvm_unreachable("Unknown helper kind!");
}
7029
7030bool SwitchReplacement::wouldFitInRegister(const DataLayout &DL,
7031 uint64_t TableSize,
7032 Type *ElementType) {
7033 auto *IT = dyn_cast<IntegerType>(ElementType);
7034 if (!IT)
7035 return false;
7036 // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
7037 // are <= 15, we could try to narrow the type.
7038
7039 // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
7040 if (TableSize >= UINT_MAX / IT->getBitWidth())
7041 return false;
7042 return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
7043}
7044
                                      const DataLayout &DL) {
  // Allow any legal type.
  if (TTI.isTypeLegal(Ty))
    return true;

  // Non-integer types that are not target-legal cannot be used as table
  // elements here.
  auto *IT = dyn_cast<IntegerType>(Ty);
  if (!IT)
    return false;

  // Also allow power of 2 integer types that have at least 8 bits and fit in
  // a register. These types are common in frontend languages and targets
  // usually support loads of these types.
  // TODO: We could relax this to any integer that fits in a register and rely
  // on ABI alignment and padding in the table to allow the load to be widened.
  // Or we could widen the constants and truncate the load.
  unsigned BitWidth = IT->getBitWidth();
  return BitWidth >= 8 && isPowerOf2_32(BitWidth) &&
         DL.fitsInLegalInteger(IT->getBitWidth());
}
7065
7066Constant *SwitchReplacement::getDefaultValue() { return DefaultValue; }
7067
7068bool SwitchReplacement::isLookupTable() { return Kind == LookupTableKind; }
7069
7070bool SwitchReplacement::isBitMap() { return Kind == BitMapKind; }
7071
/// Density heuristic: accept when at least 40% of the value range
/// [0, CaseRange) is covered by cases.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
  // Reject ranges so large that the percentage arithmetic below could
  // overflow a uint64_t.
  if (CaseRange >= UINT64_MAX / 100)
    return false;

  // 40% is the default density for building a jump table in optsize/minsize
  // mode (see TargetLoweringBase::isSuitableForJumpTable(), on which this
  // heuristic is based): accept iff NumCases / CaseRange >= 40%.
  constexpr uint64_t MinDensity = 40;
  return NumCases * 100 >= CaseRange * MinDensity;
}
7083
  // Assumes Values is sorted ascending (the caller in this file sorts before
  // invoking), so back() - front() spans the whole case range.
  uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
  uint64_t Range = Diff + 1;
  if (Range < Diff)
    return false; // Overflow.

  // Delegate to the (NumCases, CaseRange) density check.
  return isSwitchDense(Values.size(), Range);
}
7092
/// Determine whether a lookup table should be built for this switch, based on
/// the number of cases, size of the table, and the types of the results.
// TODO: We could support larger than legal types by limiting based on the
// number of loads required and/or table size. If the constants are small we
// could use smaller table entries and extend after the load.
                                   const TargetTransformInfo &TTI,
                                   const DataLayout &DL,
                                   const SmallVector<Type *> &ResultTypes) {
  if (SI->getNumCases() > TableSize)
    return false; // TableSize overflowed.

  bool AllTablesFitInRegister = true;
  bool HasIllegalType = false;
  for (const auto &Ty : ResultTypes) {
    // Saturate this flag to true.
    HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);

    // Saturate this flag to false.
    AllTablesFitInRegister =
        AllTablesFitInRegister &&
        SwitchReplacement::wouldFitInRegister(DL, TableSize, Ty);

    // If both flags saturate, we're done. NOTE: This *only* works with
    // saturating flags, and all flags have to saturate first due to the
    // non-deterministic behavior of iterating over a dense map.
    if (HasIllegalType && !AllTablesFitInRegister)
      break;
  }

  // If each table would fit in a register, we should build it anyway.
  if (AllTablesFitInRegister)
    return true;

  // Don't build a table that doesn't fit in-register if it has illegal types.
  if (HasIllegalType)
    return false;

  // Otherwise fall back to the case-density heuristic.
  return isSwitchDense(SI->getNumCases(), TableSize);
}
7133
    ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
    bool HasDefaultResults, const SmallVector<Type *> &ResultTypes,
    const DataLayout &DL, const TargetTransformInfo &TTI) {
  // A zero-based case range can index the table with the condition directly.
  if (MinCaseVal.isNullValue())
    return true;
  // A negative minimum or a saturated maximum cannot serve as a zero-based
  // index, and without default results the grown table's holes cannot be
  // filled.
  if (MinCaseVal.isNegative() ||
      MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
      !HasDefaultResults)
    return false;
  // Only worthwhile when every result still fits in a register at the larger
  // table size implied by indexing from zero.
  return all_of(ResultTypes, [&](const auto &ResultType) {
    return SwitchReplacement::wouldFitInRegister(
        DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */, ResultType);
  });
}
7149
/// Try to reuse the switch table index compare. Following pattern:
/// \code
///     if (idx < tablesize)
///        r = table[idx]; // table does not contain default_value
///     else
///        r = default_value;
///     if (r != default_value)
///        ...
/// \endcode
/// Is optimized to:
/// \code
///     cond = idx < tablesize;
///     if (cond)
///        r = table[idx];
///     else
///        r = default_value;
///     if (cond)
///        ...
/// \endcode
/// Jump threading will then eliminate the second if(cond).
    User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch,
    Constant *DefaultValue,
    const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
  if (!CmpInst)
    return;

  // We require that the compare is in the same block as the phi so that jump
  // threading can do its work afterwards.
  if (CmpInst->getParent() != PhiBlock)
    return;

  // Bail out unless the compared-against operand is a constant we can fold
  // against.
  if (!CmpOp1)
    return;

  Value *RangeCmp = RangeCheckBranch->getCondition();
  // Boolean constants used to classify the folded compare results below.
  Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
  Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());

  // Check if the compare with the default value is constant true or false.
  const DataLayout &DL = PhiBlock->getDataLayout();
      CmpInst->getPredicate(), DefaultValue, CmpOp1, DL);
  if (DefaultConst != TrueConst && DefaultConst != FalseConst)
    return;

  // Check if the compare with the case values is distinct from the default
  // compare result.
  for (auto ValuePair : Values) {
        CmpInst->getPredicate(), ValuePair.second, CmpOp1, DL);
    if (!CaseConst || CaseConst == DefaultConst ||
        (CaseConst != TrueConst && CaseConst != FalseConst))
      return;
  }

  // Check if the branch instruction dominates the phi node. It's a simple
  // dominance check, but sufficient for our needs.
  // Although this check is invariant in the calling loops, it's better to do it
  // at this late stage. Practically we do it at most once for a switch.
  BasicBlock *BranchBlock = RangeCheckBranch->getParent();
  for (BasicBlock *Pred : predecessors(PhiBlock)) {
    if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
      return;
  }

  if (DefaultConst == FalseConst) {
    // The compare yields the same result. We can replace it.
    CmpInst->replaceAllUsesWith(RangeCmp);
    ++NumTableCmpReuses;
  } else {
    // The compare yields the same result, just inverted. We can replace it.
    Value *InvertedTableCmp = BinaryOperator::CreateXor(
        RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
        RangeCheckBranch->getIterator());
    CmpInst->replaceAllUsesWith(InvertedTableCmp);
    ++NumTableCmpReuses;
  }
}
7231
/// If the switch is only used to initialize one or more phi nodes in a common
/// successor block with different constant values, replace the switch with
/// lookup tables.
                                DomTreeUpdater *DTU, const DataLayout &DL,
                                const TargetTransformInfo &TTI,
                                bool ConvertSwitchToLookupTable) {
  assert(SI->getNumCases() > 1 && "Degenerate switch?");

  BasicBlock *BB = SI->getParent();
  Function *Fn = BB->getParent();

  // FIXME: If the switch is too sparse for a lookup table, perhaps we could
  // split off a dense part and build a lookup table for that.

  // FIXME: This creates arrays of GEPs to constant strings, which means each
  // GEP needs a runtime relocation in PIC code. We should just build one big
  // string and lookup indices into that.

  // Ignore switches with less than three cases. Lookup tables will not make
  // them faster, so we don't analyze them.
  if (SI->getNumCases() < 3)
    return false;

  // Figure out the corresponding result for each case value and phi node in the
  // common destination, as well as the min and max case values.
  assert(!SI->cases().empty());
  SwitchInst::CaseIt CI = SI->case_begin();
  ConstantInt *MinCaseVal = CI->getCaseValue();
  ConstantInt *MaxCaseVal = CI->getCaseValue();

  BasicBlock *CommonDest = nullptr;

  using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;

  SmallVector<Type *> ResultTypes;

  for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
    ConstantInt *CaseVal = CI->getCaseValue();
    // Track the signed min and max case values seen so far.
    if (CaseVal->getValue().slt(MinCaseVal->getValue()))
      MinCaseVal = CaseVal;
    if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
      MaxCaseVal = CaseVal;

    // Resulting value at phi nodes for this case value.
    ResultsTy Results;
    if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
                        Results, DL, TTI))
      return false;

    // Append the result and result types from this case to the list for each
    // phi.
    for (const auto &I : Results) {
      PHINode *PHI = I.first;
      Constant *Value = I.second;
      auto [It, Inserted] = ResultLists.try_emplace(PHI);
      if (Inserted)
        PHIs.push_back(PHI);
      It->second.push_back(std::make_pair(CaseVal, Value));
      ResultTypes.push_back(PHI->getType());
    }
  }

  // If the table has holes, we need a constant result for the default case
  // or a bitmask that fits in a register.
  SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
  bool HasDefaultResults =
      getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
                     DefaultResultsList, DL, TTI);
  for (const auto &I : DefaultResultsList) {
    PHINode *PHI = I.first;
    Constant *Result = I.second;
    DefaultResults[PHI] = Result;
  }

  bool UseSwitchConditionAsTableIndex = shouldUseSwitchConditionAsTableIndex(
      *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
  uint64_t TableSize;
  ConstantInt *TableIndexOffset;
  if (UseSwitchConditionAsTableIndex) {
    // The condition indexes the table directly; table starts at zero.
    TableSize = MaxCaseVal->getLimitedValue() + 1;
    TableIndexOffset = ConstantInt::get(MaxCaseVal->getIntegerType(), 0);
  } else {
    TableSize =
        (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;

    TableIndexOffset = MinCaseVal;
  }

  // If the default destination is unreachable, or if the lookup table covers
  // all values of the conditional variable, branch directly to the lookup table
  // BB. Otherwise, check that the condition is within the case range.
  uint64_t NumResults = ResultLists[PHIs[0]].size();
  bool DefaultIsReachable = !SI->defaultDestUnreachable();

  bool TableHasHoles = (NumResults < TableSize);

  // If the table has holes but the default destination doesn't produce any
  // constant results, the lookup table entries corresponding to the holes will
  // contain poison.
  bool AllHolesArePoison = TableHasHoles && !HasDefaultResults;

  // If the default destination doesn't produce a constant result but is still
  // reachable, and the lookup table has holes, we need to use a mask to
  // determine if the current index should load from the lookup table or jump
  // to the default case.
  // The mask is unnecessary if the table has holes but the default destination
  // is unreachable, as in that case the holes must also be unreachable.
  bool NeedMask = AllHolesArePoison && DefaultIsReachable;
  if (NeedMask) {
    // As an extra penalty for the validity test we require more cases.
    if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
      return false;
    if (!DL.fitsInLegalInteger(TableSize))
      return false;
  }

  if (!shouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
    return false;

  // Compute the table index value.
  Value *TableIndex;
  if (UseSwitchConditionAsTableIndex) {
    TableIndex = SI->getCondition();
    if (HasDefaultResults) {
      // Grow the table to cover all possible index values to avoid the range
      // check. It will use the default result to fill in the table hole later,
      // so make sure it exists.
      ConstantRange CR =
          computeConstantRange(TableIndex, /* ForSigned */ false);
      // Grow the table shouldn't have any size impact by checking
      // wouldFitInRegister.
      // TODO: Consider growing the table also when it doesn't fit in a register
      // if no optsize is specified.
      const uint64_t UpperBound = CR.getUpper().getLimitedValue();
      if (!CR.isUpperWrapped() &&
          all_of(ResultTypes, [&](const auto &ResultType) {
            return SwitchReplacement::wouldFitInRegister(DL, UpperBound,
                                                         ResultType);
          })) {
        // There may be some case index larger than the UpperBound (unreachable
        // case), so make sure the table size does not get smaller.
        TableSize = std::max(UpperBound, TableSize);
        // The default branch is unreachable after we enlarge the lookup table.
        // Adjust DefaultIsReachable to reuse code path.
        DefaultIsReachable = false;
      }
    }
  }

  // Keep track of the switch replacement for each phi.
  for (PHINode *PHI : PHIs) {
    const auto &ResultList = ResultLists[PHI];

    Type *ResultType = ResultList.begin()->second->getType();
    // Use any value to fill the lookup table holes.
        AllHolesArePoison ? PoisonValue::get(ResultType) : DefaultResults[PHI];
    StringRef FuncName = Fn->getName();
    SwitchReplacement Replacement(*Fn->getParent(), TableSize, TableIndexOffset,
                                  ResultList, DefaultVal, DL, FuncName);
    PhiToReplacementMap.insert({PHI, Replacement});
  }

  bool AnyLookupTables = any_of(
      PhiToReplacementMap, [](auto &KV) { return KV.second.isLookupTable(); });
  bool AnyBitMaps = any_of(PhiToReplacementMap,
                           [](auto &KV) { return KV.second.isBitMap(); });

  // A few conditions prevent the generation of lookup tables:
  // 1. The target does not support lookup tables.
  // 2. The "no-jump-tables" function attribute is set.
  // However, these objections do not apply to other switch replacements, like
  // the bitmap, so we only stop here if any of these conditions are met and we
  // want to create a LUT. Otherwise, continue with the switch replacement.
  if (AnyLookupTables &&
      (!TTI.shouldBuildLookupTables() ||
       Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
    return false;

  // In the early optimization pipeline, disable formation of lookup tables,
  // bit maps and mask checks, as they may inhibit further optimization.
  if (!ConvertSwitchToLookupTable &&
      (AnyLookupTables || AnyBitMaps || NeedMask))
    return false;

  Builder.SetInsertPoint(SI);
  // TableIndex is the switch condition - TableIndexOffset if we don't
  // use the condition directly.
  if (!UseSwitchConditionAsTableIndex) {
    // If the default is unreachable, all case values are s>= MinCaseVal. Then
    // we can try to attach nsw.
    bool MayWrap = true;
    if (!DefaultIsReachable) {
      APInt Res =
          MaxCaseVal->getValue().ssub_ov(MinCaseVal->getValue(), MayWrap);
      (void)Res;
    }
    TableIndex = Builder.CreateSub(SI->getCondition(), TableIndexOffset,
                                   "switch.tableidx", /*HasNUW =*/false,
                                   /*HasNSW =*/!MayWrap);
  }

  std::vector<DominatorTree::UpdateType> Updates;

  // Compute the maximum table size representable by the integer type we are
  // switching upon.
  unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
  uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
  assert(MaxTableSize >= TableSize &&
         "It is impossible for a switch to have more entries than the max "
         "representable value of its input integer type's size.");

  // Create the BB that does the lookups.
  Module &Mod = *CommonDest->getParent()->getParent();
  BasicBlock *LookupBB = BasicBlock::Create(
      Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);

  BranchInst *RangeCheckBranch = nullptr;
  BranchInst *CondBranch = nullptr;

  Builder.SetInsertPoint(SI);
  const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
  if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
    Builder.CreateBr(LookupBB);
    if (DTU)
      Updates.push_back({DominatorTree::Insert, BB, LookupBB});
    // Note: We call removePredecessor later since we need to be able to get the
    // PHI value for the default case in case we're using a bit mask.
  } else {
    Value *Cmp = Builder.CreateICmpULT(
        TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
    RangeCheckBranch =
        Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
    CondBranch = RangeCheckBranch;
    if (DTU)
      Updates.push_back({DominatorTree::Insert, BB, LookupBB});
  }

  // Populate the BB that does the lookups.
  Builder.SetInsertPoint(LookupBB);

  if (NeedMask) {
    // Before doing the lookup, we do the hole check. The LookupBB is therefore
    // re-purposed to do the hole check, and we create a new LookupBB.
    BasicBlock *MaskBB = LookupBB;
    MaskBB->setName("switch.hole_check");
    LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
                                  CommonDest->getParent(), CommonDest);

    // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
    // unnecessary illegal types.
    uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
    APInt MaskInt(TableSizePowOf2, 0);
    APInt One(TableSizePowOf2, 1);
    // Build bitmask; fill in a 1 bit for every case.
    const ResultListTy &ResultList = ResultLists[PHIs[0]];
    for (const auto &Result : ResultList) {
      uint64_t Idx = (Result.first->getValue() - TableIndexOffset->getValue())
                         .getLimitedValue();
      MaskInt |= One << Idx;
    }
    ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);

    // Get the TableIndex'th bit of the bitmask.
    // If this bit is 0 (meaning hole) jump to the default destination,
    // else continue with table lookup.
    IntegerType *MapTy = TableMask->getIntegerType();
    Value *MaskIndex =
        Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
    Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
    Value *LoBit = Builder.CreateTrunc(
        Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
    CondBranch = Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
    if (DTU) {
      Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
      Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
    }
    Builder.SetInsertPoint(LookupBB);
    addPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB);
  }

  if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
    // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
    // do not delete PHINodes here.
    SI->getDefaultDest()->removePredecessor(BB,
                                            /*KeepOneInputPHIs=*/true);
    if (DTU)
      Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
  }

  for (PHINode *PHI : PHIs) {
    const ResultListTy &ResultList = ResultLists[PHI];
    // Emit the chosen replacement (single value, linear map, bitmap, or
    // table load) for this phi.
    auto Replacement = PhiToReplacementMap.at(PHI);
    auto *Result = Replacement.replaceSwitch(TableIndex, Builder, DL, Fn);
    // Do a small peephole optimization: re-use the switch table compare if
    // possible.
    if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
      BasicBlock *PhiBlock = PHI->getParent();
      // Search for compare instructions which use the phi.
      for (auto *User : PHI->users()) {
        reuseTableCompare(User, PhiBlock, RangeCheckBranch,
                          Replacement.getDefaultValue(), ResultList);
      }
    }

    PHI->addIncoming(Result, LookupBB);
  }

  Builder.CreateBr(CommonDest);
  if (DTU)
    Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});

  SmallVector<uint32_t> BranchWeights;
  const bool HasBranchWeights = CondBranch && !ProfcheckDisableMetadataFixes &&
                                extractBranchWeights(*SI, BranchWeights);
  uint64_t ToLookupWeight = 0;
  uint64_t ToDefaultWeight = 0;

  // Remove the switch.
  SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
  for (unsigned I = 0, E = SI->getNumSuccessors(); I < E; ++I) {
    BasicBlock *Succ = SI->getSuccessor(I);

    if (Succ == SI->getDefaultDest()) {
      if (HasBranchWeights)
        ToDefaultWeight += BranchWeights[I];
      continue;
    }
    Succ->removePredecessor(BB);
    if (DTU && RemovedSuccessors.insert(Succ).second)
      Updates.push_back({DominatorTree::Delete, BB, Succ});
    if (HasBranchWeights)
      ToLookupWeight += BranchWeights[I];
  }
  SI->eraseFromParent();
  if (HasBranchWeights)
    setFittedBranchWeights(*CondBranch, {ToLookupWeight, ToDefaultWeight},
                           /*IsExpected=*/false);
  if (DTU)
    DTU->applyUpdates(Updates);

  if (NeedMask)
    ++NumLookupTablesHoles;
  return true;
}
7583
/// Try to transform a switch that has "holes" in it to a contiguous sequence
/// of cases.
///
/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
///
/// This converts a sparse switch into a dense switch which allows better
/// lowering and could also allow transforming into a lookup table.
                              const DataLayout &DL,
                              const TargetTransformInfo &TTI) {
  auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
  // The arithmetic below works on 64-bit values; also require a legal width.
  if (CondTy->getIntegerBitWidth() > 64 ||
      !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
    return false;
  // Only bother with this optimization if there are more than 3 switch cases;
  // SDAG will only bother creating jump tables for 4 or more cases.
  if (SI->getNumCases() < 4)
    return false;

  // This transform is agnostic to the signedness of the input or case values. We
  // can treat the case values as signed or unsigned. We can optimize more common
  // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
  // as signed.
  for (const auto &C : SI->cases())
    Values.push_back(C.getCaseValue()->getValue().getSExtValue());
  llvm::sort(Values);

  // If the switch is already dense, there's nothing useful to do here.
  if (isSwitchDense(Values))
    return false;

  // First, transform the values such that they start at zero and ascend.
  int64_t Base = Values[0];
  for (auto &V : Values)
    V -= (uint64_t)(Base);

  // Now we have signed numbers that have been shifted so that, given enough
  // precision, there are no negative values. Since the rest of the transform
  // is bitwise only, we switch now to an unsigned representation.

  // This transform can be done speculatively because it is so cheap - it
  // results in a single rotate operation being inserted.

  // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
  // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
  // less than 64.
  unsigned Shift = 64;
  for (auto &V : Values)
    Shift = std::min(Shift, (unsigned)llvm::countr_zero((uint64_t)V));
  assert(Shift < 64);
  if (Shift > 0)
    for (auto &V : Values)
      V = (int64_t)((uint64_t)V >> Shift);

  if (!isSwitchDense(Values))
    // Transform didn't create a dense switch.
    return false;

  // The obvious transform is to shift the switch condition right and emit a
  // check that the condition actually cleanly divided by GCD, i.e.
  // C & (1 << Shift - 1) == 0
  // inserting a new CFG edge to handle the case where it didn't divide cleanly.
  //
  // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
  // shift and puts the shifted-off bits in the uppermost bits. If any of these
  // are nonzero then the switch condition will be very large and will hit the
  // default case.

  auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
  Builder.SetInsertPoint(SI);
  Value *Sub =
      Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
  // fshl(x, x, W - Shift) is ROTR(x, Shift).
  Value *Rot = Builder.CreateIntrinsic(
      Ty, Intrinsic::fshl,
      {Sub, Sub, ConstantInt::get(Ty, Ty->getBitWidth() - Shift)});
  SI->replaceUsesOfWith(SI->getCondition(), Rot);

  // Rewrite each case value to its rebased, shifted equivalent.
  for (auto Case : SI->cases()) {
    auto *Orig = Case.getCaseValue();
    auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base, true);
    Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
  }
  return true;
}
7670
/// Tries to transform the switch when the condition is umin with a constant.
/// In that case, the default branch can be replaced by the constant's branch.
/// This method also removes dead cases when the simplification cannot replace
/// the default branch.
///
/// For example:
/// switch(umin(a, 3)) {
/// case 0:
/// case 1:
/// case 2:
/// case 3:
/// case 4:
///   // ...
/// default:
///   unreachable
/// }
///
/// Transforms into:
///
/// switch(a) {
/// case 0:
/// case 1:
/// case 2:
/// default:
///   // This is case 3
/// }
  Value *A;

  if (!match(SI->getCondition(), m_UMin(m_Value(A), m_ConstantInt(Constant))))
    return false;

  BasicBlock *BB = SIW->getParent();

  // Dead cases are removed even when the simplification fails.
  // A case is dead when its value is higher than the Constant.
  for (auto I = SI->case_begin(), E = SI->case_end(); I != E;) {
    if (!I->getCaseValue()->getValue().ugt(Constant->getValue())) {
      ++I;
      continue;
    }
    BasicBlock *DeadCaseBB = I->getCaseSuccessor();
    DeadCaseBB->removePredecessor(BB);
    Updates.push_back({DominatorTree::Delete, BB, DeadCaseBB});
    I = SIW->removeCase(I);
    E = SIW->case_end();
  }

  auto Case = SI->findCaseValue(Constant);
  // If the case value is not found, `findCaseValue` returns the default case.
  // In this scenario, since there is no explicit `case 3:`, the simplification
  // fails. The simplification also fails when the switch's default destination
  // is reachable.
  if (!SI->defaultDestUnreachable() || Case == SI->case_default()) {
    if (DTU)
      DTU->applyUpdates(Updates);
    return !Updates.empty();
  }

  BasicBlock *Unreachable = SI->getDefaultDest();
  // Route the former default edge to the Constant's case, drop that case, and
  // switch directly on the un-clamped value.
  SIW.replaceDefaultDest(Case);
  SIW.removeCase(Case);
  SIW->setCondition(A);

  Updates.push_back({DominatorTree::Delete, BB, Unreachable});

  if (DTU)
    DTU->applyUpdates(Updates);

  return true;
}
7745
7746/// Tries to transform switch of powers of two to reduce switch range.
7747/// For example, switch like:
7748/// switch (C) { case 1: case 2: case 64: case 128: }
7749/// will be transformed to:
7750/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
7751///
7752/// This transformation allows better lowering and may transform the switch
7753/// instruction into a sequence of bit manipulation and a smaller
7754/// log2(C)-indexed value table (instead of traditionally emitting a load of the
7755/// address of the jump target, and indirectly jump to it).
7757 DomTreeUpdater *DTU,
7758 const DataLayout &DL,
7759 const TargetTransformInfo &TTI) {
7760 Value *Condition = SI->getCondition();
7761 LLVMContext &Context = SI->getContext();
7762 auto *CondTy = cast<IntegerType>(Condition->getType());
7763
// Bail out on conditions wider than 64 bits or not legal for the target.
7764 if (CondTy->getIntegerBitWidth() > 64 ||
7765 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7766 return false;
7767
7768 // Ensure trailing zeroes count intrinsic emission is not too expensive.
7769 IntrinsicCostAttributes Attrs(Intrinsic::cttz, CondTy,
7770 {Condition, ConstantInt::getTrue(Context)});
7771 if (TTI.getIntrinsicInstrCost(Attrs, TTI::TCK_SizeAndLatency) >
7772 TTI::TCC_Basic * 2)
7773 return false;
7774
7775 // Only bother with this optimization if there are more than 3 switch cases.
7776 // SDAG will start emitting jump tables for 4 or more cases.
7777 if (SI->getNumCases() < 4)
7778 return false;
7779
7780 // Check that switch cases are powers of two.
7782 for (const auto &Case : SI->cases()) {
7783 uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
7784 if (llvm::has_single_bit(CaseValue))
7785 Values.push_back(CaseValue);
7786 else
7787 return false;
7788 }
7789
7790 // isSwitchDense requires case values to be sorted.
7791 llvm::sort(Values);
7792 if (!isSwitchDense(Values.size(), llvm::countr_zero(Values.back()) -
7793 llvm::countr_zero(Values.front()) + 1))
7794 // Transform is unable to generate dense switch.
7795 return false;
7796
7797 Builder.SetInsertPoint(SI);
7798
7799 if (!SI->defaultDestUnreachable()) {
7800 // Let non-power-of-two inputs jump to the default case, when the latter is
7801 // reachable.
// Guard: ctpop(C) == 1 iff C is a power of two.
7802 auto *PopC = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, Condition);
7803 auto *IsPow2 = Builder.CreateICmpEQ(PopC, ConstantInt::get(CondTy, 1));
7804
// Split the block so a new conditional branch routes power-of-two values
// to the switch (SplitBB) and everything else straight to default.
7805 auto *OrigBB = SI->getParent();
7806 auto *DefaultCaseBB = SI->getDefaultDest();
7807 BasicBlock *SplitBB = SplitBlock(OrigBB, SI, DTU);
7808 auto It = OrigBB->getTerminator()->getIterator();
7809 SmallVector<uint32_t> Weights;
7810 auto HasWeights =
7812 auto *BI = BranchInst::Create(SplitBB, DefaultCaseBB, IsPow2, It);
7813 if (HasWeights && any_of(Weights, [](const auto &V) { return V != 0; })) {
7814 // IsPow2 covers a subset of the cases in which we'd go to the default
7815 // label. The other is those powers of 2 that don't appear in the case
7816 // statement. We don't know the distribution of the values coming in, so
7817 // the safest is to split 50-50 the original probability to `default`.
7818 uint64_t OrigDenominator =
7820 SmallVector<uint64_t> NewWeights(2);
7821 NewWeights[1] = Weights[0] / 2;
7822 NewWeights[0] = OrigDenominator - NewWeights[1];
7823 setFittedBranchWeights(*BI, NewWeights, /*IsExpected=*/false);
7824 // The probability of executing the default block stays constant. It was
7825 // p_d = Weights[0] / OrigDenominator
7826 // we rewrite as W/D
7827 // We want to find the probability of the default branch of the switch
7828 // statement. Let's call it X. We have W/D = W/2D + X * (1-W/2D)
7829 // i.e. the original probability is the probability we go to the default
7830 // branch from the BI branch, or we take the default branch on the SI.
7831 // Meaning X = W / (2D - W), or (W/2) / (D - W/2)
7832 // This matches using W/2 for the default branch probability numerator and
7833 // D-W/2 as the denominator.
7834 Weights[0] = NewWeights[1];
7835 uint64_t CasesDenominator = OrigDenominator - Weights[0];
// Rescale the per-case weights so the total distribution is preserved.
7836 for (auto &W : drop_begin(Weights))
7837 W = NewWeights[0] * static_cast<double>(W) / CasesDenominator;
7838
7839 setBranchWeights(*SI, Weights, /*IsExpected=*/false);
7840 }
7841 // BI is handling the default case for SI, and so should share its DebugLoc.
7842 BI->setDebugLoc(SI->getDebugLoc());
// Remove the unconditional branch SplitBlock left in OrigBB; BI replaces it.
7843 It->eraseFromParent();
7844
7845 addPredecessorToBlock(DefaultCaseBB, OrigBB, SplitBB);
7846 if (DTU)
7847 DTU->applyUpdates({{DominatorTree::Insert, OrigBB, DefaultCaseBB}});
7848 }
7849
7850 // Replace each case with its trailing zeros number.
7851 for (auto &Case : SI->cases()) {
7852 auto *OrigValue = Case.getCaseValue();
7853 Case.setValue(ConstantInt::get(OrigValue->getIntegerType(),
7854 OrigValue->getValue().countr_zero()));
7855 }
7856
7857 // Replace condition with its trailing zeros number.
7858 auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
7859 Intrinsic::cttz, {CondTy}, {Condition, ConstantInt::getTrue(Context)});
7860
7861 SI->setCondition(ConditionTrailingZeros);
7862
7863 return true;
7864}
7865
7866/// Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have
7867/// the same destination.
7869 DomTreeUpdater *DTU) {
7870 auto *Cmp = dyn_cast<CmpIntrinsic>(SI->getCondition());
7871 if (!Cmp || !Cmp->hasOneUse())
7872 return false;
7873
7875 bool HasWeights = extractBranchWeights(getBranchWeightMDNode(*SI), Weights);
7876 if (!HasWeights)
7877 Weights.resize(4); // Avoid checking HasWeights everywhere.
7878
7879 // Normalize to [us]cmp == Res ? Succ : OtherSucc.
7880 int64_t Res;
7881 BasicBlock *Succ, *OtherSucc;
7882 uint32_t SuccWeight = 0, OtherSuccWeight = 0;
7883 BasicBlock *Unreachable = nullptr;
7884
7885 if (SI->getNumCases() == 2) {
7886 // Find which of 1, 0 or -1 is missing (handled by default dest).
7887 SmallSet<int64_t, 3> Missing;
7888 Missing.insert(1);
7889 Missing.insert(0);
7890 Missing.insert(-1);
7891
// The missing value's result goes to the default dest; both explicit cases
// must share one successor for this to become a two-way branch.
7892 Succ = SI->getDefaultDest();
7893 SuccWeight = Weights[0];
7894 OtherSucc = nullptr;
7895 for (auto &Case : SI->cases()) {
7896 std::optional<int64_t> Val =
7897 Case.getCaseValue()->getValue().trySExtValue();
7898 if (!Val)
7899 return false;
7900 if (!Missing.erase(*Val))
7901 return false;
7902 if (OtherSucc && OtherSucc != Case.getCaseSuccessor())
7903 return false;
7904 OtherSucc = Case.getCaseSuccessor();
7905 OtherSuccWeight += Weights[Case.getSuccessorIndex()];
7906 }
7907
7908 assert(Missing.size() == 1 && "Should have one case left");
7909 Res = *Missing.begin();
7910 } else if (SI->getNumCases() == 3 && SI->defaultDestUnreachable()) {
7911 // Normalize so that Succ is taken once and OtherSucc twice.
7912 Unreachable = SI->getDefaultDest();
7913 Succ = OtherSucc = nullptr;
7914 for (auto &Case : SI->cases()) {
7915 BasicBlock *NewSucc = Case.getCaseSuccessor();
7916 uint32_t Weight = Weights[Case.getSuccessorIndex()];
7917 if (!OtherSucc || OtherSucc == NewSucc) {
7918 OtherSucc = NewSucc;
7919 OtherSuccWeight += Weight;
7920 } else if (!Succ) {
7921 Succ = NewSucc;
7922 SuccWeight = Weight;
7923 } else if (Succ == NewSucc) {
// Succ turned out to be the duplicated destination; swap roles.
7924 std::swap(Succ, OtherSucc);
7925 std::swap(SuccWeight, OtherSuccWeight);
7926 } else
7927 return false;
7928 }
// Validate case values are exactly {-1, 0, 1} and record which value
// selects the unique successor.
7929 for (auto &Case : SI->cases()) {
7930 std::optional<int64_t> Val =
7931 Case.getCaseValue()->getValue().trySExtValue();
7932 if (!Val || (Val != 1 && Val != 0 && Val != -1))
7933 return false;
7934 if (Case.getCaseSuccessor() == Succ) {
7935 Res = *Val;
7936 break;
7937 }
7938 }
7939 } else {
7940 return false;
7941 }
7942
7943 // Determine predicate for the missing case.
7945 switch (Res) {
7946 case 1:
7947 Pred = ICmpInst::ICMP_UGT;
7948 break;
7949 case 0:
7950 Pred = ICmpInst::ICMP_EQ;
7951 break;
7952 case -1:
7953 Pred = ICmpInst::ICMP_ULT;
7954 break;
7955 }
// scmp uses the signed form of the predicate.
7956 if (Cmp->isSigned())
7957 Pred = ICmpInst::getSignedPredicate(Pred);
7958
7959 MDNode *NewWeights = nullptr;
7960 if (HasWeights)
7961 NewWeights = MDBuilder(SI->getContext())
7962 .createBranchWeights(SuccWeight, OtherSuccWeight);
7963
// Replace the switch with icmp + condbr on the cmp intrinsic's operands.
7964 BasicBlock *BB = SI->getParent();
7965 Builder.SetInsertPoint(SI->getIterator());
7966 Value *ICmp = Builder.CreateICmp(Pred, Cmp->getLHS(), Cmp->getRHS());
7967 Builder.CreateCondBr(ICmp, Succ, OtherSucc, NewWeights,
7968 SI->getMetadata(LLVMContext::MD_unpredictable));
7969 OtherSucc->removePredecessor(BB);
7970 if (Unreachable)
7971 Unreachable->removePredecessor(BB);
7972 SI->eraseFromParent();
7973 Cmp->eraseFromParent();
7974 if (DTU && Unreachable)
7975 DTU->applyUpdates({{DominatorTree::Delete, BB, Unreachable}});
7976 return true;
7977}
7978
7979/// Checking whether two cases of SI are equal depends on the contents of the
7980/// BasicBlock and the incoming values of their successor PHINodes.
7981/// PHINode::getIncomingValueForBlock is O(|Preds|), so we'd like to avoid
7982/// calling this function on each BasicBlock every time isEqual is called,
7983/// especially since the same BasicBlock may be passed as an argument multiple
7984/// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock ->
7985/// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking
7986/// of the incoming values.
7991
// Sentinel keys required by the DenseMapInfo contract; these are never
// dereferenced, only compared by pointer identity.
7994 return static_cast<SwitchSuccWrapper *>(
7996 }
7998 return static_cast<SwitchSuccWrapper *>(
8000 }
8001 static unsigned getHashValue(const SwitchSuccWrapper *SSW) {
8002 BasicBlock *Succ = SSW->Dest;
8004 assert(BI->isUnconditional() &&
8005 "Only supporting unconditional branches for now");
8006 assert(BI->getNumSuccessors() == 1 &&
8007 "Expected unconditional branches to have one successor");
8008 assert(Succ->size() == 1 && "Expected just a single branch in the BB");
8009
8010 // Since we assume the BB is just a single BranchInst with a single
8011 // successor, we hash as the BB and the incoming Values of its successor
8012 // PHIs. Initially, we tried to just use the successor BB as the hash, but
8013 // including the incoming PHI values leads to better performance.
8014 // We also tried to build a map from BB -> Succs.IncomingValues ahead of
8015 // time and passing it in SwitchSuccWrapper, but this slowed down the
8016 // average compile time without having any impact on the worst case compile
8017 // time.
8018 BasicBlock *BB = BI->getSuccessor(0);
8019 SmallVector<Value *> PhiValsForBB;
8020 for (PHINode &Phi : BB->phis())
8021 PhiValsForBB.emplace_back((*SSW->PhiPredIVs)[&Phi][BB]);
8022
8023 return hash_combine(BB, hash_combine_range(PhiValsForBB));
8024 }
8025 static bool isEqual(const SwitchSuccWrapper *LHS,
8026 const SwitchSuccWrapper *RHS) {
// Sentinel keys compare only by identity; never inspect their contents.
8029 if (LHS == EKey || RHS == EKey || LHS == TKey || RHS == TKey)
8030 return LHS == RHS;
8031
8032 BasicBlock *A = LHS->Dest;
8033 BasicBlock *B = RHS->Dest;
8034
8035 // FIXME: we checked that the size of A and B are both 1 in
8036 // simplifyDuplicateSwitchArms to make the Case list smaller to
8037 // improve performance. If we decide to support BasicBlocks with more
8038 // than just a single instruction, we need to check that A.size() ==
8039 // B.size() here, and we need to check more than just the BranchInsts
8040 // for equality.
8041
8042 BranchInst *ABI = cast<BranchInst>(A->getTerminator());
8043 BranchInst *BBI = cast<BranchInst>(B->getTerminator());
8044 assert(ABI->isUnconditional() && BBI->isUnconditional() &&
8045 "Only supporting unconditional branches for now");
8046 if (ABI->getSuccessor(0) != BBI->getSuccessor(0))
8047 return false;
8048
8049 // Need to check that PHIs in successor have matching values
8050 BasicBlock *Succ = ABI->getSuccessor(0);
8051 for (PHINode &Phi : Succ->phis()) {
8052 auto &PredIVs = (*LHS->PhiPredIVs)[&Phi];
8053 if (PredIVs[A] != PredIVs[B])
8054 return false;
8055 }
8056
8057 return true;
8058 }
8059};
8060
8061bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI,
8062 DomTreeUpdater *DTU) {
8063 // Build Cases. Skip BBs that are not candidates for simplification. Mark
8064 // PHINodes which need to be processed into PhiPredIVs. We decide to process
8065 // an entire PHI at once after the loop, opposed to calling
8066 // getIncomingValueForBlock inside this loop, since each call to
8067 // getIncomingValueForBlock is O(|Preds|).
8073 Cases.reserve(SI->getNumSuccessors());
8074
8075 for (unsigned I = 0; I < SI->getNumSuccessors(); ++I) {
8076 BasicBlock *BB = SI->getSuccessor(I);
8077
8078 // FIXME: Support more than just a single BranchInst. One way we could do
8079 // this is by taking a hashing approach of all insts in BB.
8080 if (BB->size() != 1)
8081 continue;
8082
8083 // FIXME: Relax that the terminator is a BranchInst by checking for equality
8084 // on other kinds of terminators. We decide to only support unconditional
8085 // branches for now for compile time reasons.
8086 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
8087 if (!BI || BI->isConditional())
8088 continue;
8089
// Already-seen blocks are not re-added as candidates; just record this
// extra successor index so the later rewrite can update all edges.
8090 if (!Seen.insert(BB).second) {
8091 auto It = BBToSuccessorIndexes.find(BB);
8092 if (It != BBToSuccessorIndexes.end())
8093 It->second.emplace_back(I);
8094 continue;
8095 }
8096
8097 // FIXME: This case needs some extra care because the terminators other than
8098 // SI need to be updated. For now, consider only backedges to the SI.
8099 if (BB->getUniquePredecessor() != SI->getParent())
8100 continue;
8101
8102 // Keep track of which PHIs we need as keys in PhiPredIVs below.
8103 for (BasicBlock *Succ : BI->successors())
8105
8106 // Add the successor only if not previously visited.
8107 Cases.emplace_back(SwitchSuccWrapper{BB, &PhiPredIVs});
8108 BBToSuccessorIndexes[BB].emplace_back(I);
8109 }
8110
8111 // Precompute a data structure to improve performance of isEqual for
8112 // SwitchSuccWrapper.
8113 PhiPredIVs.reserve(Phis.size());
8114 for (PHINode *Phi : Phis) {
8115 auto &IVs =
8116 PhiPredIVs.try_emplace(Phi, Phi->getNumIncomingValues()).first->second;
8117 for (auto &IV : Phi->incoming_values())
8118 IVs.insert({Phi->getIncomingBlock(IV), IV.get()});
8119 }
8120
8121 // Build a set such that if the SwitchSuccWrapper exists in the set and
8122 // another SwitchSuccWrapper isEqual, then the equivalent SwitchSuccWrapper
8123 // which is not in the set should be replaced with the one in the set. If the
8124 // SwitchSuccWrapper is not in the set, then it should be added to the set so
8125 // other SwitchSuccWrappers can check against it in the same manner. We use
8126 // SwitchSuccWrapper instead of just BasicBlock because we'd like to pass
8127 // around information to isEquality, getHashValue, and when doing the
8128 // replacement with better performance.
8129 DenseSet<const SwitchSuccWrapper *> ReplaceWith;
8130 ReplaceWith.reserve(Cases.size());
8131
8133 Updates.reserve(ReplaceWith.size());
8134 bool MadeChange = false;
8135 for (auto &SSW : Cases) {
8136 // SSW is a candidate for simplification. If we find a duplicate BB,
8137 // replace it.
8138 const auto [It, Inserted] = ReplaceWith.insert(&SSW);
8139 if (!Inserted) {
8140 // We know that SI's parent BB no longer dominates the old case successor
8141 // since we are making it dead.
8142 Updates.push_back({DominatorTree::Delete, SI->getParent(), SSW.Dest});
8143 const auto &Successors = BBToSuccessorIndexes.at(SSW.Dest);
// Redirect every switch edge that targeted the duplicate arm to the
// canonical (first-seen) equivalent block.
8144 for (unsigned Idx : Successors)
8145 SI->setSuccessor(Idx, (*It)->Dest);
8146 MadeChange = true;
8147 }
8148 }
8149
8150 if (DTU)
8151 DTU->applyUpdates(Updates);
8152
8153 return MadeChange;
8154}
8155
8156bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
// Driver: attempt a pipeline of switch transforms in a deliberate order;
// the first one that fires requests re-simplification of this block.
8157 BasicBlock *BB = SI->getParent();
8158
8159 if (isValueEqualityComparison(SI)) {
8160 // If we only have one predecessor, and if it is a branch on this value,
8161 // see if that predecessor totally determines the outcome of this switch.
8162 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
8163 if (simplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
8164 return requestResimplify();
8165
8166 Value *Cond = SI->getCondition();
8167 if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
8168 if (simplifySwitchOnSelect(SI, Select))
8169 return requestResimplify();
8170
8171 // If the block only contains the switch, see if we can fold the block
8172 // away into any preds.
8173 if (SI == &*BB->instructionsWithoutDebug(false).begin())
8174 if (foldValueComparisonIntoPredecessors(SI, Builder))
8175 return requestResimplify();
8176 }
8177
8178 // Try to transform the switch into an icmp and a branch.
8179 // The conversion from switch to comparison may lose information on
8180 // impossible switch values, so disable it early in the pipeline.
8181 if (Options.ConvertSwitchRangeToICmp && turnSwitchRangeIntoICmp(SI, Builder))
8182 return requestResimplify();
8183
8184 // Remove unreachable cases.
8185 if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
8186 return requestResimplify();
8187
8188 if (simplifySwitchOfCmpIntrinsic(SI, Builder, DTU))
8189 return requestResimplify();
8190
8191 if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
8192 return requestResimplify();
8193
8194 if (Options.ForwardSwitchCondToPhi && forwardSwitchConditionToPHI(SI))
8195 return requestResimplify();
8196
8197 // The conversion of switches to arithmetic or lookup table is disabled in
8198 // the early optimization pipeline, as it may lose information or make the
8199 // resulting code harder to analyze.
8200 if (Options.ConvertSwitchToArithmetic || Options.ConvertSwitchToLookupTable)
8201 if (simplifySwitchLookup(SI, Builder, DTU, DL, TTI,
8202 Options.ConvertSwitchToLookupTable))
8203 return requestResimplify();
8204
8205 if (simplifySwitchOfPowersOfTwo(SI, Builder, DTU, DL, TTI))
8206 return requestResimplify();
8207
8208 if (reduceSwitchRange(SI, Builder, DL, TTI))
8209 return requestResimplify();
8210
8211 if (HoistCommon &&
8212 hoistCommonCodeFromSuccessors(SI, !Options.HoistCommonInsts))
8213 return requestResimplify();
8214
8215 if (simplifyDuplicateSwitchArms(SI, DTU))
8216 return requestResimplify();
8217
8218 if (simplifySwitchWhenUMin(SI, DTU))
8219 return requestResimplify();
8220
// No transform applied.
8221 return false;
8222}
8223
8224bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
// Simplify an indirectbr: drop duplicate/dead destinations, degrade to
// unreachable or a direct branch when possible, and keep profile weights
// consistent across the rewrite.
8225 BasicBlock *BB = IBI->getParent();
8226 bool Changed = false;
8227 SmallVector<uint32_t> BranchWeights;
8228 const bool HasBranchWeights = !ProfcheckDisableMetadataFixes &&
8229 extractBranchWeights(*IBI, BranchWeights);
8230
// Accumulate weight per target block so duplicates can be merged below.
8231 DenseMap<const BasicBlock *, uint64_t> TargetWeight;
8232 if (HasBranchWeights)
8233 for (size_t I = 0, E = IBI->getNumDestinations(); I < E; ++I)
8234 TargetWeight[IBI->getDestination(I)] += BranchWeights[I];
8235
8236 // Eliminate redundant destinations.
8237 SmallPtrSet<Value *, 8> Succs;
8238 SmallSetVector<BasicBlock *, 8> RemovedSuccs;
8239 for (unsigned I = 0, E = IBI->getNumDestinations(); I != E; ++I) {
8240 BasicBlock *Dest = IBI->getDestination(I);
// Drop a destination when its address is never taken (it can't be
// reached via indirectbr) or it already appears earlier in the list.
8241 if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
8242 if (!Dest->hasAddressTaken())
8243 RemovedSuccs.insert(Dest);
8244 Dest->removePredecessor(BB);
8245 IBI->removeDestination(I);
// removeDestination compacts the list; re-examine slot I.
8246 --I;
8247 --E;
8248 Changed = true;
8249 }
8250 }
8251
8252 if (DTU) {
8253 std::vector<DominatorTree::UpdateType> Updates;
8254 Updates.reserve(RemovedSuccs.size());
8255 for (auto *RemovedSucc : RemovedSuccs)
8256 Updates.push_back({DominatorTree::Delete, BB, RemovedSucc});
8257 DTU->applyUpdates(Updates);
8258 }
8259
8260 if (IBI->getNumDestinations() == 0) {
8261 // If the indirectbr has no successors, change it to unreachable.
8262 new UnreachableInst(IBI->getContext(), IBI->getIterator());
8264 return true;
8265 }
8266
8267 if (IBI->getNumDestinations() == 1) {
8268 // If the indirectbr has one successor, change it to a direct branch.
8271 return true;
8272 }
// Re-emit weights for the surviving destinations from the merged totals.
8273 if (HasBranchWeights) {
8274 SmallVector<uint64_t> NewBranchWeights(IBI->getNumDestinations());
8275 for (size_t I = 0, E = IBI->getNumDestinations(); I < E; ++I)
8276 NewBranchWeights[I] += TargetWeight.find(IBI->getDestination(I))->second;
8277 setFittedBranchWeights(*IBI, NewBranchWeights, /*IsExpected=*/false);
8278 }
8279 if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
8280 if (simplifyIndirectBrOnSelect(IBI, SI))
8281 return requestResimplify();
8282 }
8283 return Changed;
8284}
8285
8286/// Given an block with only a single landing pad and a unconditional branch
8287/// try to find another basic block which this one can be merged with. This
8288/// handles cases where we have multiple invokes with unique landing pads, but
8289/// a shared handler.
8290///
8291/// We specifically choose to not worry about merging non-empty blocks
8292/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
8293/// practice, the optimizer produces empty landing pad blocks quite frequently
8294/// when dealing with exception dense code. (see: instcombine, gvn, if-else
8295/// sinking in this file)
8296///
8297/// This is primarily a code size optimization. We need to avoid performing
8298/// any transform which might inhibit optimization (such as our ability to
8299/// specialize a particular handler via tail commoning). We do this by not
8300/// merging any blocks which require us to introduce a phi. Since the same
8301/// values are flowing through both blocks, we don't lose any ability to
8302/// specialize. If anything, we make such specialization more likely.
8303///
8304/// TODO - This transformation could remove entries from a phi in the target
8305/// block when the inputs in the phi are the same for the two blocks being
8306/// merged. In some cases, this could result in removal of the PHI entirely.
8308 BasicBlock *BB, DomTreeUpdater *DTU) {
8309 auto Succ = BB->getUniqueSuccessor();
8310 assert(Succ);
8311 // If there's a phi in the successor block, we'd likely have to introduce
8312 // a phi into the merged landing pad block.
8313 if (isa<PHINode>(*Succ->begin()))
8314 return false;
8315
// Scan the other predecessors of our successor for a block that is
// instruction-for-instruction identical to BB (same landingpad, same
// unconditional branch).
8316 for (BasicBlock *OtherPred : predecessors(Succ)) {
8317 if (BB == OtherPred)
8318 continue;
8319 BasicBlock::iterator I = OtherPred->begin();
8321 if (!LPad2 || !LPad2->isIdenticalTo(LPad))
8322 continue;
8323 ++I;
8325 if (!BI2 || !BI2->isIdenticalTo(BI))
8326 continue;
8327
8328 std::vector<DominatorTree::UpdateType> Updates;
8329
8330 // We've found an identical block. Update our predecessors to take that
8331 // path instead and make ourselves dead.
8333 for (BasicBlock *Pred : UniquePreds) {
8334 InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
8335 assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
8336 "unexpected successor");
8337 II->setUnwindDest(OtherPred);
8338 if (DTU) {
8339 Updates.push_back({DominatorTree::Insert, Pred, OtherPred});
8340 Updates.push_back({DominatorTree::Delete, Pred, BB});
8341 }
8342 }
8343
// BB is now dead: detach it from its successors before replacing its
// terminator with unreachable.
8345 for (BasicBlock *Succ : UniqueSuccs) {
8346 Succ->removePredecessor(BB);
8347 if (DTU)
8348 Updates.push_back({DominatorTree::Delete, BB, Succ});
8349 }
8350
8351 IRBuilder<> Builder(BI);
8352 Builder.CreateUnreachable();
8353 BI->eraseFromParent();
8354 if (DTU)
8355 DTU->applyUpdates(Updates);
8356 return true;
8357 }
8358 return false;
8359}
8360
8361bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) {
8362 return Branch->isUnconditional() ? simplifyUncondBranch(Branch, Builder)
8363 : simplifyCondBranch(Branch, Builder);
8364}
8365
8366bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
8367 IRBuilder<> &Builder) {
8368 BasicBlock *BB = BI->getParent();
8369 BasicBlock *Succ = BI->getSuccessor(0);
8370
8371 // If the Terminator is the only non-phi instruction, simplify the block.
8372 // If LoopHeader is provided, check if the block or its successor is a loop
8373 // header. (This is for early invocations before loop simplify and
8374 // vectorization to keep canonical loop forms for nested loops. These blocks
8375 // can be eliminated when the pass is invoked later in the back-end.)
8376 // Note that if BB has only one predecessor then we do not introduce new
8377 // backedge, so we can eliminate BB.
8378 bool NeedCanonicalLoop =
8379 Options.NeedCanonicalLoop &&
8380 (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) &&
8381 (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ)));
8383 if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
8384 !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
8385 return true;
8386
8387 // If the only instruction in the block is a seteq/setne comparison against a
8388 // constant, try to simplify the block.
8389 if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
8390 if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
8391 ++I;
8392 if (I->isTerminator() &&
8393 tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
8394 return true;
// Also handle the icmp + select + terminator shape.
8395 if (isa<SelectInst>(I) && I->getNextNode()->isTerminator() &&
8396 tryToSimplifyUncondBranchWithICmpSelectInIt(ICI, cast<SelectInst>(I),
8397 Builder))
8398 return true;
8399 }
8400 }
8401
8402 // See if we can merge an empty landing pad block with another which is
8403 // equivalent.
8404 if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
8405 ++I;
8406 if (I->isTerminator() && tryToMergeLandingPad(LPad, BI, BB, DTU))
8407 return true;
8408 }
8409
8410 // If this basic block is ONLY a compare and a branch, and if a predecessor
8411 // branches to us and our successor, fold the comparison into the
8412 // predecessor and use logical operations to update the incoming value
8413 // for PHI nodes in common successor.
8414 if (Options.SpeculateBlocks &&
8415 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
8416 Options.BonusInstThreshold))
8417 return requestResimplify();
8418 return false;
8419}
8420
// Returns the single block that is the unique predecessor of every
// predecessor of BB, or nullptr if no such common grandparent exists.
// (NOTE(review): the function signature line is elided in this rendering;
// BB is the block whose predecessors are inspected.)
8422 BasicBlock *PredPred = nullptr;
8423 for (auto *P : predecessors(BB)) {
8424 BasicBlock *PPred = P->getSinglePredecessor();
// Fail if any predecessor has multiple predecessors, or the common
// grandparent candidates disagree.
8425 if (!PPred || (PredPred && PredPred != PPred))
8426 return nullptr;
8427 PredPred = PPred;
8428 }
8429 return PredPred;
8430}
8431
8432/// Fold the following pattern:
8433/// bb0:
8434/// br i1 %cond1, label %bb1, label %bb2
8435/// bb1:
8436/// br i1 %cond2, label %bb3, label %bb4
8437/// bb2:
8438/// br i1 %cond2, label %bb4, label %bb3
8439/// bb3:
8440/// ...
8441/// bb4:
8442/// ...
8443/// into
8444/// bb0:
8445/// %cond = xor i1 %cond1, %cond2
8446/// br i1 %cond, label %bb4, label %bb3
8447/// bb3:
8448/// ...
8449/// bb4:
8450/// ...
8451/// NOTE: %cond2 always dominates the terminator of bb0.
8453 BasicBlock *BB = BI->getParent();
8454 BasicBlock *BB1 = BI->getSuccessor(0);
8455 BasicBlock *BB2 = BI->getSuccessor(1);
// A "simple" successor holds nothing but a conditional branch, does not
// loop back into BB or itself, and its targets have no PHIs (so removing
// the intermediate blocks needs no PHI surgery).
8456 auto IsSimpleSuccessor = [BB](BasicBlock *Succ, BranchInst *&SuccBI) {
8457 if (Succ == BB)
8458 return false;
8459 if (&Succ->front() != Succ->getTerminator())
8460 return false;
8461 SuccBI = dyn_cast<BranchInst>(Succ->getTerminator());
8462 if (!SuccBI || !SuccBI->isConditional())
8463 return false;
8464 BasicBlock *Succ1 = SuccBI->getSuccessor(0);
8465 BasicBlock *Succ2 = SuccBI->getSuccessor(1);
8466 return Succ1 != Succ && Succ2 != Succ && Succ1 != BB && Succ2 != BB &&
8467 !isa<PHINode>(Succ1->front()) && !isa<PHINode>(Succ2->front());
8468 };
8469 BranchInst *BB1BI, *BB2BI;
8470 if (!IsSimpleSuccessor(BB1, BB1BI) || !IsSimpleSuccessor(BB2, BB2BI))
8471 return false;
8472
// Both inner branches must test the same condition with swapped targets.
8473 if (BB1BI->getCondition() != BB2BI->getCondition() ||
8474 BB1BI->getSuccessor(0) != BB2BI->getSuccessor(1) ||
8475 BB1BI->getSuccessor(1) != BB2BI->getSuccessor(0))
8476 return false;
8477
8478 BasicBlock *BB3 = BB1BI->getSuccessor(0);
8479 BasicBlock *BB4 = BB1BI->getSuccessor(1);
8480 IRBuilder<> Builder(BI);
// br (cond1 ^ cond2), bb4, bb3 reproduces the original reachability.
8481 BI->setCondition(
8482 Builder.CreateXor(BI->getCondition(), BB1BI->getCondition()));
8483 BB1->removePredecessor(BB);
8484 BI->setSuccessor(0, BB4);
8485 BB2->removePredecessor(BB);
8486 BI->setSuccessor(1, BB3);
8487 if (DTU) {
8489 Updates.push_back({DominatorTree::Delete, BB, BB1});
8490 Updates.push_back({DominatorTree::Insert, BB, BB4});
8491 Updates.push_back({DominatorTree::Delete, BB, BB2});
8492 Updates.push_back({DominatorTree::Insert, BB, BB3});
8493
8494 DTU->applyUpdates(Updates);
8495 }
// Combine profile weights from the three branches; any missing weights
// default to 1 so the remaining data still contributes.
8496 bool HasWeight = false;
8497 uint64_t BBTWeight, BBFWeight;
8498 if (extractBranchWeights(*BI, BBTWeight, BBFWeight))
8499 HasWeight = true;
8500 else
8501 BBTWeight = BBFWeight = 1;
8502 uint64_t BB1TWeight, BB1FWeight;
8503 if (extractBranchWeights(*BB1BI, BB1TWeight, BB1FWeight))
8504 HasWeight = true;
8505 else
8506 BB1TWeight = BB1FWeight = 1;
8507 uint64_t BB2TWeight, BB2FWeight;
8508 if (extractBranchWeights(*BB2BI, BB2TWeight, BB2FWeight))
8509 HasWeight = true;
8510 else
8511 BB2TWeight = BB2FWeight = 1;
8512 if (HasWeight) {
// P(bb4) = P(bb1)*P(bb1->bb4) + P(bb2)*P(bb2->bb4); likewise for bb3.
8513 uint64_t Weights[2] = {BBTWeight * BB1FWeight + BBFWeight * BB2TWeight,
8514 BBTWeight * BB1TWeight + BBFWeight * BB2FWeight};
8515 setFittedBranchWeights(*BI, Weights, /*IsExpected=*/false,
8516 /*ElideAllZero=*/true);
8517 }
8518 return true;
8519}
8520
// Peephole-simplify the conditional branch BI terminating BB. A sequence of
// transforms is attempted in order; most successful transforms return via
// requestResimplify() so the whole block is re-examined on the next round.
8521 bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
8522   assert(
8524       BI->getSuccessor(0) != BI->getSuccessor(1) &&
8525       "Tautological conditional branch should have been eliminated already.");
8526 
8527   BasicBlock *BB = BI->getParent();
     // Bail out when cond-branch simplification is disabled by options or by
     // the OptForFuzzing function attribute.
8528   if (!Options.SimplifyCondBranch ||
8529       BI->getFunction()->hasFnAttribute(Attribute::OptForFuzzing))
8530     return false;
8531 
8532   // Conditional branch
8533   if (isValueEqualityComparison(BI)) {
8534     // If we only have one predecessor, and if it is a branch on this value,
8535     // see if that predecessor totally determines the outcome of this
8536     // switch.
8537     if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
8538       if (simplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
8539         return requestResimplify();
8540 
8541     // This block must be empty, except for the setcond inst, if it exists.
8542     // Ignore dbg and pseudo intrinsics.
8543     auto I = BB->instructionsWithoutDebug(true).begin();
8544     if (&*I == BI) {
8545       if (foldValueComparisonIntoPredecessors(BI, Builder))
8546         return requestResimplify();
8547     } else if (&*I == cast<Instruction>(BI->getCondition())) {
       // The only non-debug instruction before BI is its own condition; the
       // fold is still legal since the condition moves with the branch.
8548       ++I;
8549       if (&*I == BI && foldValueComparisonIntoPredecessors(BI, Builder))
8550         return requestResimplify();
8551     }
8552   }
8553 
8554   // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
8555   if (simplifyBranchOnICmpChain(BI, Builder, DL))
8556     return true;
8557 
8558   // If this basic block has dominating predecessor blocks and the dominating
8559   // blocks' conditions imply BI's condition, we know the direction of BI.
8560   std::optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL);
8561   if (Imp) {
8562     // Turn this into a branch on constant.
8563     auto *OldCond = BI->getCondition();
8564     ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
8565                              : ConstantInt::getFalse(BB->getContext());
8566     BI->setCondition(TorF);
8568     return requestResimplify();
8569   }
8570 
8571   // If this basic block is ONLY a compare and a branch, and if a predecessor
8572   // branches to us and one of our successors, fold the comparison into the
8573   // predecessor and use logical operations to pick the right destination.
8574   if (Options.SpeculateBlocks &&
8575       foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
8576                              Options.BonusInstThreshold))
8577     return requestResimplify();
8578 
8579   // We have a conditional branch to two blocks that are only reachable
8580   // from BI. We know that the condbr dominates the two blocks, so see if
8581   // there is any identical code in the "then" and "else" blocks. If so, we
8582   // can hoist it up to the branching block.
8583   if (BI->getSuccessor(0)->getSinglePredecessor()) {
8584     if (BI->getSuccessor(1)->getSinglePredecessor()) {
8585       if (HoistCommon &&
8586           hoistCommonCodeFromSuccessors(BI, !Options.HoistCommonInsts))
8587         return requestResimplify();
8588 
       // With HoistLoadsStoresWithCondFaulting enabled, collect safe, cheap
       // loads/stores from both successors and, when profitable, hoist them
       // above the branch as conditional accesses.
8589       if (BI && Options.HoistLoadsStoresWithCondFaulting &&
8590           isProfitableToSpeculate(BI, std::nullopt, TTI)) {
8591         SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
         // True iff every non-terminator instruction in both successors is a
         // hoistable load/store (within the size cap) and at least one exists.
8592         auto CanSpeculateConditionalLoadsStores = [&]() {
8593           for (auto *Succ : successors(BB)) {
8594             for (Instruction &I : *Succ) {
8595               if (I.isTerminator()) {
8596                 if (I.getNumSuccessors() > 1)
8597                   return false;
8598                 continue;
8599               } else if (!isSafeCheapLoadStore(&I, TTI) ||
8600                          SpeculatedConditionalLoadsStores.size() ==
8602                 return false;
8603               }
8604               SpeculatedConditionalLoadsStores.push_back(&I);
8605             }
8606           }
8607           return !SpeculatedConditionalLoadsStores.empty();
8608         };
8609 
8610         if (CanSpeculateConditionalLoadsStores()) {
8611           hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
8612                                       std::nullopt, nullptr);
8613           return requestResimplify();
8614         }
8615       }
8616     } else {
8617       // If Successor #1 has multiple preds, we may be able to conditionally
8618       // execute Successor #0 if it branches to Successor #1.
8619       Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
8620       if (Succ0TI->getNumSuccessors() == 1 &&
8621           Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
8622         if (speculativelyExecuteBB(BI, BI->getSuccessor(0)))
8623           return requestResimplify();
8624     }
8625   } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
8626     // If Successor #0 has multiple preds, we may be able to conditionally
8627     // execute Successor #1 if it branches to Successor #0.
8628     Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
8629     if (Succ1TI->getNumSuccessors() == 1 &&
8630         Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
8631       if (speculativelyExecuteBB(BI, BI->getSuccessor(1)))
8632         return requestResimplify();
8633   }
8634 
8635   // If this is a branch on something for which we know the constant value in
8636   // predecessors (e.g. a phi node in the current block), thread control
8637   // through this block.
8638   if (foldCondBranchOnValueKnownInPredecessor(BI))
8639     return requestResimplify();
8640 
8641   // Scan predecessor blocks for conditional branches.
8642   for (BasicBlock *Pred : predecessors(BB))
8643     if (BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()))
8644       if (PBI != BI && PBI->isConditional())
8645         if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
8646           return requestResimplify();
8647 
8648   // Look for diamond patterns.
8649   if (MergeCondStores)
8650     if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
8651       if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
8652         if (PBI != BI && PBI->isConditional())
8653           if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
8654             return requestResimplify();
8655 
8656   // Look for nested conditional branches.
8657   if (mergeNestedCondBranch(BI, DTU))
8658     return requestResimplify();
8659 
8660   return false;
8661 }
8662
8663 /// Check if passing a value to an instruction will cause undefined behavior.
/// \p V is the (null or undef) value the caller would substitute for \p I.
/// Returns true only when a user of \p I — in the same basic block, strictly
/// after \p I, with no control-flow-altering instruction in between — makes
/// that value immediate UB (load/store through null, call of null, noundef /
/// nonnull violation, division by zero, assume(false/undef), ...).
/// \p PtrValueMayBeModified records that a GEP may have moved the pointer off
/// null, which weakens the nonnull-based conclusions below.
8664 static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {
8665   assert(V->getType() == I->getType() && "Mismatched types");
8667   if (!C)
8668     return false;
8669 
     // Without uses there is no user that could trigger UB.
8670   if (I->use_empty())
8671     return false;
8672 
8673   if (C->isNullValue() || isa<UndefValue>(C)) {
8674     // Only look at the first use we can handle, avoid hurting compile time with
8675     // long uselists
8676     auto FindUse = llvm::find_if(I->uses(), [](auto &U) {
8677       auto *Use = cast<Instruction>(U.getUser());
8678       // Change this list when we want to add new instructions.
8679       switch (Use->getOpcode()) {
8680       default:
8681         return false;
8682       case Instruction::GetElementPtr:
8683       case Instruction::Ret:
8684       case Instruction::BitCast:
8685       case Instruction::Load:
8686       case Instruction::Store:
8687       case Instruction::Call:
8688       case Instruction::CallBr:
8689       case Instruction::Invoke:
8690       case Instruction::UDiv:
8691       case Instruction::URem:
8692         // Note: signed div/rem of INT_MIN / -1 is also immediate UB, not
8693         // implemented to avoid code complexity as it is unclear how useful such
8694         // logic is.
8695       case Instruction::SDiv:
8696       case Instruction::SRem:
8697         return true;
8698       }
8699     });
8700     if (FindUse == I->use_end())
8701       return false;
8702     auto &Use = *FindUse;
8703     auto *User = cast<Instruction>(Use.getUser());
8704     // Bail out if User is not in the same BB as I or User == I or User comes
8705     // before I in the block. The latter two can be the case if User is a
8706     // PHI node.
8707     if (User->getParent() != I->getParent() || User == I ||
8708         User->comesBefore(I))
8709       return false;
8710 
8711     // Now make sure that there are no instructions in between that can alter
8712     // control flow (eg. calls)
8713     auto InstrRange =
8714         make_range(std::next(I->getIterator()), User->getIterator());
8715     if (any_of(InstrRange, [](Instruction &I) {
8717         }))
8718       return false;
8719 
8720     // Look through GEPs. A load from a GEP derived from NULL is still undefined
8722     if (GEP->getPointerOperand() == I) {
8723       // The type of GEP may differ from the type of base pointer.
8724       // Bail out on vector GEPs, as they are not handled by other checks.
8725       if (GEP->getType()->isVectorTy())
8726         return false;
8727       // The current base address is null, there are four cases to consider:
8728       // getelementptr (TY, null, 0) -> null
8729       // getelementptr (TY, null, not zero) -> may be modified
8730       // getelementptr inbounds (TY, null, 0) -> null
8731       // getelementptr inbounds (TY, null, not zero) -> poison iff null is
8732       // undefined?
8733       if (!GEP->hasAllZeroIndices() &&
8734           (!GEP->isInBounds() ||
8735            NullPointerIsDefined(GEP->getFunction(),
8736                                 GEP->getPointerAddressSpace())))
8737         PtrValueMayBeModified = true;
       // Recurse on the GEP result: its users may still make null UB.
8738       return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
8739     }
8740 
8741     // Look through return.
8742     if (ReturnInst *Ret = dyn_cast<ReturnInst>(User)) {
8743       bool HasNoUndefAttr =
8744           Ret->getFunction()->hasRetAttribute(Attribute::NoUndef);
8745       // Return undefined to a noundef return value is undefined.
8746       if (isa<UndefValue>(C) && HasNoUndefAttr)
8747         return true;
8748       // Return null to a nonnull+noundef return value is undefined.
8749       if (C->isNullValue() && HasNoUndefAttr &&
8750           Ret->getFunction()->hasRetAttribute(Attribute::NonNull)) {
8751         return !PtrValueMayBeModified;
8752       }
8753     }
8754 
8755     // Load from null is undefined.
8756     if (LoadInst *LI = dyn_cast<LoadInst>(User))
8757       if (!LI->isVolatile())
8758         return !NullPointerIsDefined(LI->getFunction(),
8759                                      LI->getPointerAddressSpace());
8760 
8761     // Store to null is undefined.
8763       if (!SI->isVolatile())
         // Only a store *through* the null pointer is UB; storing the null
         // value itself is fine, hence the pointer-operand check.
8764         return (!NullPointerIsDefined(SI->getFunction(),
8765                                       SI->getPointerAddressSpace())) &&
8766                SI->getPointerOperand() == I;
8767 
8768     // llvm.assume(false/undef) always triggers immediate UB.
8769     if (auto *Assume = dyn_cast<AssumeInst>(User)) {
8770       // Ignore assume operand bundles.
8771       if (I == Assume->getArgOperand(0))
8772         return true;
8773     }
8774 
8775     if (auto *CB = dyn_cast<CallBase>(User)) {
8776       if (C->isNullValue() && NullPointerIsDefined(CB->getFunction()))
8777         return false;
8778       // A call to null is undefined.
8779       if (CB->getCalledOperand() == I)
8780         return true;
8781 
8782       if (CB->isArgOperand(&Use)) {
8783         unsigned ArgIdx = CB->getArgOperandNo(&Use);
8784         // Passing null to a nonnull+noundef argument is undefined.
8786             CB->paramHasNonNullAttr(ArgIdx, /*AllowUndefOrPoison=*/false))
8787           return !PtrValueMayBeModified;
8788         // Passing undef to a noundef argument is undefined.
8789         if (isa<UndefValue>(C) && CB->isPassingUndefUB(ArgIdx))
8790           return true;
8791       }
8792     }
8793     // Div/Rem by zero is immediate UB
8794     if (match(User, m_BinOp(m_Value(), m_Specific(I))) && User->isIntDivRem())
8795       return true;
8796   }
8797   return false;
8798 }
8799
8800 /// If BB has an incoming value that will always trigger undefined behavior
8801 /// (eg. null pointer dereference), remove the branch leading here.
/// Unconditional branches into such a PHI edge become `unreachable`;
/// conditional branches drop the offending destination (preserving the branch
/// condition as an `llvm.assume`, registered with \p AC when provided);
/// switches redirect offending cases/default to a fresh unreachable block.
/// \p DTU, when non-null, receives the corresponding domtree edge updates.
/// Returns true if one offending edge was removed (one edge per invocation;
/// the caller re-runs simplification after a change).
8803                                                 DomTreeUpdater *DTU,
8804                                                 AssumptionCache *AC) {
8805   for (PHINode &PHI : BB->phis())
8806     for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
8807       if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
8808         BasicBlock *Predecessor = PHI.getIncomingBlock(i);
8809         Instruction *T = Predecessor->getTerminator();
8810         IRBuilder<> Builder(T);
8811         if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
8812           BB->removePredecessor(Predecessor);
8813           // Turn unconditional branches into unreachables and remove the dead
8814           // destination from conditional branches.
8815           if (BI->isUnconditional())
8816             Builder.CreateUnreachable();
8817           else {
8818             // Preserve guarding condition in assume, because it might not be
8819             // inferrable from any dominating condition.
8820             Value *Cond = BI->getCondition();
8821             CallInst *Assumption;
           // If BB was the taken ("true") successor, the surviving path
           // implies the condition is false, so assume its negation.
8822             if (BI->getSuccessor(0) == BB)
8823               Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
8824             else
8825               Assumption = Builder.CreateAssumption(Cond);
8826             if (AC)
8827               AC->registerAssumption(cast<AssumeInst>(Assumption));
8828             Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
8829                                                        : BI->getSuccessor(0));
8830           }
8831           BI->eraseFromParent();
8832           if (DTU)
8833             DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
8834           return true;
8835         } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
8836           // Redirect all branches leading to UB into
8837           // a newly created unreachable block.
8838           BasicBlock *Unreachable = BasicBlock::Create(
8839               Predecessor->getContext(), "unreachable", BB->getParent(), BB);
8840           Builder.SetInsertPoint(Unreachable);
8841           // The new block contains only one instruction: Unreachable
8842           Builder.CreateUnreachable();
8843           for (const auto &Case : SI->cases())
8844             if (Case.getCaseSuccessor() == BB) {
8845               BB->removePredecessor(Predecessor);
8846               Case.setSuccessor(Unreachable);
8847             }
8848           if (SI->getDefaultDest() == BB) {
8849             BB->removePredecessor(Predecessor);
8850             SI->setDefaultDest(Unreachable);
8851           }
8852 
8853           if (DTU)
8854             DTU->applyUpdates(
8855               { { DominatorTree::Insert, Predecessor, Unreachable },
8856                 { DominatorTree::Delete, Predecessor, BB } });
8857           return true;
8858         }
8859       }
8860 
8861   return false;
8862 }
8863
// Perform one round of local CFG simplification on BB: delete it if dead,
// constant-fold its terminator, clean up PHIs and UB-introducing
// predecessors, merge/sink/hoist where possible, then dispatch to the
// terminator-kind-specific routine. Returns true if anything changed.
8864 bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
8865   bool Changed = false;
8866 
8867   assert(BB && BB->getParent() && "Block not embedded in function!");
8868   assert(BB->getTerminator() && "Degenerate basic block encountered!");
8869 
8870   // Remove basic blocks that have no predecessors (except the entry block)...
8871   // or that just have themselves as a predecessor. These are unreachable.
8872   if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
8873       BB->getSinglePredecessor() == BB) {
8874     LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
8875     DeleteDeadBlock(BB, DTU);
8876     return true;
8877   }
8878 
8879   // Check to see if we can constant propagate this terminator instruction
8880   // away...
8881   Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
8882                                     /*TLI=*/nullptr, DTU);
8883 
8884   // Check for and eliminate duplicate PHI nodes in this block.
8886 
8887   // Check for and remove branches that will always cause undefined behavior.
8889     return requestResimplify();
8890 
8891   // Merge basic blocks into their predecessor if there is only one distinct
8892   // pred, and if there is only one distinct successor of the predecessor, and
8893   // if there are no PHI nodes.
8894   if (MergeBlockIntoPredecessor(BB, DTU))
8895     return true;
8896 
8897   if (SinkCommon && Options.SinkCommonInsts)
8898     if (sinkCommonCodeFromPredecessors(BB, DTU) ||
8899         mergeCompatibleInvokes(BB, DTU)) {
8900       // sinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
8901       // so we may now have duplicate PHI's.
8902       // Let's rerun EliminateDuplicatePHINodes() first,
8903       // before foldTwoEntryPHINode() potentially converts them into select's,
8904       // after which we'd need a whole EarlyCSE pass run to cleanup them.
8905       return true;
8906     }
8907 
8908   IRBuilder<> Builder(BB);
8909 
     // Speculation-based folds are skipped entirely for fuzzing builds.
8910   if (Options.SpeculateBlocks &&
8911       !BB->getParent()->hasFnAttribute(Attribute::OptForFuzzing)) {
8912     // If there is a trivial two-entry PHI node in this basic block, and we can
8913     // eliminate it, do so now.
8914     if (auto *PN = dyn_cast<PHINode>(BB->begin()))
8915       if (PN->getNumIncomingValues() == 2)
8916         if (foldTwoEntryPHINode(PN, TTI, DTU, Options.AC, DL,
8917                                 Options.SpeculateUnpredictables))
8918           return true;
8919   }
8920 
8922   Builder.SetInsertPoint(Terminator);
     // Dispatch on the terminator kind to its dedicated simplification routine.
8923   switch (Terminator->getOpcode()) {
8924   case Instruction::Br:
8925     Changed |= simplifyBranch(cast<BranchInst>(Terminator), Builder);
8926     break;
8927   case Instruction::Resume:
8928     Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
8929     break;
8930   case Instruction::CleanupRet:
8931     Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
8932     break;
8933   case Instruction::Switch:
8934     Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
8935     break;
8936   case Instruction::Unreachable:
8937     Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
8938     break;
8939   case Instruction::IndirectBr:
8940     Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
8941     break;
8942   }
8943 
8944   return Changed;
8945 }
8946
8947bool SimplifyCFGOpt::run(BasicBlock *BB) {
8948 bool Changed = false;
8949
8950 // Repeated simplify BB as long as resimplification is requested.
8951 do {
8952 Resimplify = false;
8953
8954 // Perform one round of simplifcation. Resimplify flag will be set if
8955 // another iteration is requested.
8956 Changed |= simplifyOnce(BB);
8957 } while (Resimplify);
8958
8959 return Changed;
8960}
8961
// Public entry point: build a SimplifyCFGOpt with the block's DataLayout,
// the given options and loop headers, and run it to a fixed point on BB.
8964                          ArrayRef<WeakVH> LoopHeaders) {
8965   return SimplifyCFGOpt(TTI, DTU, BB->getDataLayout(), LoopHeaders,
8966                         Options)
8967       .run(BB);
8968 }
#define Fail
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Function Alias Analysis Results
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines the DenseMap class.
#define DEBUG_TYPE
static Value * getCondition(Instruction *I)
Hexagon Common GEP
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This defines the Use class.
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
#define I(x, y, z)
Definition MD5.cpp:58
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for metadata subclasses.
#define T
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
unsigned unsigned DefaultVal
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480
Provides some synthesis utilities to produce sequences of values.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
static std::optional< ContiguousCasesResult > findContiguousCases(Value *Condition, SmallVectorImpl< ConstantInt * > &Cases, SmallVectorImpl< ConstantInt * > &OtherCases, BasicBlock *Dest, BasicBlock *OtherDest)
static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred, MemorySSAUpdater *MSSAU=nullptr)
Update PHI nodes in Succ to indicate that there will now be entries in it from the 'NewPred' block.
static bool validLookupTableConstant(Constant *C, const TargetTransformInfo &TTI)
Return true if the backend will be able to handle initializing an array of constants like C.
static StoreInst * findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2)
static bool isProfitableToSpeculate(const BranchInst *BI, std::optional< bool > Invert, const TargetTransformInfo &TTI)
static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, BasicBlock *EndBB, unsigned &SpeculatedInstructions, InstructionCost &Cost, const TargetTransformInfo &TTI)
Estimate the cost of the insertion(s) and check that the PHI nodes can be converted to selects.
static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI, bool ConvertSwitchToLookupTable)
If the switch is only used to initialize one or more phi nodes in a common successor block with diffe...
static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI, Value *SelectValue, IRBuilder<> &Builder, DomTreeUpdater *DTU)
static bool valuesOverlap(std::vector< ValueEqualityComparisonCase > &C1, std::vector< ValueEqualityComparisonCase > &C2)
Return true if there are any keys in C1 that exist in C2 as well.
static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static std::optional< std::tuple< BasicBlock *, Instruction::BinaryOps, bool > > shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI, const TargetTransformInfo *TTI)
Determine if the two branches share a common destination and deduce a glue that joins the branches' c...
static bool mergeCleanupPad(CleanupReturnInst *RI)
static void hoistConditionalLoadsStores(BranchInst *BI, SmallVectorImpl< Instruction * > &SpeculatedConditionalLoadsStores, std::optional< bool > Invert, Instruction *Sel)
If the target supports conditional faulting, we look for the following pattern:
static bool isVectorOp(Instruction &I)
Return if an instruction's type or any of its operands' types are a vector type.
static BasicBlock * allPredecessorsComeFromSameSource(BasicBlock *BB)
static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap)
static int constantIntSortPredicate(ConstantInt *const *P1, ConstantInt *const *P2)
static bool getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, SmallVectorImpl< std::pair< PHINode *, Constant * > > &Res, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to determine the resulting constant values in phi nodes at the common destination basic block,...
static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU, const TargetTransformInfo *TTI)
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified=false)
Check if passing a value to an instruction will cause undefined behavior.
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags)
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, Instruction *I1, Instruction *I2)
static ConstantInt * getConstantInt(Value *V, const DataLayout &DL)
Extract ConstantInt from value, looking through IntToPtr and PointerNullValue.
static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder, DomTreeUpdater *DTU)
Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have the same destination.
static bool shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const TargetTransformInfo &TTI, const DataLayout &DL, const SmallVector< Type * > &ResultTypes)
Determine whether a lookup table should be built for this switch, based on the number of cases,...
static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, uint64_t &PredTrueWeight, uint64_t &PredFalseWeight, uint64_t &SuccTrueWeight, uint64_t &SuccFalseWeight)
Return true if either PBI or BI has branch weight available, and store the weights in {Pred|Succ}...
static Constant * constantFold(Instruction *I, const DataLayout &DL, const SmallDenseMap< Value *, Constant * > &ConstantPool)
Try to fold instruction I into a constant.
static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If we have a conditional branch as a predecessor of another block, this function tries to simplify it...
static bool tryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, BasicBlock *BB, DomTreeUpdater *DTU)
Given an block with only a single landing pad and a unconditional branch try to find another basic bl...
static bool areIdenticalUpToCommutativity(const Instruction *I1, const Instruction *I2)
static bool forwardSwitchConditionToPHI(SwitchInst *SI)
Try to forward the condition of a switch instruction to a phi node dominated by the switch,...
static PHINode * findPHIForConditionForwarding(ConstantInt *CaseValue, BasicBlock *BB, int *PhiIndex)
If BB would be eligible for simplification by TryToSimplifyUncondBranchFromEmptyBlock (i....
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
Tries to transform switch of powers of two to reduce switch range.
static bool isCleanupBlockEmpty(iterator_range< BasicBlock::iterator > R)
static Value * ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, Value *AlternativeV=nullptr)
static Value * createLogicalOp(IRBuilderBase &Builder, Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="")
static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, const TargetTransformInfo &TTI)
Helper function for hoistCommonCodeFromSuccessors.
static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to transform a switch that has "holes" in it to a contiguous sequence of cases.
static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static bool safeToMergeTerminators(Instruction *SI1, Instruction *SI2, SmallSetVector< BasicBlock *, 4 > *FailBlocks=nullptr)
Return true if it is safe to merge these two terminator instructions together.
SkipFlags
@ SkipReadMem
@ SkipSideEffect
@ SkipImplicitControlFlow
static bool incomingValuesAreCompatible(BasicBlock *BB, ArrayRef< BasicBlock * > IncomingBlocks, SmallPtrSetImpl< Value * > *EquivalenceSet=nullptr)
Return true if all the PHI nodes in the basic block BB receive compatible (identical) incoming values...
static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If a switch is only used to initialize one or more phi nodes in a common successor block with only tw...
static void createUnreachableSwitchDefault(SwitchInst *Switch, DomTreeUpdater *DTU, bool RemoveOrigDefaultBlock=true)
static Value * foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, Constant *DefaultResult, Value *Condition, IRBuilder<> &Builder, const DataLayout &DL, ArrayRef< uint32_t > BranchWeights)
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange)
static bool sinkCommonCodeFromPredecessors(BasicBlock *BB, DomTreeUpdater *DTU)
Check whether BB's predecessors end with unconditional branches.
static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI, const DataLayout &DL)
static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL)
Compute masked bits for the condition of a switch and use it to remove dead cases.
static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB, BlocksSet &NonLocalUseBlocks)
Return true if we can thread a branch across this block.
static Value * isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, BasicBlock *StoreBB, BasicBlock *EndBB)
Determine if we can hoist sink a sole store instruction out of a conditional block.
static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL, bool SpeculateUnpredictables)
Given a BB that starts with the specified two-entry PHI node, see if we can eliminate it.
static bool findReaching(BasicBlock *BB, BasicBlock *DefBB, BlocksSet &ReachesNonLocalUses)
static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, SwitchCaseResultVectorTy &UniqueResults, Constant *&DefaultResult, const DataLayout &DL, const TargetTransformInfo &TTI, uintptr_t MaxUniqueResults)
static bool shouldUseSwitchConditionAsTableIndex(ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal, bool HasDefaultResults, const SmallVector< Type * > &ResultTypes, const DataLayout &DL, const TargetTransformInfo &TTI)
static InstructionCost computeSpeculationCost(const User *I, const TargetTransformInfo &TTI)
Compute an abstract "cost" of speculating the given instruction, which is assumed to be safe to specu...
SmallPtrSet< BasicBlock *, 8 > BlocksSet
static unsigned skippedInstrFlags(Instruction *I)
static bool mergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU)
If this block is a landingpad exception handling block, categorize all the predecessor invokes into s...
static bool replacingOperandWithVariableIsCheap(const Instruction *I, int OpIdx)
static void eraseTerminatorAndDCECond(Instruction *TI, MemorySSAUpdater *MSSAU=nullptr)
static void eliminateBlockCases(BasicBlock *BB, std::vector< ValueEqualityComparisonCase > &Cases)
Given a vector of bb/value pairs, remove any entries in the list that match the specified block.
static void sinkLastInstruction(ArrayRef< BasicBlock * > Blocks)
static std::optional< bool > foldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
If we have a conditional branch on something for which we know the constant value in predecessors (e....
static size_t mapCaseToResult(ConstantInt *CaseVal, SwitchCaseResultVectorTy &UniqueResults, Constant *Result)
static void mergeCompatibleInvokesImpl(ArrayRef< InvokeInst * > Invokes, DomTreeUpdater *DTU)
static void getBranchWeights(Instruction *TI, SmallVectorImpl< uint64_t > &Weights)
Get Weights of a given terminator, the default weight is at the front of the vector.
static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch, Constant *DefaultValue, const SmallVectorImpl< std::pair< ConstantInt *, Constant * > > &Values)
Try to reuse the switch table index compare.
static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU)
If the previous block ended with a widenable branch, determine if reusing the target block is profita...
static bool mergeNestedCondBranch(BranchInst *BI, DomTreeUpdater *DTU)
Fold the following pattern: bb0: br i1 cond1, label bb1, label bb2 bb1: br i1 cond2,...
static Constant * lookupConstant(Value *V, const SmallDenseMap< Value *, Constant * > &ConstantPool)
If V is a Constant, return it.
static bool canSinkInstructions(ArrayRef< Instruction * > Insts, DenseMap< const Use *, SmallVector< Value *, 4 > > &PHIOperands)
static void hoistLockstepIdenticalDbgVariableRecords(Instruction *TI, Instruction *I1, SmallVectorImpl< Instruction * > &OtherInsts)
Hoists DbgVariableRecords from I1 and OtherInstrs that are identical in lock-step to TI.
static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU)
static bool removeUndefIntroducingPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, AssumptionCache *AC)
If BB has an incoming value that will always trigger undefined behavior (eg.
static bool simplifySwitchWhenUMin(SwitchInst *SI, DomTreeUpdater *DTU)
Tries to transform the switch when the condition is umin with a constant.
static bool isSafeCheapLoadStore(const Instruction *I, const TargetTransformInfo &TTI)
static ConstantInt * getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To)
static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt, SmallPtrSetImpl< Instruction * > &AggressiveInsts, InstructionCost &Cost, InstructionCost Budget, const TargetTransformInfo &TTI, AssumptionCache *AC, SmallPtrSetImpl< Instruction * > &ZeroCostInstructions, unsigned Depth=0)
If we have a merge point of an "if condition" as accepted above, return true if the specified value d...
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1671
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1202
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are both set.
Definition APInt.h:1250
bool sle(const APInt &RHS) const
Signed less or equal comparison.
Definition APInt.h:1167
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1532
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:357
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:476
LLVM_ABI APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1960
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1258
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1131
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
std::optional< int64_t > trySExtValue() const
Get sign extended value if possible.
Definition APInt.h:1575
LLVM_ABI APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1941
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1222
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
Definition ArrayRef.h:41
const T & back() const
back - Get the last element.
Definition ArrayRef.h:152
const T & front() const
front - Get the first element.
Definition ArrayRef.h:146
size_t size() const
size - Get the array size.
Definition ArrayRef.h:143
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:138
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A cache of @llvm.assume calls within a function.
LLVM_ABI void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM_ABI bool getValueAsBool() const
Return the attribute's value as a boolean.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:472
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:528
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
LLVM_ABI iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition BasicBlock.h:690
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic, or any pseudo operation if SkipPseudoOp is true.
LLVM_ABI bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
Definition BasicBlock.h:482
LLVM_ABI const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return instruction of this basic block.
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI void flushTerminatorDbgRecords()
Eject any debug-info trailing at the end of a block.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
size_t size() const
Definition BasicBlock.h:480
LLVM_ABI bool isLandingPad() const
Return true if this basic block is a landing pad.
LLVM_ABI bool hasNPredecessorsOrMore(unsigned N) const
Return true if this block has N predecessors or more.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well formed.
Definition BasicBlock.h:233
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition BasicBlock.h:662
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does not have a module.
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
BasicBlock * getBasicBlock() const
Definition Constants.h:934
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
void setCondition(Value *V)
bool isConditional() const
unsigned getNumSuccessors() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
BranchProbability getCompl() const
void addRangeRetAttr(const ConstantRange &CR)
adds the range attribute to the list of attributes.
bool isCallee(Value::const_user_iterator UI) const
Determine whether the passed iterator points to the callee operand's Use.
bool isDataOperand(const Use *U) const
bool tryIntersectAttributes(const CallBase *Other)
Try to intersect the attributes from 'this' CallBase and the 'Other' CallBase.
This class represents a function call, abstracting a target machine's calling convention.
mapped_iterator< op_iterator, DerefFnTy > handler_iterator
CleanupPadInst * getCleanupPad() const
Convenience accessor.
BasicBlock * getUnwindDest() const
This class is the base class for the comparison instructions.
Definition InstrTypes.h:664
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition InstrTypes.h:982
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:765
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
A constant value that is initialized with an expression using other constant values.
Definition Constants.h:1120
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:220
bool isNegative() const
Definition Constants.h:209
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the limit value.
Definition Constants.h:264
IntegerType * getIntegerType() const
Variant of the getType() method to always return an IntegerType, which reduces the amount of casting ...
Definition Constants.h:193
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:214
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition Constants.h:157
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:163
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:154
This class represents a range of values.
LLVM_ABI bool getEquivalentICmp(CmpInst::Predicate &Pred, APInt &RHS) const
Set up Pred and RHS such that ConstantRange::makeExactICmpRegion(Pred, RHS) == *this.
LLVM_ABI ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
const APInt & getLower() const
Return the lower value for this range.
LLVM_ABI APInt getUnsignedMin() const
Return the smallest unsigned value contained in the ConstantRange.
LLVM_ABI bool isEmptySet() const
Return true if this set contains no members.
LLVM_ABI bool isSizeLargerThan(uint64_t MaxSize) const
Compare set size of this range with Value.
const APInt & getUpper() const
Return the upper value for this range.
LLVM_ABI bool isUpperWrapped() const
Return true if the exclusive upper bound wraps around the unsigned domain.
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with any value contained within Other.
LLVM_ABI ConstantRange inverse() const
Return a new range that is the logical not of the current set.
LLVM_ABI APInt getUnsignedMax() const
Return the largest unsigned value contained in the ConstantRange.
static ConstantRange getNonEmpty(APInt Lower, APInt Upper)
Create non-empty constant range with the given bounds.
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
LLVM_ABI bool isOneValue() const
Returns true if the value is one.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:90
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
Base class for non-instruction debug metadata records that have positions within IR.
LLVM_ABI void removeFromParent()
simple_ilist< DbgRecord >::iterator self_iterator
Record of a variable value-assignment, aka a non instruction representation of the dbg....
A debug info location.
Definition DebugLoc.h:124
bool isSameSourceLocation(const DebugLoc &Other) const
Return true if the source locations match, ignoring isImplicitCode and source atom info.
Definition DebugLoc.h:256
static DebugLoc getTemporary()
Definition DebugLoc.h:161
static LLVM_ABI DebugLoc getMergedLocation(DebugLoc LocA, DebugLoc LocB)
When two instructions are combined into a single instruction we also need to combine the original locations into a single location.
Definition DebugLoc.cpp:181
static LLVM_ABI DebugLoc getMergedLocations(ArrayRef< DebugLoc > Locs)
Try to combine the vector of locations passed as input in a single one.
Definition DebugLoc.cpp:168
static DebugLoc getDropped()
Definition DebugLoc.h:164
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:248
unsigned size() const
Definition DenseMap.h:110
iterator end()
Definition DenseMap.h:81
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
Definition DenseMap.h:224
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:233
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Definition DenseMap.h:114
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:803
const BasicBlock & getEntryBlock() const
Definition Function.h:807
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:762
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:727
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Module * getParent()
Get the module that this global value is contained inside of...
This instruction compares its operands according to the predicate given to the constructor.
Predicate getSignedPredicate() const
For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2348
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition IRBuilder.h:2103
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
LLVM_ABI CallInst * CreateAssumption(Value *Cond, ArrayRef< OperandBundleDef > OpBundles={})
Create an assume intrinsic call that allows the optimizer to assume that the provided condition will ...
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition IRBuilder.h:502
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
BasicBlock::iterator GetInsertPoint() const
Definition IRBuilder.h:202
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition IRBuilder.h:2645
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition IRBuilder.h:1513
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition IRBuilder.h:247
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition IRBuilder.h:1934
Value * CreateNot(Value *V, const Twine &Name="")
Definition IRBuilder.h:1808
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of cases that will be added (for efficient allocation).
Definition IRBuilder.h:1220
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2332
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition IRBuilder.h:1197
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool' for the isVolatile parameter.
Definition IRBuilder.h:1850
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition IRBuilder.h:1863
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1403
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2197
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition IRBuilder.h:507
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition IRBuilder.h:2071
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition IRBuilder.h:1191
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition IRBuilder.h:2280
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2442
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1573
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1437
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788
Indirect Branch Instruction.
BasicBlock * getDestination(unsigned i)
Return the specified destination.
unsigned getNumDestinations() const
return the number of possible destinations in this indirectbr instruction.
LLVM_ABI void removeDestination(unsigned i)
This method removes the specified successor from the indirectbr instruction.
LLVM_ABI void dropUBImplyingAttrsAndMetadata(ArrayRef< unsigned > Keep={})
Drop any attributes or metadata that can cause immediate undefined behavior.
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(const Instruction *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere=std::nullopt, bool InsertAtHead=false)
Clone any debug-info attached to From onto this instruction.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
iterator_range< simple_ilist< DbgRecord >::iterator > getDbgRecordRange() const
Return a range over the DbgRecords attached to this instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not have a module.
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos lives in, right before MovePos.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool isTerminator() const
LLVM_ABI bool isUsedOutsideOfBlock(const BasicBlock *BB) const LLVM_READONLY
Return true if there are any uses of this instruction in blocks other than the specified block.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
@ CompareUsingIntersectedAttrs
Check for equivalence with intersected callbase attrs.
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
LLVM_ABI bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI void applyMergedLocation(DebugLoc LocA, DebugLoc LocB)
Merge 2 debug locations and apply it to the Instruction.
LLVM_ABI void dropDbgRecords()
Erase any DbgRecords attached to this instruction.
LLVM_ABI InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
Class to represent integer types.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Invoke instruction.
void setNormalDest(BasicBlock *B)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
The landingpad instruction holds all of the information necessary to generate correct exception handl...
An instruction for reading from memory.
static unsigned getPointerOperandIndex()
Iterates through instructions in a set of blocks in reverse order from the first non-terminator.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition MDBuilder.cpp:38
Metadata node.
Definition Metadata.h:1078
Helper class to manipulate !mmra metadata nodes.
bool empty() const
Definition MapVector.h:77
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition MapVector.h:119
size_type size() const
Definition MapVector.h:56
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will have (use 0 if you really have no idea).
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Value * getValue() const
Convenience accessor.
Return a value (possibly void), from a function.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:102
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:99
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:150
size_type size() const
Definition SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:338
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Align getAlign() const
bool isSimple() const
Value * getValueOperand()
bool isUnordered() const
static unsigned getPointerOperandIndex()
Value * getPointerOperand()
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
LLVM_ABI void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
LLVM_ABI CaseWeightOpt getSuccessorWeight(unsigned idx)
LLVM_ABI void replaceDefaultDest(SwitchInst::CaseIt I)
Replace the default destination by given case.
std::optional< uint32_t > CaseWeightOpt
LLVM_ABI SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
Multiway switch.
CaseIt case_end()
Returns a read/write iterator that points one past the last in the SwitchInst.
BasicBlock * getSuccessor(unsigned idx) const
void setCondition(Value *V)
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
CaseIteratorImpl< CaseHandle > CaseIt
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
unsigned getNumSuccessors() const
LLVM_ABI CaseIt removeCase(CaseIt I)
This method removes the specified case and its successor from the switch instruction.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
Definition Type.cpp:294
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
LLVM_ABI void set(Value *Val)
Definition Value.h:905
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
op_range operands()
Definition User.h:292
const Use & getOperandUse(unsigned i) const
Definition User.h:245
void setOperand(unsigned i, Value *Val)
Definition User.h:237
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition User.cpp:24
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
static constexpr uint64_t MaximumAlignment
Definition Value.h:830
LLVM_ABI Value(Type *Ty, unsigned scid)
Definition Value.cpp:53
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:390
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
iterator_range< user_iterator > users()
Definition Value.h:426
bool use_empty() const
Definition Value.h:346
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1099
iterator_range< use_iterator > uses()
Definition Value.h:380
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:396
Represents an op.with.overflow intrinsic.
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
Definition DenseSet.h:96
size_type size() const
Definition DenseSet.h:87
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
A range adaptor for a pair of iterators.
Changed
#define UINT64_MAX
Definition DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
cst_pred_ty< is_any_apint > m_AnyIntegralConstant()
Match an integer or vector with any integral constant.
bind_ty< WithOverflowInst > m_WithOverflowInst(WithOverflowInst *&I)
Match a with overflow intrinsic, capturing it if we match.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
NoWrapTrunc_match< OpTy, TruncInst::NoUnsignedWrap > m_NUWTrunc(const OpTy &Op)
Matches trunc nuw.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition DebugInfo.h:193
LLVM_ABI void deleteAssignmentMarkers(const Instruction *Inst)
Delete the llvm.dbg.assign intrinsics linked to Inst.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
constexpr double e
@ User
could "use" a pointer
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
Context & getContext() const
Definition BasicBlock.h:99
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:477
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:829
bool operator<(int64_t V1, const APSInt &V2)
Definition APSInt.h:362
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1751
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:533
bool succ_empty(const Instruction *I)
Definition CFG.h:257
LLVM_ABI bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB)
Check if we can prove that all paths starting from this block converge to a block that either has a @...
LLVM_ABI bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition Local.cpp:134
static cl::opt< unsigned > MaxSwitchCasesPerResult("max-switch-cases-per-result", cl::Hidden, cl::init(16), cl::desc("Limit cases to analyze when converting a switch to select"))
InstructionCost Cost
LLVM_ABI BranchInst * GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, BasicBlock *&IfFalse)
Check whether BB is the merge point of a if-region.
static cl::opt< bool > SpeculateOneExpensiveInst("speculate-one-expensive-inst", cl::Hidden, cl::init(true), cl::desc("Allow exactly one expensive instruction to be speculatively " "executed"))
auto pred_end(const MachineBasicBlock *BB)
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto successors(const MachineBasicBlock *BB)
auto accumulate(R &&Range, E &&Init)
Wrapper for std::accumulate.
Definition STLExtras.h:1688
constexpr from_range_t from_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI MDNode * getBranchWeightMDNode(const Instruction &I)
Get the branch weights metadata node.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
LLVM_ABI void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto unique(Range &&R, Predicate P)
Definition STLExtras.h:2076
static cl::opt< unsigned > MaxSpeculationDepth("max-speculation-depth", cl::Hidden, cl::init(10), cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions"))
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1777
static cl::opt< unsigned > PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(2), cl::desc("Control the amount of phi node folding to perform (default = 2)"))
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
static cl::opt< bool > MergeCondStoresAggressively("simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false), cl::desc("When merging conditional stores, do so even if the resultant " "basic blocks are unlikely to be if-converted as a result"))
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
LLVM_ABI ConstantRange getConstantRangeFromMetadata(const MDNode &RangeMD)
Parse out a conservative ConstantRange from !range metadata.
auto map_range(ContainerTy &&C, FuncTy F)
Definition STLExtras.h:364
static cl::opt< unsigned > BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, cl::init(2), cl::desc("Maximum cost of combining conditions when " "folding branches"))
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
static cl::opt< bool > SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block"))
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition STLExtras.h:2128
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
static cl::opt< bool > HoistStoresWithCondFaulting("simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist stores if the target supports conditional faulting"))
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
constexpr detail::StaticCastFunc< To > StaticCastTo
Function objects corresponding to the Cast types defined above.
Definition Casting.h:882
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
Definition Local.cpp:1140
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder)
LLVM_ABI bool impliesPoison(const Value *ValAssumedPoison, const Value *V)
Return true if V is poison given that ValAssumedPoison is already poison.
SmallVector< uint64_t, 2 > getDisjunctionWeights(const SmallVector< uint32_t, 2 > &B1, const SmallVector< uint32_t, 2 > &B2)
Get the branch weights of a branch conditioned on b1 || b2, where b1 and b2 are 2 booleans that are t...
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1622
static cl::opt< bool > EnableMergeCompatibleInvokes("simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true), cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"))
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition ValueMapper.h:98
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition ValueMapper.h:80
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition STLExtras.h:1397
LLVM_ABI bool collectPossibleValues(const Value *V, SmallPtrSetImpl< const Constant * > &Constants, unsigned MaxCount, bool AllowUndefOrPoison=true)
Enumerates all possible values of V and inserts them into the set Constants.
LLVM_ABI Instruction * removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
Replace 'BB's terminator with one that does not have an unwind successor block.
Definition Local.cpp:2845
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
auto succ_size(const MachineBasicBlock *BB)
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
static cl::opt< unsigned > MaxJumpThreadingLiveBlocks("max-jump-threading-live-blocks", cl::Hidden, cl::init(24), cl::desc("Limit number of blocks a define in a threaded block is allowed " "to be live in"))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
LLVM_ABI void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition Local.cpp:3094
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
static cl::opt< int > MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), cl::desc("Max size of a block which is still considered " "small enough to thread through"))
LLVM_ABI BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
bool isWidenableBranch(const User *U)
Returns true iff U is a widenable branch (that is, extractWidenableCondition returns widenable condit...
@ Other
Any other memory.
Definition ModRef.h:68
TargetTransformInfo TTI
static cl::opt< unsigned > HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden, cl::init(20), cl::desc("Allow reordering across at most this many " "instructions when hoisting"))
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
static cl::opt< bool > MergeCondStores("simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores even if an unconditional store does not " "precede - hoist multiple conditional stores into a single " "predicated store"))
static cl::opt< unsigned > BranchFoldToCommonDestVectorMultiplier("simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden, cl::init(2), cl::desc("Multiplier to apply to threshold when determining whether or not " "to fold branch to common destination when vector operations are " "present"))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
LLVM_ABI void hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, BasicBlock *BB)
Hoist all of the instructions in the IfBlock to the dominant block DomBlock, by moving its instructio...
Definition Local.cpp:3368
@ Sub
Subtraction of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:1954
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
LLVM_ABI bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx)
Given an instruction, is it legal to set operand OpIdx to a non-constant value?
Definition Local.cpp:3875
DWARFExpression::Operation Op
LLVM_ABI bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
LLVM_ABI bool FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep=nullptr)
We know that BB has one predecessor.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
void RemapDbgRecord(Module *M, DbgRecord *DR, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecord DR using the value map VM.
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
auto sum_of(R &&Range, E Init=E{0})
Returns the sum of all values in Range with Init initial value.
Definition STLExtras.h:1703
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if this is always a dereferenceable pointer.
Definition Loads.cpp:249
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
static cl::opt< bool > HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores if an unconditional store precedes"))
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
LLVM_ABI bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
auto pred_begin(const MachineBasicBlock *BB)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1758
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2120
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:248
auto predecessors(const MachineBasicBlock *BB)
static cl::opt< unsigned > HoistLoadsStoresWithCondFaultingThreshold("hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6), cl::desc("Control the maximal conditional load/store that we are willing " "to speculatively execute to eliminate conditional branch " "(default = 6)"))
static cl::opt< bool > HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), cl::desc("Hoist common instructions up to the parent block"))
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:363
LLVM_ABI unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Get the upper bound on bit size for this Value Op as a signed integer.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1897
static cl::opt< unsigned > TwoEntryPHINodeFoldingThreshold("two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4), cl::desc("Control the maximal total instruction cost that we are willing " "to speculatively execute to fold a 2-entry PHI node into a " "select (default = 4)"))
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
cl::opt< bool > ProfcheckDisableMetadataFixes("profcheck-disable-metadata-fixes", cl::Hidden, cl::init(false), cl::desc("Disable metadata propagation fixes discovered through Issue #147390"))
LLVM_ABI bool foldBranchToCommonDest(BranchInst *BI, llvm::DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr, const TargetTransformInfo *TTI=nullptr, unsigned BonusInstThreshold=1)
If this basic block is ONLY a setcc and a branch, and if a predecessor branches to us and one of our ...
bool pred_empty(const BasicBlock *BB)
Definition CFG.h:119
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
LLVM_ABI std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition STLExtras.h:1582
LLVM_ABI bool hasBranchWeightMD(const Instruction &I)
Checks if an instructions has Branch Weight Metadata.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition Hashing.h:592
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition STLExtras.h:2088
static cl::opt< bool > HoistLoadsWithCondFaulting("simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist loads if the target supports conditional faulting"))
LLVM_ABI Constant * ConstantFoldInstOperands(const Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
LLVM_ABI void setFittedBranchWeights(Instruction &I, ArrayRef< uint64_t > Weights, bool IsExpected, bool ElideAllZero=false)
Variant of setBranchWeights where the Weights will be fit first to uint32_t by shifting right.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
bool capturesNothing(CaptureComponents CC)
Definition ModRef.h:315
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI bool EliminateDuplicatePHINodes(BasicBlock *BB)
Check for and eliminate duplicate PHI nodes in this block.
Definition Local.cpp:1509
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
Definition Casting.h:866
LLVM_ABI void RemapSourceAtom(Instruction *I, ValueToValueMapTy &VM)
Remap source location atom.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition Hashing.h:466
LLVM_ABI bool isWritableObject(const Value *Object, bool &ExplicitlyDereferenceableOnly)
Return true if the Object is writable, in the sense that any location based on this pointer that can ...
LLVM_ABI void mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap)
Mark a cloned instruction as a new instance so that its source loc can be updated when remapped.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
LLVM_ABI void extractFromBranchWeightMD64(const MDNode *ProfileData, SmallVectorImpl< uint64_t > &Weights)
Faster version of extractBranchWeights() that skips checks and must only be called with "branch_weigh...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
SmallVectorImpl< ConstantInt * > * Cases
SmallVectorImpl< ConstantInt * > * OtherCases
Checking whether two cases of SI are equal depends on the contents of the BasicBlock and the incoming...
DenseMap< PHINode *, SmallDenseMap< BasicBlock *, Value *, 8 > > * PhiPredIVs
LLVM_ABI AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...
static const SwitchSuccWrapper * getEmptyKey()
static const SwitchSuccWrapper * getTombstoneKey()
static unsigned getHashValue(const SwitchSuccWrapper *SSW)
static bool isEqual(const SwitchSuccWrapper *LHS, const SwitchSuccWrapper *RHS)
An information struct used to provide DenseMap with the various necessary components for a given valu...
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition KnownBits.h:296
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:145
Matching combinators.
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:257