LLVM 23.0.0git
SimplifyCFG.cpp
Go to the documentation of this file.
1//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Peephole optimize the CFG.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Sequence.h"
20#include "llvm/ADT/SetVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/ADT/StringRef.h"
31#include "llvm/Analysis/Loads.h"
36#include "llvm/IR/Attributes.h"
37#include "llvm/IR/BasicBlock.h"
38#include "llvm/IR/CFG.h"
39#include "llvm/IR/Constant.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DebugInfo.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/GlobalValue.h"
48#include "llvm/IR/IRBuilder.h"
49#include "llvm/IR/InstrTypes.h"
50#include "llvm/IR/Instruction.h"
53#include "llvm/IR/LLVMContext.h"
54#include "llvm/IR/MDBuilder.h"
56#include "llvm/IR/Metadata.h"
57#include "llvm/IR/Module.h"
58#include "llvm/IR/NoFolder.h"
59#include "llvm/IR/Operator.h"
62#include "llvm/IR/Type.h"
63#include "llvm/IR/Use.h"
64#include "llvm/IR/User.h"
65#include "llvm/IR/Value.h"
66#include "llvm/IR/ValueHandle.h"
70#include "llvm/Support/Debug.h"
80#include <algorithm>
81#include <cassert>
82#include <climits>
83#include <cmath>
84#include <cstddef>
85#include <cstdint>
86#include <iterator>
87#include <map>
88#include <optional>
89#include <set>
90#include <tuple>
91#include <utility>
92#include <vector>
93
94using namespace llvm;
95using namespace PatternMatch;
96
97#define DEBUG_TYPE "simplifycfg"
98
99namespace llvm {
100
102 "simplifycfg-require-and-preserve-domtree", cl::Hidden,
103
104 cl::desc(
105 "Temporary development switch used to gradually uplift SimplifyCFG "
106 "into preserving DomTree,"));
107
108// Chosen as 2 so as to be cheap, but still to have enough power to fold
109// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
110// To catch this, we need to fold a compare and a select, hence '2' being the
111// minimum reasonable default.
113 "phi-node-folding-threshold", cl::Hidden, cl::init(2),
114 cl::desc(
115 "Control the amount of phi node folding to perform (default = 2)"));
116
118 "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
119 cl::desc("Control the maximal total instruction cost that we are willing "
120 "to speculatively execute to fold a 2-entry PHI node into a "
121 "select (default = 4)"));
122
123static cl::opt<bool>
124 HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
125 cl::desc("Hoist common instructions up to the parent block"));
126
128 "simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true),
129 cl::desc("Hoist loads if the target supports conditional faulting"));
130
132 "simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true),
133 cl::desc("Hoist stores if the target supports conditional faulting"));
134
136 "hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6),
137 cl::desc("Control the maximal conditional load/store that we are willing "
138 "to speculatively execute to eliminate conditional branch "
139 "(default = 6)"));
140
142 HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
143 cl::init(20),
144 cl::desc("Allow reordering across at most this many "
145 "instructions when hoisting"));
146
147static cl::opt<bool>
148 SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
149 cl::desc("Sink common instructions down to the end block"));
150
152 "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
153 cl::desc("Hoist conditional stores if an unconditional store precedes"));
154
156 "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
157 cl::desc("Hoist conditional stores even if an unconditional store does not "
158 "precede - hoist multiple conditional stores into a single "
159 "predicated store"));
160
162 "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
163 cl::desc("When merging conditional stores, do so even if the resultant "
164 "basic blocks are unlikely to be if-converted as a result"));
165
167 "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
168 cl::desc("Allow exactly one expensive instruction to be speculatively "
169 "executed"));
170
172 "max-speculation-depth", cl::Hidden, cl::init(10),
173 cl::desc("Limit maximum recursion depth when calculating costs of "
174 "speculatively executed instructions"));
175
176static cl::opt<int>
177 MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
178 cl::init(10),
179 cl::desc("Max size of a block which is still considered "
180 "small enough to thread through"));
181
182// Two is chosen to allow one negation and a logical combine.
184 BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
185 cl::init(2),
186 cl::desc("Maximum cost of combining conditions when "
187 "folding branches"));
188
190 "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
191 cl::init(2),
192 cl::desc("Multiplier to apply to threshold when determining whether or not "
193 "to fold branch to common destination when vector operations are "
194 "present"));
195
197 "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true),
198 cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));
199
201 "max-switch-cases-per-result", cl::Hidden, cl::init(16),
202 cl::desc("Limit cases to analyze when converting a switch to select"));
203
205 "max-jump-threading-live-blocks", cl::Hidden, cl::init(24),
206 cl::desc("Limit number of blocks a define in a threaded block is allowed "
207 "to be live in"));
208
210
211} // end namespace llvm
212
213STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
214STATISTIC(NumLinearMaps,
215 "Number of switch instructions turned into linear mapping");
216STATISTIC(NumLookupTables,
217 "Number of switch instructions turned into lookup tables");
219 NumLookupTablesHoles,
220 "Number of switch instructions turned into lookup tables (holes checked)");
221STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
222STATISTIC(NumFoldValueComparisonIntoPredecessors,
223 "Number of value comparisons folded into predecessor basic blocks");
224STATISTIC(NumFoldBranchToCommonDest,
225 "Number of branches folded into predecessor basic block");
227 NumHoistCommonCode,
228 "Number of common instruction 'blocks' hoisted up to the begin block");
229STATISTIC(NumHoistCommonInstrs,
230 "Number of common instructions hoisted up to the begin block");
231STATISTIC(NumSinkCommonCode,
232 "Number of common instruction 'blocks' sunk down to the end block");
233STATISTIC(NumSinkCommonInstrs,
234 "Number of common instructions sunk down to the end block");
235STATISTIC(NumSpeculations, "Number of speculative executed instructions");
236STATISTIC(NumInvokes,
237 "Number of invokes with empty resume blocks simplified into calls");
238STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
239STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
240
241namespace {
242
243// The first field contains the value that the switch produces when a certain
244// case group is selected, and the second field is a vector containing the
245// cases composing the case group.
246using SwitchCaseResultVectorTy =
248
249// The first field contains the phi node that generates a result of the switch
250// and the second field contains the value generated for a certain case in the
251// switch for that PHI.
252using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
253
254/// ValueEqualityComparisonCase - Represents a case of a switch.
255struct ValueEqualityComparisonCase {
257 BasicBlock *Dest;
258
259 ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
260 : Value(Value), Dest(Dest) {}
261
262 bool operator<(ValueEqualityComparisonCase RHS) const {
263 // Comparing pointers is ok as we only rely on the order for uniquing.
264 return Value < RHS.Value;
265 }
266
267 bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
268};
269
270class SimplifyCFGOpt {
271 const TargetTransformInfo &TTI;
272 DomTreeUpdater *DTU;
273 const DataLayout &DL;
274 ArrayRef<WeakVH> LoopHeaders;
275 const SimplifyCFGOptions &Options;
276 bool Resimplify;
277
278 Value *isValueEqualityComparison(Instruction *TI);
279 BasicBlock *getValueEqualityComparisonCases(
280 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
281 bool simplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
282 BasicBlock *Pred,
283 IRBuilder<> &Builder);
284 bool performValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
285 Instruction *PTI,
286 IRBuilder<> &Builder);
287 bool foldValueComparisonIntoPredecessors(Instruction *TI,
288 IRBuilder<> &Builder);
289
290 bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
291 bool simplifySingleResume(ResumeInst *RI);
292 bool simplifyCommonResume(ResumeInst *RI);
293 bool simplifyCleanupReturn(CleanupReturnInst *RI);
294 bool simplifyUnreachable(UnreachableInst *UI);
295 bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
296 bool simplifyDuplicateSwitchArms(SwitchInst *SI, DomTreeUpdater *DTU);
297 bool simplifyIndirectBr(IndirectBrInst *IBI);
298 bool simplifyUncondBranch(UncondBrInst *BI, IRBuilder<> &Builder);
299 bool simplifyCondBranch(CondBrInst *BI, IRBuilder<> &Builder);
300 bool foldCondBranchOnValueKnownInPredecessor(CondBrInst *BI);
301
302 bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
303 IRBuilder<> &Builder);
304 bool tryToSimplifyUncondBranchWithICmpSelectInIt(ICmpInst *ICI,
305 SelectInst *Select,
306 IRBuilder<> &Builder);
307 bool hoistCommonCodeFromSuccessors(Instruction *TI, bool AllInstsEqOnly);
308 bool hoistSuccIdenticalTerminatorToSwitchOrIf(
309 Instruction *TI, Instruction *I1,
310 SmallVectorImpl<Instruction *> &OtherSuccTIs,
311 ArrayRef<BasicBlock *> UniqueSuccessors);
312 bool speculativelyExecuteBB(CondBrInst *BI, BasicBlock *ThenBB);
313 bool simplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
314 BasicBlock *TrueBB, BasicBlock *FalseBB,
315 uint32_t TrueWeight, uint32_t FalseWeight);
316 bool simplifyBranchOnICmpChain(CondBrInst *BI, IRBuilder<> &Builder,
317 const DataLayout &DL);
318 bool simplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
319 bool simplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
320 bool turnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);
321 bool simplifyDuplicatePredecessors(BasicBlock *Succ, DomTreeUpdater *DTU);
322
323public:
324 SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
325 const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
326 const SimplifyCFGOptions &Opts)
327 : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
328 assert((!DTU || !DTU->hasPostDomTree()) &&
329 "SimplifyCFG is not yet capable of maintaining validity of a "
330 "PostDomTree, so don't ask for it.");
331 }
332
333 bool simplifyOnce(BasicBlock *BB);
334 bool run(BasicBlock *BB);
335
336 // Helper to set Resimplify and return change indication.
337 bool requestResimplify() {
338 Resimplify = true;
339 return true;
340 }
341};
342
343// we synthesize a || b as select a, true, b
344// we synthesize a && b as select a, b, false
345// this function determines if SI is playing one of those roles.
346[[maybe_unused]] bool
347isSelectInRoleOfConjunctionOrDisjunction(const SelectInst *SI) {
348 return ((isa<ConstantInt>(SI->getTrueValue()) &&
349 (dyn_cast<ConstantInt>(SI->getTrueValue())->isOne())) ||
350 (isa<ConstantInt>(SI->getFalseValue()) &&
351 (dyn_cast<ConstantInt>(SI->getFalseValue())->isNullValue())));
352}
353
354} // end anonymous namespace
355
356/// Return true if all the PHI nodes in the basic block \p BB
357/// receive compatible (identical) incoming values when coming from
358/// all of the predecessor blocks that are specified in \p IncomingBlocks.
359///
360/// Note that if the values aren't exactly identical, but \p EquivalenceSet
361/// is provided, and *both* of the values are present in the set,
362/// then they are considered equal.
364 BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
365 SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
366 assert(IncomingBlocks.size() == 2 &&
367 "Only for a pair of incoming blocks at the time!");
368
369 // FIXME: it is okay if one of the incoming values is an `undef` value,
370 // iff the other incoming value is guaranteed to be a non-poison value.
371 // FIXME: it is okay if one of the incoming values is a `poison` value.
372 return all_of(BB->phis(), [IncomingBlocks, EquivalenceSet](PHINode &PN) {
373 Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]);
374 Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]);
375 if (IV0 == IV1)
376 return true;
377 if (EquivalenceSet && EquivalenceSet->contains(IV0) &&
378 EquivalenceSet->contains(IV1))
379 return true;
380 return false;
381 });
382}
383
384/// Return true if it is safe to merge these two
385/// terminator instructions together.
386static bool
388 SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
389 if (SI1 == SI2)
390 return false; // Can't merge with self!
391
392 // It is not safe to merge these two switch instructions if they have a common
393 // successor, and if that successor has a PHI node, and if *that* PHI node has
394 // conflicting incoming values from the two switch blocks.
395 BasicBlock *SI1BB = SI1->getParent();
396 BasicBlock *SI2BB = SI2->getParent();
397
399 bool Fail = false;
400 for (BasicBlock *Succ : successors(SI2BB)) {
401 if (!SI1Succs.count(Succ))
402 continue;
403 if (incomingValuesAreCompatible(Succ, {SI1BB, SI2BB}))
404 continue;
405 Fail = true;
406 if (FailBlocks)
407 FailBlocks->insert(Succ);
408 else
409 break;
410 }
411
412 return !Fail;
413}
414
415/// Update PHI nodes in Succ to indicate that there will now be entries in it
416/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
417/// will be the same as those coming in from ExistPred, an existing predecessor
418/// of Succ.
419static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
420 BasicBlock *ExistPred,
421 MemorySSAUpdater *MSSAU = nullptr) {
422 for (PHINode &PN : Succ->phis())
423 PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
424 if (MSSAU)
425 if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
426 MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
427}
428
429/// Compute an abstract "cost" of speculating the given instruction,
430/// which is assumed to be safe to speculate. TCC_Free means cheap,
431/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
432/// expensive.
434 const TargetTransformInfo &TTI) {
435 return TTI.getInstructionCost(I, TargetTransformInfo::TCK_SizeAndLatency);
436}
437
438/// If we have a merge point of an "if condition" as accepted above,
439/// return true if the specified value dominates the block. We don't handle
440/// the true generality of domination here, just a special case which works
441/// well enough for us.
442///
443/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
444/// see if V (which must be an instruction) and its recursive operands
445/// that do not dominate BB have a combined cost lower than Budget and
446/// are non-trapping. If both are true, the instruction is inserted into the
447/// set and true is returned.
448///
449/// The cost for most non-trapping instructions is defined as 1 except for
450/// Select whose cost is 2.
451///
452/// After this function returns, Cost is increased by the cost of
453/// V plus its non-dominating operands. If that cost is greater than
454/// Budget, false is returned and Cost is undefined.
456 Value *V, BasicBlock *BB, Instruction *InsertPt,
457 SmallPtrSetImpl<Instruction *> &AggressiveInsts, InstructionCost &Cost,
459 SmallPtrSetImpl<Instruction *> &ZeroCostInstructions, unsigned Depth = 0) {
460 // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
461 // so limit the recursion depth.
462 // TODO: While this recursion limit does prevent pathological behavior, it
463 // would be better to track visited instructions to avoid cycles.
465 return false;
466
468 if (!I) {
469 // Non-instructions dominate all instructions and can be executed
470 // unconditionally.
471 return true;
472 }
473 BasicBlock *PBB = I->getParent();
474
475 // We don't want to allow weird loops that might have the "if condition" in
476 // the bottom of this block.
477 if (PBB == BB)
478 return false;
479
480 // If this instruction is defined in a block that contains an unconditional
481 // branch to BB, then it must be in the 'conditional' part of the "if
482 // statement". If not, it definitely dominates the region.
484 if (!BI || BI->getSuccessor() != BB)
485 return true;
486
487 // If we have seen this instruction before, don't count it again.
488 if (AggressiveInsts.count(I))
489 return true;
490
491 // Okay, it looks like the instruction IS in the "condition". Check to
492 // see if it's a cheap instruction to unconditionally compute, and if it
493 // only uses stuff defined outside of the condition. If so, hoist it out.
494 if (!isSafeToSpeculativelyExecute(I, InsertPt, AC))
495 return false;
496
497 // An overflow arithmetic instruction plus an extractvalue are usually
498 // generated when a division is being replaced. But, in this case, the zero
499 // check may still be kept in the code. In that case it would be worthwhile to
500 // hoist these two instructions out of the basic block. Let's treat this
501 // pattern as one single cheap instruction here!
502 WithOverflowInst *OverflowInst;
503 if (match(I, m_ExtractValue<1>(m_OneUse(m_WithOverflowInst(OverflowInst))))) {
504 ZeroCostInstructions.insert(OverflowInst);
505 Cost += 1;
506 } else if (!ZeroCostInstructions.contains(I))
507 Cost += computeSpeculationCost(I, TTI);
508
509 // Allow exactly one instruction to be speculated regardless of its cost
510 // (as long as it is safe to do so).
511 // This is intended to flatten the CFG even if the instruction is a division
512 // or other expensive operation. The speculation of an expensive instruction
513 // is expected to be undone in CodeGenPrepare if the speculation has not
514 // enabled further IR optimizations.
515 if (Cost > Budget &&
516 (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
517 !Cost.isValid()))
518 return false;
519
520 // Okay, we can only really hoist these out if their operands do
521 // not take us over the cost threshold.
522 for (Use &Op : I->operands())
523 if (!dominatesMergePoint(Op, BB, InsertPt, AggressiveInsts, Cost, Budget,
524 TTI, AC, ZeroCostInstructions, Depth + 1))
525 return false;
526 // Okay, it's safe to do this! Remember this instruction.
527 AggressiveInsts.insert(I);
528 return true;
529}
530
531/// Extract ConstantInt from value, looking through IntToPtr
532/// and PointerNullValue. Return NULL if value is not a constant int.
534 // Normal constant int.
536 if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy())
537 return CI;
538
539 // It is not safe to look through inttoptr or ptrtoint when using unstable
540 // pointer types.
541 if (DL.hasUnstableRepresentation(V->getType()))
542 return nullptr;
543
544 // This is some kind of pointer constant. Turn it into a pointer-sized
545 // ConstantInt if possible.
546 IntegerType *IntPtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
547
548 // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
550 return ConstantInt::get(IntPtrTy, 0);
551
552 // IntToPtr const int, we can look through this if the semantics of
553 // inttoptr for this address space are a simple (truncating) bitcast.
555 if (CE->getOpcode() == Instruction::IntToPtr)
556 if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
557 // The constant is very likely to have the right type already.
558 if (CI->getType() == IntPtrTy)
559 return CI;
560 else
561 return cast<ConstantInt>(
562 ConstantFoldIntegerCast(CI, IntPtrTy, /*isSigned=*/false, DL));
563 }
564 return nullptr;
565}
566
567namespace {
568
569/// Given a chain of or (||) or and (&&) comparison of a value against a
570/// constant, this will try to recover the information required for a switch
571/// structure.
572/// It will depth-first traverse the chain of comparisons, looking for patterns
573/// like %a == 12 or %a < 4, and combine them to produce a set of integers
574/// representing the different cases for the switch.
575/// Note that if the chain is composed of '||' it will build the set of elements
576/// that match the comparisons (i.e. any of these values validates the chain)
577/// while for a chain of '&&' it will build the set of elements that make the
578/// test fail.
579struct ConstantComparesGatherer {
580 const DataLayout &DL;
581
582 /// Value found for the switch comparison
583 Value *CompValue = nullptr;
584
585 /// Extra clause to be checked before the switch
586 Value *Extra = nullptr;
587
588 /// Set of integers to match in switch
590
591 /// Number of comparisons matched in the and/or chain
592 unsigned UsedICmps = 0;
593
594 /// If the elements in Vals matches the comparisons
595 bool IsEq = false;
596
597 // Used to check if the first matched CompValue shall be the Extra check.
598 bool IgnoreFirstMatch = false;
599 bool MultipleMatches = false;
600
601 /// Construct and compute the result for the comparison instruction Cond
602 ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
603 gather(Cond);
604 if (CompValue || !MultipleMatches)
605 return;
606 Extra = nullptr;
607 Vals.clear();
608 UsedICmps = 0;
609 IgnoreFirstMatch = true;
610 gather(Cond);
611 }
612
613 ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
614 ConstantComparesGatherer &
615 operator=(const ConstantComparesGatherer &) = delete;
616
617private:
618 /// Try to set the current value used for the comparison, it succeeds only if
619 /// it wasn't set before or if the new value is the same as the old one
620 bool setValueOnce(Value *NewVal) {
621 if (IgnoreFirstMatch) {
622 IgnoreFirstMatch = false;
623 return false;
624 }
625 if (CompValue && CompValue != NewVal) {
626 MultipleMatches = true;
627 return false;
628 }
629 CompValue = NewVal;
630 return true;
631 }
632
633 /// Try to match Instruction "I" as a comparison against a constant and
634 /// populates the array Vals with the set of values that match (or do not
635 /// match depending on isEQ).
636 /// Return false on failure. On success, the Value the comparison matched
637 /// against is placed in CompValue.
638 /// If CompValue is already set, the function is expected to fail if a match
639 /// is found but the value compared to is different.
640 bool matchInstruction(Instruction *I, bool isEQ) {
641 if (match(I, m_Not(m_Instruction(I))))
642 isEQ = !isEQ;
643
644 Value *Val;
645 if (match(I, m_NUWTrunc(m_Value(Val)))) {
646 // If we already have a value for the switch, it has to match!
647 if (!setValueOnce(Val))
648 return false;
649 UsedICmps++;
650 Vals.push_back(ConstantInt::get(cast<IntegerType>(Val->getType()), isEQ));
651 return true;
652 }
653 // If this is an icmp against a constant, handle this as one of the cases.
654 ICmpInst *ICI;
655 ConstantInt *C;
656 if (!((ICI = dyn_cast<ICmpInst>(I)) &&
657 (C = getConstantInt(I->getOperand(1), DL)))) {
658 return false;
659 }
660
661 Value *RHSVal;
662 const APInt *RHSC;
663
664 // Pattern match a special case
665 // (x & ~2^z) == y --> x == y || x == y|2^z
666 // This undoes a transformation done by instcombine to fuse 2 compares.
667 if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
668 // It's a little bit hard to see why the following transformations are
669 // correct. Here is a CVC3 program to verify them for 64-bit values:
670
671 /*
672 ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
673 x : BITVECTOR(64);
674 y : BITVECTOR(64);
675 z : BITVECTOR(64);
676 mask : BITVECTOR(64) = BVSHL(ONE, z);
677 QUERY( (y & ~mask = y) =>
678 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
679 );
680 QUERY( (y | mask = y) =>
681 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
682 );
683 */
684
685 // Please note that each pattern must be a dual implication (<--> or
686 // iff). One directional implication can create spurious matches. If the
687 // implication is only one-way, an unsatisfiable condition on the left
688 // side can imply a satisfiable condition on the right side. Dual
689 // implication ensures that satisfiable conditions are transformed to
690 // other satisfiable conditions and unsatisfiable conditions are
691 // transformed to other unsatisfiable conditions.
692
693 // Here is a concrete example of an unsatisfiable condition on the left
694 // implying a satisfiable condition on the right:
695 //
696 // mask = (1 << z)
697 // (x & ~mask) == y --> (x == y || x == (y | mask))
698 //
699 // Substituting y = 3, z = 0 yields:
700 // (x & -2) == 3 --> (x == 3 || x == 2)
701
702 // Pattern match a special case:
703 /*
704 QUERY( (y & ~mask = y) =>
705 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
706 );
707 */
708 if (match(ICI->getOperand(0),
709 m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
710 APInt Mask = ~*RHSC;
711 if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
712 // If we already have a value for the switch, it has to match!
713 if (!setValueOnce(RHSVal))
714 return false;
715
716 Vals.push_back(C);
717 Vals.push_back(
718 ConstantInt::get(C->getContext(),
719 C->getValue() | Mask));
720 UsedICmps++;
721 return true;
722 }
723 }
724
725 // Pattern match a special case:
726 /*
727 QUERY( (y | mask = y) =>
728 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
729 );
730 */
731 if (match(ICI->getOperand(0),
732 m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
733 APInt Mask = *RHSC;
734 if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
735 // If we already have a value for the switch, it has to match!
736 if (!setValueOnce(RHSVal))
737 return false;
738
739 Vals.push_back(C);
740 Vals.push_back(ConstantInt::get(C->getContext(),
741 C->getValue() & ~Mask));
742 UsedICmps++;
743 return true;
744 }
745 }
746
747 // If we already have a value for the switch, it has to match!
748 if (!setValueOnce(ICI->getOperand(0)))
749 return false;
750
751 UsedICmps++;
752 Vals.push_back(C);
753 return true;
754 }
755
756 // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
757 ConstantRange Span =
759
760 // Shift the range if the compare is fed by an add. This is the range
761 // compare idiom as emitted by instcombine.
762 Value *CandidateVal = I->getOperand(0);
763 if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
764 Span = Span.subtract(*RHSC);
765 CandidateVal = RHSVal;
766 }
767
768 // If this is an and/!= check, then we are looking to build the set of
769 // values that *don't* pass the and chain. I.e. to turn "x ugt 2" into
770 // x != 0 && x != 1.
771 if (!isEQ)
772 Span = Span.inverse();
773
774 // If there are a ton of values, we don't want to make a ginormous switch.
775 if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
776 return false;
777 }
778
779 // If we already have a value for the switch, it has to match!
780 if (!setValueOnce(CandidateVal))
781 return false;
782
783 // Add all values from the range to the set
784 APInt Tmp = Span.getLower();
785 do
786 Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
787 while (++Tmp != Span.getUpper());
788
789 UsedICmps++;
790 return true;
791 }
792
793 /// Given a potentially 'or'd or 'and'd together collection of icmp
794 /// eq/ne/lt/gt instructions that compare a value against a constant, extract
795 /// the value being compared, and stick the list constants into the Vals
796 /// vector.
797 /// One "Extra" case is allowed to differ from the other.
798 void gather(Value *V) {
799 Value *Op0, *Op1;
800 if (match(V, m_LogicalOr(m_Value(Op0), m_Value(Op1))))
801 IsEq = true;
802 else if (match(V, m_LogicalAnd(m_Value(Op0), m_Value(Op1))))
803 IsEq = false;
804 else
805 return;
806 // Keep a stack (SmallVector for efficiency) for depth-first traversal
807 SmallVector<Value *, 8> DFT{Op0, Op1};
808 SmallPtrSet<Value *, 8> Visited{V, Op0, Op1};
809
810 while (!DFT.empty()) {
811 V = DFT.pop_back_val();
812
813 if (Instruction *I = dyn_cast<Instruction>(V)) {
814 // If it is a || (or && depending on isEQ), process the operands.
815 if (IsEq ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
816 : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
817 if (Visited.insert(Op1).second)
818 DFT.push_back(Op1);
819 if (Visited.insert(Op0).second)
820 DFT.push_back(Op0);
821
822 continue;
823 }
824
825 // Try to match the current instruction
826 if (matchInstruction(I, IsEq))
827 // Match succeed, continue the loop
828 continue;
829 }
830
831 // One element of the sequence of || (or &&) could not be matched as a
832 // comparison against the same value as the others.
833 // We allow only one "Extra" case to be checked before the switch.
834 if (!Extra) {
835 Extra = V;
836 continue;
837 }
838 // Failed to parse a proper sequence, abort now
839 CompValue = nullptr;
840 break;
841 }
842 }
843};
844
845} // end anonymous namespace
846
848 MemorySSAUpdater *MSSAU = nullptr) {
849 Instruction *Cond = nullptr;
851 Cond = dyn_cast<Instruction>(SI->getCondition());
852 } else if (CondBrInst *BI = dyn_cast<CondBrInst>(TI)) {
853 Cond = dyn_cast<Instruction>(BI->getCondition());
854 } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
855 Cond = dyn_cast<Instruction>(IBI->getAddress());
856 }
857
858 TI->eraseFromParent();
859 if (Cond)
861}
862
863/// Return true if the specified terminator checks
864/// to see if a value is equal to constant integer value.
865Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
866 Value *CV = nullptr;
867 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
868 // Do not permit merging of large switch instructions into their
869 // predecessors unless there is only one predecessor.
870 if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
871 CV = SI->getCondition();
872 } else if (CondBrInst *BI = dyn_cast<CondBrInst>(TI))
873 if (BI->getCondition()->hasOneUse()) {
874 if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
875 if (ICI->isEquality() && getConstantInt(ICI->getOperand(1), DL))
876 CV = ICI->getOperand(0);
877 } else if (auto *Trunc = dyn_cast<TruncInst>(BI->getCondition())) {
878 if (Trunc->hasNoUnsignedWrap())
879 CV = Trunc->getOperand(0);
880 }
881 }
882
883 // Unwrap any lossless ptrtoint cast (except for unstable pointers).
884 if (CV) {
885 if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
886 Value *Ptr = PTII->getPointerOperand();
887 if (DL.hasUnstableRepresentation(Ptr->getType()))
888 return CV;
889 if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
890 CV = Ptr;
891 }
892 }
893 return CV;
894}
895
896/// Given a value comparison instruction,
897/// decode all of the 'cases' that it represents and return the 'default' block.
898BasicBlock *SimplifyCFGOpt::getValueEqualityComparisonCases(
899 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
900 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
901 Cases.reserve(SI->getNumCases());
902 for (auto Case : SI->cases())
903 Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
904 Case.getCaseSuccessor()));
905 return SI->getDefaultDest();
906 }
907
908 CondBrInst *BI = cast<CondBrInst>(TI);
909 Value *Cond = BI->getCondition();
910 ICmpInst::Predicate Pred;
911 ConstantInt *C;
912 if (auto *ICI = dyn_cast<ICmpInst>(Cond)) {
913 Pred = ICI->getPredicate();
914 C = getConstantInt(ICI->getOperand(1), DL);
915 } else {
916 Pred = ICmpInst::ICMP_NE;
917 auto *Trunc = cast<TruncInst>(Cond);
918 C = ConstantInt::get(cast<IntegerType>(Trunc->getOperand(0)->getType()), 0);
919 }
920 BasicBlock *Succ = BI->getSuccessor(Pred == ICmpInst::ICMP_NE);
921 Cases.push_back(ValueEqualityComparisonCase(C, Succ));
922 return BI->getSuccessor(Pred == ICmpInst::ICMP_EQ);
923}
924
925/// Given a vector of bb/value pairs, remove any entries
926/// in the list that match the specified block.
///
/// The removal happens in place through llvm::erase(Cases, BB); nothing is
/// returned.
// NOTE(review): the line carrying the function name and its first parameter
// (listing line 928) is absent from this copy. Call sites in this file spell
// the name `eliminateBlockCases` and pass a BasicBlock* first -- confirm
// against the original source.
927static void
929 std::vector<ValueEqualityComparisonCase> &Cases) {
930 llvm::erase(Cases, BB);
931}
932
933/// Return true if there are any keys in C1 that exist in C2 as well.
934static bool valuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
935 std::vector<ValueEqualityComparisonCase> &C2) {
936 std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
937
938 // Make V1 be smaller than V2.
939 if (V1->size() > V2->size())
940 std::swap(V1, V2);
941
942 if (V1->empty())
943 return false;
944 if (V1->size() == 1) {
945 // Just scan V2.
946 ConstantInt *TheVal = (*V1)[0].Value;
947 for (const ValueEqualityComparisonCase &VECC : *V2)
948 if (TheVal == VECC.Value)
949 return true;
950 }
951
952 // Otherwise, just sort both lists and compare element by element.
953 array_pod_sort(V1->begin(), V1->end());
954 array_pod_sort(V2->begin(), V2->end());
955 unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
956 while (i1 != e1 && i2 != e2) {
957 if ((*V1)[i1].Value == (*V2)[i2].Value)
958 return true;
959 if ((*V1)[i1].Value < (*V2)[i2].Value)
960 ++i1;
961 else
962 ++i2;
963 }
964 return false;
965}
966
967/// If TI is known to be a terminator instruction and its block is known to
968/// only have a single predecessor block, check to see if that predecessor is
969/// also a value comparison with the same value, and if that comparison
970/// determines the outcome of this comparison. If so, simplify TI. This does a
971/// very limited form of jump threading.
///
/// \param TI      the value-comparison terminator to simplify.
/// \param Pred    the predecessor block whose terminator is inspected.
/// \param Builder used to create the replacement unconditional branch.
/// \returns true if TI was simplified; false when Pred's terminator is not a
/// value comparison, tests a different value, or does not decide TI.
972bool SimplifyCFGOpt::simplifyEqualityComparisonWithOnlyPredecessor(
973 Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
974 Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
975 if (!PredVal)
976 return false; // Not a value comparison in predecessor.
977
978 Value *ThisVal = isValueEqualityComparison(TI);
979 assert(ThisVal && "This isn't a value comparison!!");
980 if (ThisVal != PredVal)
981 return false; // Different predicates.
982
983 // TODO: Preserve branch weight metadata, similarly to how
984 // foldValueComparisonIntoPredecessors preserves it.
985
986 // Find out information about when control will move from Pred to TI's block.
987 std::vector<ValueEqualityComparisonCase> PredCases;
988 BasicBlock *PredDef =
989 getValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
990 eliminateBlockCases(PredDef, PredCases); // Remove default from cases.
991
992 // Find information about how control leaves this block.
993 std::vector<ValueEqualityComparisonCase> ThisCases;
994 BasicBlock *ThisDef = getValueEqualityComparisonCases(TI, ThisCases);
995 eliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.
996
997 // If TI's block is the default block from Pred's comparison, potentially
998 // simplify TI based on this knowledge.
999 if (PredDef == TI->getParent()) {
1000 // If we are here, we know that the value is none of those cases listed in
1001 // PredCases. If there are any cases in ThisCases that are in PredCases, we
1002 // can simplify TI.
1003 if (!valuesOverlap(PredCases, ThisCases))
1004 return false;
1005
1006 if (isa<CondBrInst>(TI)) {
1007 // Okay, one of the successors of this condbr is dead. Convert it to a
1008 // uncond br.
1009 assert(ThisCases.size() == 1 && "Branch can only have one case!");
1010 // Insert the new branch.
1011 Instruction *NI = Builder.CreateBr(ThisDef);
1012 (void)NI;
1013
1014 // Remove PHI node entries for the dead edge.
 // PredDef is TI's own block on this path (see the enclosing condition).
1015 ThisCases[0].Dest->removePredecessor(PredDef);
1016
1017 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1018 << "Through successor TI: " << *TI << "Leaving: " << *NI
1019 << "\n");
1020
 // NOTE(review): listing line 1021 is absent from this copy. Nothing visible
 // here removes the old terminator TI after the new branch is created --
 // confirm against the original source.
1022
1023 if (DTU)
1024 DTU->applyUpdates(
1025 {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});
1026
1027 return true;
1028 }
1029
1030 SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
1031 // Okay, TI has cases that are statically dead, prune them away.
1032 SmallPtrSet<Constant *, 16> DeadCases;
1033 for (const ValueEqualityComparisonCase &Case : PredCases)
1034 DeadCases.insert(Case.Value);
1035
1036 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1037 << "Through successor TI: " << *TI);
1038
 // Per successor: number of case edges that survive pruning. Only maintained
 // when a DomTreeUpdater is present.
1039 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
 // The cases are visited from last to first; presumably so that removeCase
 // does not disturb cases that have not been visited yet -- confirm.
1040 for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
1041 --i;
1042 auto *Successor = i->getCaseSuccessor();
1043 if (DTU)
1044 ++NumPerSuccessorCases[Successor];
1045 if (DeadCases.count(i->getCaseValue())) {
1046 Successor->removePredecessor(PredDef);
1047 SI.removeCase(i);
1048 if (DTU)
1049 --NumPerSuccessorCases[Successor];
1050 }
1051 }
1052
1053 if (DTU) {
 // A successor whose count dropped to zero lost every case edge, so a
 // Delete update is queued for its edge from this block.
1054 std::vector<DominatorTree::UpdateType> Updates;
1055 for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
1056 if (I.second == 0)
1057 Updates.push_back({DominatorTree::Delete, PredDef, I.first});
1058 DTU->applyUpdates(Updates);
1059 }
1060
1061 LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
1062 return true;
1063 }
1064
1065 // Otherwise, TI's block must correspond to some matched value. Find out
1066 // which value (or set of values) this is.
1067 ConstantInt *TIV = nullptr;
1068 BasicBlock *TIBB = TI->getParent();
1069 for (const auto &[Value, Dest] : PredCases)
1070 if (Dest == TIBB) {
1071 if (TIV)
1072 return false; // Cannot handle multiple values coming to this block.
1073 TIV = Value;
1074 }
1075 assert(TIV && "No edge from pred to succ?");
1076
1077 // Okay, we found the one constant that our value can be if we get into TI's
1078 // BB. Find out which successor will unconditionally be branched to.
1079 BasicBlock *TheRealDest = nullptr;
1080 for (const auto &[Value, Dest] : ThisCases)
1081 if (Value == TIV) {
1082 TheRealDest = Dest;
1083 break;
1084 }
1085
1086 // If not handled by any explicit cases, it is handled by the default case.
1087 if (!TheRealDest)
1088 TheRealDest = ThisDef;
1089
1090 SmallPtrSet<BasicBlock *, 2> RemovedSuccs;
1091
1092 // Remove PHI node entries for dead edges.
 // CheckEdge is cleared at the first edge to TheRealDest, so exactly one such
 // edge is kept; any further edge to TheRealDest is dropped from its PHIs but
 // is not recorded in RemovedSuccs.
1093 BasicBlock *CheckEdge = TheRealDest;
1094 for (BasicBlock *Succ : successors(TIBB))
1095 if (Succ != CheckEdge) {
1096 if (Succ != TheRealDest)
1097 RemovedSuccs.insert(Succ);
1098 Succ->removePredecessor(TIBB);
1099 } else
1100 CheckEdge = nullptr;
1101
1102 // Insert the new branch.
1103 Instruction *NI = Builder.CreateBr(TheRealDest);
1104 (void)NI;
1105
1106 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1107 << "Through successor TI: " << *TI << "Leaving: " << *NI
1108 << "\n");
1109
 // NOTE(review): listing line 1110 is absent from this copy. Nothing visible
 // here removes the old terminator TI -- confirm against the original source.
1111 if (DTU) {
1112 SmallVector<DominatorTree::UpdateType, 2> Updates;
1113 Updates.reserve(RemovedSuccs.size());
1114 for (auto *RemovedSucc : RemovedSuccs)
1115 Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc});
1116 DTU->applyUpdates(Updates);
1117 }
1118 return true;
1119}
1120
1121namespace {
1122
1123/// This class implements a stable ordering of constant
1124/// integers that does not depend on their address. This is important for
1125/// applications that sort ConstantInt's to ensure uniqueness.
1126struct ConstantIntOrdering {
1127 bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
1128 return LHS->getValue().ult(RHS->getValue());
1129 }
1130};
1131
1132} // end anonymous namespace
1133
// Three-way comparison of the ConstantInts stored in two array slots.
// Returns 0 when both slots hold the same ConstantInt pointer, 1 when the
// first value is unsigned-less-than the second, and -1 otherwise.
// NOTE(review): the first signature line (listing line 1134, carrying the
// function name and the P1 parameter) is absent from this copy.
// NOTE(review): used as a qsort-style comparator, this sign convention would
// place larger values first -- confirm that descending order is intended.
1135 ConstantInt *const *P2) {
1136 const ConstantInt *LHS = *P1;
1137 const ConstantInt *RHS = *P2;
1138 if (LHS == RHS)
1139 return 0;
1140 return LHS->getValue().ult(RHS->getValue()) ? 1 : -1;
1141}
1142
1143/// Get Weights of a given terminator, the default weight is at the front
1144/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
1145/// metadata.
///
/// TI must carry !prof metadata (asserted). For a conditional branch exactly
/// two weights are expected (asserted); they are swapped only when the
/// condition is an icmp with the 'eq' predicate.
// NOTE(review): the first signature line (listing line 1146) is absent from
// this copy. Calls in this file have the form getBranchWeights(<terminator>,
// Weights) -- confirm against the original source.
1147 SmallVectorImpl<uint64_t> &Weights) {
1148 MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
1149 assert(MD && "Invalid branch-weight metadata");
1150 extractFromBranchWeightMD64(MD, Weights);
1151
1152 // If TI is a conditional eq, the default case is the false case,
1153 // and the corresponding branch-weight data is at index 2. We swap the
1154 // default weight to be the first entry.
 // NOTE(review): "index 2" presumably refers to the operand position inside
 // the !prof node, not to Weights, which has exactly two entries here.
1155 if (CondBrInst *BI = dyn_cast<CondBrInst>(TI)) {
1156 assert(Weights.size() == 2);
1157 auto *ICI = dyn_cast<ICmpInst>(BI->getCondition());
 // A non-icmp condition leaves the weights in their extracted order.
1158 if (!ICI)
1159 return;
1160
1161 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
1162 std::swap(Weights.front(), Weights.back());
1163 }
1164}
1165
// Clones every instruction of BB, except terminators and pseudo probes, in
// front of PredBlock's terminator. Each original -> clone pair is recorded in
// VMap, the clone takes over the original's name (the original gets a ".old"
// suffix), and PHI uses whose incoming block is PredBlock are redirected to
// the clone.
// NOTE(review): the first signature line (listing line 1166, carrying the
// function name) is absent from this copy.
1167 BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
1168 Instruction *PTI = PredBlock->getTerminator();
1169
1170 // If we have bonus instructions, clone them into the predecessor block.
1171 // Note that there may be multiple predecessor blocks, so we cannot move
1172 // bonus instructions to a predecessor block.
1173 for (Instruction &BonusInst : *BB) {
1174 if (BonusInst.isTerminator())
1175 continue;
1176
1177 // Skip cloning pseudo probes into the predecessor, as it would overcount
1178 // otherwise.
1179 if (isa<PseudoProbeInst>(BonusInst))
1180 continue;
1181
1182 Instruction *NewBonusInst = BonusInst.clone();
1183
1184 if (!NewBonusInst->getDebugLoc().isSameSourceLocation(PTI->getDebugLoc())) {
1185 // Unless the instruction has the same !dbg location as the original
1186 // branch, drop it. When we fold the bonus instructions we want to make
1187 // sure we reset their debug locations in order to avoid stepping on
1188 // dead code caused by folding dead branches.
1189 NewBonusInst->setDebugLoc(DebugLoc::getDropped());
1190 } else if (const DebugLoc &DL = NewBonusInst->getDebugLoc()) {
1191 mapAtomInstance(DL, VMap);
1192 }
1193
1194 RemapInstruction(NewBonusInst, VMap,
 // NOTE(review): the remaining arguments of this call (listing line 1195) are
 // absent from this copy.
1196
1197 // If we speculated an instruction, we need to drop any metadata that may
1198 // result in undefined behavior, as the metadata might have been valid
1199 // only given the branch precondition.
1200 // Similarly strip attributes on call parameters that may cause UB in
1201 // location the call is moved to.
1202 NewBonusInst->dropUBImplyingAttrsAndMetadata();
1203
1204 NewBonusInst->insertInto(PredBlock, PTI->getIterator());
1205 auto Range = NewBonusInst->cloneDebugInfoFrom(&BonusInst);
1206 RemapDbgRecordRange(NewBonusInst->getModule(), Range, VMap,
 // NOTE(review): the remaining arguments of this call (listing line 1207) are
 // absent from this copy.
1208
1209 NewBonusInst->takeName(&BonusInst);
1210 BonusInst.setName(NewBonusInst->getName() + ".old");
1211 VMap[&BonusInst] = NewBonusInst;
1212
1213 // Update (liveout) uses of bonus instructions,
1214 // now that the bonus instruction has been cloned into predecessor.
1215 // Note that we expect to be in a block-closed SSA form for this to work!
1216 for (Use &U : make_early_inc_range(BonusInst.uses())) {
1217 auto *UI = cast<Instruction>(U.getUser());
1218 auto *PN = dyn_cast<PHINode>(UI);
1219 if (!PN) {
1220 assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
1221 "If the user is not a PHI node, then it should be in the same "
1222 "block as, and come after, the original bonus instruction.");
1223 continue; // Keep using the original bonus instruction.
1224 }
1225 // Is this the block-closed SSA form PHI node?
1226 if (PN->getIncomingBlock(U) == BB)
1227 continue; // Great, keep using the original bonus instruction.
1228 // The only other alternative is an "use" when coming from
1229 // the predecessor block - here we should refer to the cloned bonus instr.
1230 assert(PN->getIncomingBlock(U) == PredBlock &&
1231 "Not in block-closed SSA form?");
1232 U.set(NewBonusInst);
1233 }
1234 }
1235
1236 // Key Instructions: We may have propagated atom info into the pred. If the
1237 // pred's terminator already has atom info do nothing as merging would drop
1238 // one atom group anyway. If it doesn't, propagate the remapped atom group
1239 // from BB's terminator.
1240 if (auto &PredDL = PTI->getDebugLoc()) {
1241 auto &DL = BB->getTerminator()->getDebugLoc();
1242 if (!PredDL->getAtomGroup() && DL && DL->getAtomGroup() &&
1243 PredDL.isSameSourceLocation(DL)) {
1244 PTI->setDebugLoc(DL);
1245 RemapSourceAtom(PTI, VMap);
1246 }
1247 }
1248}
1249
// Folds the value comparison terminator TI into PTI, the terminator of one of
// its predecessors, by building a single switch on CV in front of PTI.
// Branch weights of both terminators are combined when either has them.
//
// TI      value comparison terminating the block being folded away from Pred.
// CV      the compared value; taken by reference because a pointer-typed CV
//         is replaced by a ptrtoint created before PTI.
// PTI     terminator of the predecessor; a value comparison on the same value.
// Builder used to emit the ptrtoint and the new switch.
// Every visible path returns true.
1250bool SimplifyCFGOpt::performValueComparisonIntoPredecessorFolding(
1251 Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
1252 BasicBlock *BB = TI->getParent();
1253 BasicBlock *Pred = PTI->getParent();
1254
 // NOTE(review): listing line 1255 is absent from this copy. `Updates`, used
 // below to collect dominator-tree updates, has no visible declaration --
 // confirm against the original source.
1256
1257 // Figure out which 'cases' to copy from SI to PSI.
1258 std::vector<ValueEqualityComparisonCase> BBCases;
1259 BasicBlock *BBDefault = getValueEqualityComparisonCases(TI, BBCases);
1260
1261 std::vector<ValueEqualityComparisonCase> PredCases;
1262 BasicBlock *PredDefault = getValueEqualityComparisonCases(PTI, PredCases);
1263
1264 // Based on whether the default edge from PTI goes to BB or not, fill in
1265 // PredCases and PredDefault with the new switch cases we would like to
1266 // build.
1267 SmallMapVector<BasicBlock *, int, 8> NewSuccessors;
1268
1269 // Update the branch weight metadata along the way
1270 SmallVector<uint64_t, 8> Weights;
1271 bool PredHasWeights = hasBranchWeightMD(*PTI);
1272 bool SuccHasWeights = hasBranchWeightMD(*TI);
1273
1274 if (PredHasWeights) {
1275 getBranchWeights(PTI, Weights);
1276 // branch-weight metadata is inconsistent here.
1277 if (Weights.size() != 1 + PredCases.size())
1278 PredHasWeights = SuccHasWeights = false;
1279 } else if (SuccHasWeights)
1280 // If there are no predecessor weights but there are successor weights,
1281 // populate Weights with 1, which will later be scaled to the sum of
1282 // successor's weights
1283 Weights.assign(1 + PredCases.size(), 1);
1284
1285 SmallVector<uint64_t, 8> SuccWeights;
1286 if (SuccHasWeights) {
1287 getBranchWeights(TI, SuccWeights);
1288 // branch-weight metadata is inconsistent here.
1289 if (SuccWeights.size() != 1 + BBCases.size())
1290 PredHasWeights = SuccHasWeights = false;
1291 } else if (PredHasWeights)
1292 SuccWeights.assign(1 + BBCases.size(), 1);
1293
1294 if (PredDefault == BB) {
1295 // If this is the default destination from PTI, only the edges in TI
1296 // that don't occur in PTI, or that branch to BB will be activated.
1297 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1298 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1299 if (PredCases[i].Dest != BB)
1300 PTIHandled.insert(PredCases[i].Value);
1301 else {
1302 // The default destination is BB, we don't need explicit targets.
1303 std::swap(PredCases[i], PredCases.back());
1304
1305 if (PredHasWeights || SuccHasWeights) {
1306 // Increase weight for the default case.
1307 Weights[0] += Weights[i + 1];
1308 std::swap(Weights[i + 1], Weights.back());
1309 Weights.pop_back();
1310 }
1311
1312 PredCases.pop_back();
 // Slot i now holds the entry swapped in from the back, so it is examined
 // again and the end bound shrinks by one.
1313 --i;
1314 --e;
1315 }
1316
1317 // Reconstruct the new switch statement we will be building.
1318 if (PredDefault != BBDefault) {
1319 PredDefault->removePredecessor(Pred);
1320 if (DTU && PredDefault != BB)
1321 Updates.push_back({DominatorTree::Delete, Pred, PredDefault});
1322 PredDefault = BBDefault;
1323 ++NewSuccessors[BBDefault];
1324 }
1325
 // Weights[1 .. CasesFromPred) belong to cases that came from the
 // predecessor; entries pushed below come from TI.
1326 unsigned CasesFromPred = Weights.size();
1327 uint64_t ValidTotalSuccWeight = 0;
1328 for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
1329 if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
1330 PredCases.push_back(BBCases[i]);
1331 ++NewSuccessors[BBCases[i].Dest];
1332 if (SuccHasWeights || PredHasWeights) {
1333 // The default weight is at index 0, so weight for the ith case
1334 // should be at index i+1. Scale the cases from successor by
1335 // PredDefaultWeight (Weights[0]).
1336 Weights.push_back(Weights[0] * SuccWeights[i + 1]);
1337 ValidTotalSuccWeight += SuccWeights[i + 1];
1338 }
1339 }
1340
1341 if (SuccHasWeights || PredHasWeights) {
1342 ValidTotalSuccWeight += SuccWeights[0];
1343 // Scale the cases from predecessor by ValidTotalSuccWeight.
1344 for (unsigned i = 1; i < CasesFromPred; ++i)
1345 Weights[i] *= ValidTotalSuccWeight;
1346 // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
1347 Weights[0] *= SuccWeights[0];
1348 }
1349 } else {
1350 // If this is not the default destination from PSI, only the edges
1351 // in SI that occur in PSI with a destination of BB will be
1352 // activated.
1353 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1354 std::map<ConstantInt *, uint64_t> WeightsForHandled;
1355 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1356 if (PredCases[i].Dest == BB) {
1357 PTIHandled.insert(PredCases[i].Value);
1358
1359 if (PredHasWeights || SuccHasWeights) {
1360 WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
1361 std::swap(Weights[i + 1], Weights.back());
1362 Weights.pop_back();
1363 }
1364
1365 std::swap(PredCases[i], PredCases.back());
1366 PredCases.pop_back();
 // Same swap-with-back removal as in the branch above: revisit slot i.
1367 --i;
1368 --e;
1369 }
1370
1371 // Okay, now we know which constants were sent to BB from the
1372 // predecessor. Figure out where they will all go now.
1373 for (const ValueEqualityComparisonCase &Case : BBCases)
1374 if (PTIHandled.count(Case.Value)) {
1375 // If this is one we are capable of getting...
1376 if (PredHasWeights || SuccHasWeights)
1377 Weights.push_back(WeightsForHandled[Case.Value]);
1378 PredCases.push_back(Case);
1379 ++NewSuccessors[Case.Dest];
1380 PTIHandled.erase(Case.Value); // This constant is taken care of
1381 }
1382
1383 // If there are any constants vectored to BB that TI doesn't handle,
1384 // they must go to the default destination of TI.
1385 for (ConstantInt *I : PTIHandled) {
1386 if (PredHasWeights || SuccHasWeights)
1387 Weights.push_back(WeightsForHandled[I]);
1388 PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
1389 ++NewSuccessors[BBDefault];
1390 }
1391 }
1392
1393 // Okay, at this point, we know which new successor Pred will get. Make
1394 // sure we update the number of entries in the PHI nodes for these
1395 // successors.
1396 SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
1397 if (DTU) {
1398 SuccsOfPred = {llvm::from_range, successors(Pred)};
1399 Updates.reserve(Updates.size() + NewSuccessors.size());
1400 }
1401 for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
1402 NewSuccessors) {
 // addPredecessorToBlock is called once per new edge to this successor.
1403 for (auto I : seq(NewSuccessor.second)) {
1404 (void)I;
1405 addPredecessorToBlock(NewSuccessor.first, Pred, BB);
1406 }
 // An Insert update is queued only for blocks that were not already
 // successors of Pred.
1407 if (DTU && !SuccsOfPred.contains(NewSuccessor.first))
1408 Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
1409 }
1410
1411 Builder.SetInsertPoint(PTI);
1412 // Convert pointer to int before we switch.
1413 if (CV->getType()->isPointerTy()) {
1414 assert(!DL.hasUnstableRepresentation(CV->getType()) &&
1415 "Should not end up here with unstable pointers");
1416 CV =
1417 Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr");
1418 }
1419
1420 // Now that the successors are updated, create the new Switch instruction.
1421 SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size());
1422 NewSI->setDebugLoc(PTI->getDebugLoc());
1423 for (ValueEqualityComparisonCase &V : PredCases)
1424 NewSI->addCase(V.Value, V.Dest);
1425
1426 if (PredHasWeights || SuccHasWeights)
1427 setFittedBranchWeights(*NewSI, Weights, /*IsExpected=*/false,
1428 /*ElideAllZero=*/true);
1429
 // NOTE(review): listing line 1430 is absent from this copy. Nothing visible
 // here removes the old predecessor terminator PTI -- confirm against the
 // original source.
1431
1432 // Okay, last check. If BB is still a successor of PSI, then we must
1433 // have an infinite loop case. If so, add an infinitely looping block
1434 // to handle the case to preserve the behavior of the code.
1435 BasicBlock *InfLoopBlock = nullptr;
1436 for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
1437 if (NewSI->getSuccessor(i) == BB) {
1438 if (!InfLoopBlock) {
1439 // Insert it at the end of the function, because it's either code,
1440 // or it won't matter if it's hot. :)
1441 InfLoopBlock =
1442 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
1443 UncondBrInst::Create(InfLoopBlock, InfLoopBlock);
1444 if (DTU)
1445 Updates.push_back(
1446 {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
1447 }
1448 NewSI->setSuccessor(i, InfLoopBlock);
1449 }
1450
1451 if (DTU) {
1452 if (InfLoopBlock)
1453 Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});
1454
1455 Updates.push_back({DominatorTree::Delete, Pred, BB});
1456
1457 DTU->applyUpdates(Updates);
1458 }
1459
1460 ++NumFoldValueComparisonIntoPredecessors;
1461 return true;
1462}
1463
1464/// The specified terminator is a value equality comparison instruction
1465/// (either a switch or a branch on "X == c").
1466/// See if any of the predecessors of the terminator block are value comparisons
1467/// on the same value. If so, and if safe to do so, fold them together.
1468bool SimplifyCFGOpt::foldValueComparisonIntoPredecessors(Instruction *TI,
1469 IRBuilder<> &Builder) {
1470 BasicBlock *BB = TI->getParent();
1471 Value *CV = isValueEqualityComparison(TI); // CondVal
1472 assert(CV && "Not a comparison?");
1473
1474 bool Changed = false;
1475
1476 SmallSetVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
1477 while (!Preds.empty()) {
1478 BasicBlock *Pred = Preds.pop_back_val();
1479 Instruction *PTI = Pred->getTerminator();
1480
1481 // Don't try to fold into itself.
1482 if (Pred == BB)
1483 continue;
1484
1485 // See if the predecessor is a comparison with the same value.
1486 Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
1487 if (PCV != CV)
1488 continue;
1489
1490 SmallSetVector<BasicBlock *, 4> FailBlocks;
1491 if (!safeToMergeTerminators(TI, PTI, &FailBlocks)) {
1492 for (auto *Succ : FailBlocks) {
1493 if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU))
1494 return false;
1495 }
1496 }
1497
1498 performValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
1499 Changed = true;
1500 }
1501 return Changed;
1502}
1503
1504// If we would need to insert a select that uses the value of this invoke
1505// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would
1506// need to do this), we can't hoist the invoke, as there is nowhere to put the
1507// select in this case.
//
// Returns false if some PHI in a successor of BB1 receives different values
// from BB1 and BB2 and either BB1's value is I1 or BB2's value is I2; returns
// true otherwise.
// NOTE(review): the first signature line (listing line 1508, carrying the
// function name and the BB1/BB2 parameters) is absent from this copy.
1509 Instruction *I1, Instruction *I2) {
1510 for (BasicBlock *Succ : successors(BB1)) {
1511 for (const PHINode &PN : Succ->phis()) {
1512 Value *BB1V = PN.getIncomingValueForBlock(BB1);
1513 Value *BB2V = PN.getIncomingValueForBlock(BB2);
1514 if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
1515 return false;
1516 }
1517 }
1518 }
1519 return true;
1520}
1521
1522// Get interesting characteristics of instructions that
1523// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of
1524// instructions can be reordered across.
//
// The visible body builds a bitmask: SkipReadMem when I may read memory,
// SkipSideEffect when I may have side effects or is an alloca, and
// SkipImplicitControlFlow under a condition that is not visible here.
// NOTE(review): listing lines 1525-1529, 1531, 1533 and 1542 are absent from
// this copy. They presumably hold the flag definitions, the function
// signature, and the conditions guarding `return 0` and the
// SkipImplicitControlFlow flag -- confirm against the original source.
1530
1532 // Pseudo probes don't constrain reordering of other instructions.
1534 return 0;
1535 unsigned Flags = 0;
1536 if (I->mayReadFromMemory())
1537 Flags |= SkipReadMem;
1538 // We can't arbitrarily move around allocas, e.g. moving allocas (especially
1539 // inalloca) across stacksave/stackrestore boundaries.
1540 if (I->mayHaveSideEffects() || isa<AllocaInst>(I))
1541 Flags |= SkipSideEffect;
1543 Flags |= SkipImplicitControlFlow;
1544 return Flags;
1545}
1546
1547// Returns true if it is safe to reorder an instruction across preceding
1548// instructions in a basic block.
//
// I     the instruction that would be hoisted.
// Flags bitmask of Skip* flags describing the instructions I would be moved
//       across.
1549static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
1550 // Don't reorder a store over a load.
1551 if ((Flags & SkipReadMem) && I->mayWriteToMemory())
1552 return false;
1553
1554 // If we have seen an instruction with side effects, it's unsafe to reorder an
1555 // instruction which reads memory or itself has side effects.
1556 if ((Flags & SkipSideEffect) &&
1557 (I->mayReadFromMemory() || I->mayHaveSideEffects() || isa<AllocaInst>(I)))
1558 return false;
1559
1560 // Reordering across an instruction which does not necessarily transfer
1561 // control to the next instruction is speculation.
 // NOTE(review): the condition guarding this return (listing line 1562) is
 // absent from this copy; as shown, the return below would be unconditional.
1563 return false;
1564
1565 // Hoisting of llvm.deoptimize is only legal together with the next return
1566 // instruction, which this pass is not always able to do.
1567 if (auto *CB = dyn_cast<CallBase>(I))
1568 if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
1569 return false;
1570
1571 // It's also unsafe/illegal to hoist an instruction above its instruction
1572 // operands
1573 BasicBlock *BB = I->getParent();
1574 for (Value *Op : I->operands()) {
1575 if (auto *J = dyn_cast<Instruction>(Op))
1576 if (J->getParent() == BB)
1577 return false;
1578 }
1579
1580 return true;
1581}
1582
// Forward declaration only; the definition is not in this part of the file.
// PtrValueMayBeModified defaults to false.
1583static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
1584
1585/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical
1586/// instructions \p I1 and \p I2 can and should be hoisted.
///
/// Hoisting is rejected when the two are calls that disagree on musttail,
/// when TTI reports either one as not profitable to hoist, or when either is a
/// call site marked nomerge or convergent.
// NOTE(review): the first signature line (listing line 1587, carrying the
// function name and the I1/I2 parameters) is absent from this copy.
1588 const TargetTransformInfo &TTI) {
1589 // If we're going to hoist a call, make sure that the two instructions
1590 // we're commoning/hoisting are both marked with musttail, or neither of
1591 // them is marked as such. Otherwise, we might end up in a situation where
1592 // we hoist from a block where the terminator is a `ret` to a block where
1593 // the terminator is a `br`, and `musttail` calls expect to be followed by
1594 // a return.
1595 auto *C1 = dyn_cast<CallInst>(I1);
1596 auto *C2 = dyn_cast<CallInst>(I2);
1597 if (C1 && C2)
1598 if (C1->isMustTailCall() != C2->isMustTailCall())
1599 return false;
1600
1601 if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2))
1602 return false;
1603
1604 // If any of the two call sites has nomerge or convergent attribute, stop
1605 // hoisting.
1606 if (const auto *CB1 = dyn_cast<CallBase>(I1))
1607 if (CB1->cannotMerge() || CB1->isConvergent())
1608 return false;
1609 if (const auto *CB2 = dyn_cast<CallBase>(I2))
1610 if (CB2->cannotMerge() || CB2->isConvergent())
1611 return false;
1612
1613 return true;
1614}
1615
1616/// Hoists DbgVariableRecords from \p I1 and \p OtherInstrs that are identical
1617/// in lock-step to \p TI. This matches how dbg.* intrinsics are hoisting in
1618/// hoistCommonCodeFromSuccessors. e.g. The input:
1619/// I1 DVRs: { x, z },
1620/// OtherInsts: { I2 DVRs: { x, y, z } }
1621/// would result in hoisting only DbgVariableRecord x.
///
/// Nothing is hoisted when \p I1 or any instruction in \p OtherInsts has no
/// debug records at all.
// NOTE(review): the first signature line (listing line 1622, carrying the
// function name) is absent from this copy.
1623 Instruction *TI, Instruction *I1,
1624 SmallVectorImpl<Instruction *> &OtherInsts) {
1625 if (!I1->hasDbgRecords())
1626 return;
1627 using CurrentAndEndIt =
1628 std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
1629 // Vector of {Current, End} iterators.
 // NOTE(review): the declaration of `Itrs` (listing line 1630) is absent from
 // this copy.
1631 Itrs.reserve(OtherInsts.size() + 1);
1632 // Helper lambdas for lock-step checks:
1633 // Return true if this Current == End.
1634 auto atEnd = [](const CurrentAndEndIt &Pair) {
1635 return Pair.first == Pair.second;
1636 };
1637 // Return true if all Current are identical.
1638 auto allIdentical = [](const SmallVector<CurrentAndEndIt> &Itrs) {
1639 return all_of(make_first_range(ArrayRef(Itrs).drop_front()),
 // NOTE(review): the opening of the predicate lambda (listing line 1640) is
 // absent from this copy; the visible body compares the first entry's current
 // record against each other entry's current record.
1641 return Itrs[0].first->isIdenticalToWhenDefined(*I);
1642 });
1643 };
1644
1645 // Collect the iterators.
1646 Itrs.push_back(
1647 {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
1648 for (Instruction *Other : OtherInsts) {
1649 if (!Other->hasDbgRecords())
1650 return;
1651 Itrs.push_back(
1652 {Other->getDbgRecordRange().begin(), Other->getDbgRecordRange().end()});
1653 }
1654
1655 // Iterate in lock-step until any of the DbgRecord lists are exhausted. If
1656 // the lock-step DbgRecord are identical, hoist all of them to TI.
1657 // This replicates the dbg.* intrinsic behaviour in
1658 // hoistCommonCodeFromSuccessors.
1659 while (none_of(Itrs, atEnd)) {
1660 bool HoistDVRs = allIdentical(Itrs);
1661 for (CurrentAndEndIt &Pair : Itrs) {
1662 // Increment Current iterator now as we may be about to move the
1663 // DbgRecord.
1664 DbgRecord &DR = *Pair.first++;
1665 if (HoistDVRs) {
1666 DR.removeFromParent();
1667 TI->getParent()->insertDbgRecordBefore(&DR, TI->getIterator());
1668 }
1669 }
1670 }
1671}
1672
// Returns true if I1 and I2 are identical (per isIdenticalToWhenDefined with
// attribute intersection), or are the same operation with the first two
// operands exchanged: a compare against the swapped predicate, or a
// commutative operation whose remaining operands are equal.
// NOTE(review): the first signature line (listing line 1673, carrying the
// function name and the I1 parameter) is absent from this copy.
1674 const Instruction *I2) {
1675 if (I1->isIdenticalToWhenDefined(I2, /*IntersectAttrs=*/true))
1676 return true;
1677
 // For two compares the answer is decided here, whether or not they match.
1678 if (auto *Cmp1 = dyn_cast<CmpInst>(I1))
1679 if (auto *Cmp2 = dyn_cast<CmpInst>(I2))
1680 return Cmp1->getPredicate() == Cmp2->getSwappedPredicate() &&
1681 Cmp1->getOperand(0) == Cmp2->getOperand(1) &&
1682 Cmp1->getOperand(1) == Cmp2->getOperand(0);
1683
1684 if (I1->isCommutative() && I1->isSameOperationAs(I2)) {
 // Operands 0 and 1 must be exchanged; any operands from index 2 on must
 // match position by position.
1685 return I1->getOperand(0) == I2->getOperand(1) &&
1686 I1->getOperand(1) == I2->getOperand(0) &&
1687 equal(drop_begin(I1->operands(), 2), drop_begin(I2->operands(), 2));
1688 }
1689
1690 return false;
1691}
1692
1693/// If the target supports conditional faulting,
1694/// we look for the following pattern:
1695/// \code
1696/// BB:
1697/// ...
1698/// %cond = icmp ult %x, %y
1699/// br i1 %cond, label %TrueBB, label %FalseBB
1700/// FalseBB:
1701/// store i32 1, ptr %q, align 4
1702/// ...
1703/// TrueBB:
1704/// %maskedloadstore = load i32, ptr %b, align 4
1705/// store i32 %maskedloadstore, ptr %p, align 4
1706/// ...
1707/// \endcode
1708///
1709/// and transform it into:
1710///
1711/// \code
1712/// BB:
1713/// ...
1714/// %cond = icmp ult %x, %y
1715/// %maskedloadstore = cload i32, ptr %b, %cond
1716/// cstore i32 %maskedloadstore, ptr %p, %cond
1717/// cstore i32 1, ptr %q, ~%cond
1718/// br i1 %cond, label %TrueBB, label %FalseBB
1719/// FalseBB:
1720/// ...
1721/// TrueBB:
1722/// ...
1723/// \endcode
1724///
1725/// where cload/cstore are represented by llvm.masked.load/store intrinsics,
1726/// e.g.
1727///
1728/// \code
1729/// %vcond = bitcast i1 %cond to <1 x i1>
1730/// %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
1731/// (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
1732/// %maskedloadstore = bitcast <1 x i32> %v0 to i32
1733/// call void @llvm.masked.store.v1i32.p0
1734/// (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
1735/// %cond.not = xor i1 %cond, true
1736/// %vcond.not = bitcast i1 %cond.not to <1 x i1>
1737/// call void @llvm.masked.store.v1i32.p0
1738/// (<1 x i32> <i32 1>, ptr %q, i32 4, <1 x i1> %vcond.not)
1739/// \endcode
1740///
1741/// So we need to turn hoisted load/store into cload/cstore.
1742///
1743/// \param BI The branch instruction.
1744/// \param SpeculatedConditionalLoadsStores The load/store instructions that
1745/// will be speculated.
1746/// \param Invert indicates if speculates FalseBB. Only used in triangle CFG.
// NOTE(review): the line that opens this definition (return type and function
// name; listing line 1747) is missing from this listing, so only the parameter
// list and the body are visible here.
//
// Summary of the visible body: every instruction in
// SpeculatedConditionalLoadsStores is replaced by a masked load/store on a
// single-element vector whose mask is derived from BI's condition, and the
// original instruction is erased.
1748 CondBrInst *BI,
1749 SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
1750 std::optional<bool> Invert, Instruction *Sel) {
1751 auto &Context = BI->getParent()->getContext();
1752 auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
1753 auto *Cond = BI->getCondition();
1754 // Construct the condition if needed.
// With Invert set, a single mask (optionally negated) is built next to the
// speculated code; without it, both the true and the false mask are built
// right before BI and selected per instruction in the loop below.
1755 BasicBlock *BB = BI->getParent();
1756 Value *Mask = nullptr;
1757 Value *MaskFalse = nullptr;
1758 Value *MaskTrue = nullptr;
1759 if (Invert.has_value()) {
1760 IRBuilder<> Builder(Sel ? Sel : SpeculatedConditionalLoadsStores.back());
1761 Mask = Builder.CreateBitCast(
1762 *Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
1763 VCondTy);
1764 } else {
1765 IRBuilder<> Builder(BI);
1766 MaskFalse = Builder.CreateBitCast(
1767 Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy);
1768 MaskTrue = Builder.CreateBitCast(Cond, VCondTy);
1769 }
// Strips any chain of bitcast instructions and returns the underlying value.
1770 auto PeekThroughBitcasts = [](Value *V) {
1771 while (auto *BitCast = dyn_cast<BitCastInst>(V))
1772 V = BitCast->getOperand(0);
1773 return V;
1774 };
1775 for (auto *I : SpeculatedConditionalLoadsStores) {
1776 IRBuilder<> Builder(Invert.has_value() ? I : BI);
// Without Invert, pick the mask by which successor of BI the instruction
// lives in: successor 0 uses the true mask, anything else the false mask.
1777 if (!Invert.has_value())
1778 Mask = I->getParent() == BI->getSuccessor(0) ? MaskTrue : MaskFalse;
1779 // We currently assume conditional faulting load/store is supported for
1780 // scalar types only when creating new instructions. This can be easily
1781 // extended for vector types in the future.
1782 assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
1783 auto *Op0 = I->getOperand(0);
1784 CallInst *MaskedLoadStore = nullptr;
1785 if (auto *LI = dyn_cast<LoadInst>(I)) {
1786 // Handle Load.
1787 auto *Ty = I->getType();
1788 PHINode *PN = nullptr;
1789 Value *PassThru = nullptr;
1790 if (Invert.has_value())
1791 for (User *U : I->users()) {
1792 if ((PN = dyn_cast<PHINode>(U))) {
1793 PassThru = Builder.CreateBitCast(
1794 PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
1795 FixedVectorType::get(Ty, 1));
1796 } else if (auto *Ins = cast<Instruction>(U);
1797 Sel && Ins->getParent() == BB) {
1798 // This happens when a store and/or a speculative instruction between
1799 // the load and the store was hoisted into BB. Make sure the masked
1800 // load is inserted before its use.
1801 // We assume there is only one such use.
1802 Builder.SetInsertPoint(Ins);
1803 }
1804 }
1805 MaskedLoadStore = Builder.CreateMaskedLoad(
1806 FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
1807 Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty);
1808 if (PN)
1809 PN->setIncomingValue(PN->getBasicBlockIndex(BB), NewLoadStore);
1810 I->replaceAllUsesWith(NewLoadStore);
1811 } else {
1812 // Handle Store.
1813 auto *StoredVal = Builder.CreateBitCast(
1814 PeekThroughBitcasts(Op0), FixedVectorType::get(Op0->getType(), 1));
1815 MaskedLoadStore = Builder.CreateMaskedStore(
1816 StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
1817 }
1818 // For non-debug metadata, only !annotation, !range, !nonnull and !align are
1819 // kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
1820 //
1821 // !nonnull, !align: Pointer types are not supported, so no need to keep.
1822 // !range: Load type is changed from scalar to vector, but the metadata on
1823 // vector specifies a per-element range, so the semantics stay the
1824 // same. Keep it.
1825 // !annotation: Does not impact semantics. Keep it.
1826 if (const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range))
1827 MaskedLoadStore->addRangeRetAttr(getConstantRangeFromMetadata(*Ranges));
1828 I->dropUBImplyingAttrsAndUnknownMetadata({LLVMContext::MD_annotation});
1829 // FIXME: DIAssignID is not supported for masked store yet.
1830 // (Verifier::visitDIAssignIDMetadata)
// NOTE(review): listing line 1831 is missing here; whatever statement it held
// is not visible in this listing.
1832 I->eraseMetadataIf([](unsigned MDKind, MDNode *Node) {
1833 return Node->getMetadataID() == Metadata::DIAssignIDKind;
1834 });
1835 MaskedLoadStore->copyMetadata(*I);
1836 I->eraseFromParent();
1837 }
1838}
1839
// NOTE(review): the line that opens this definition (listing line 1840) is
// missing from this listing; only the tail of the parameter list is visible.
//
// Summary of the visible body: rejects anything that is not a simple load or
// store, and rejects loads/stores when HoistLoadsWithCondFaulting /
// HoistStoresWithCondFaulting is off; the remaining cases are decided by the
// TTI query in the return statement.
1841 const TargetTransformInfo &TTI) {
1842 // Volatile and atomic accesses are not handled.
1843 bool IsStore = false;
1844 if (auto *L = dyn_cast<LoadInst>(I)) {
1845 if (!L->isSimple() || !HoistLoadsWithCondFaulting)
1846 return false;
1847 } else if (auto *S = dyn_cast<StoreInst>(I)) {
1848 if (!S->isSimple() || !HoistStoresWithCondFaulting)
1849 return false;
1850 IsStore = true;
1851 } else
1852 return false;
1853
1854 // llvm.masked.load/store use i32 for alignment while load/store use i64.
1855 // That's why we have the alignment limitation.
1856 // FIXME: Update the prototype of the intrinsics?
1857 return TTI.hasConditionalLoadStoreForType(getLoadStoreType(I), IsStore) &&
// NOTE(review): listing line 1858 (the right-hand operand of the '&&' above,
// presumably the alignment check the comment refers to) is missing from this
// listing.
1859}
1860
1861/// Hoist any common code in the successor blocks up into the block. This
1862/// function guarantees that BB dominates all successors. If AllInstsEqOnly is
1863/// given, only perform hoisting in case all successor blocks contain matching
1864/// instructions only. In that case, all instructions can be hoisted and the
1865/// original branch will be replaced and selects for PHIs are added.
1866bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
1867 bool AllInstsEqOnly) {
1868 // This does very trivial matching, with limited scanning, to find identical
1869 // instructions in the two blocks. In particular, we don't want to get into
1870 // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
1871 // such, we currently just scan for obviously identical instructions in an
1872 // identical order, possibly separated by the same number of non-identical
1873 // instructions.
1874 BasicBlock *BB = TI->getParent();
1875 unsigned int SuccSize = succ_size(BB);
1876 if (SuccSize < 2)
1877 return false;
1878
1879 // If either of the blocks has its address taken, then we can't do this fold,
1880 // because the code we'd hoist would no longer run when we jump into the block
1881 // by its address.
1882 SmallSetVector<BasicBlock *, 4> UniqueSuccessors(from_range, successors(BB));
1883 for (auto *Succ : UniqueSuccessors) {
1884 if (Succ->hasAddressTaken())
1885 return false;
1886 // Use getUniquePredecessor instead of getSinglePredecessor to support
1887 // multi-cases successors in switch.
1888 if (Succ->getUniquePredecessor())
1889 continue;
1890 // If Succ has >1 predecessors, continue to check if the Succ contains only
1891 // one `unreachable` inst. Since executing an `unreachable` inst is UB, we
1892 // can relax the condition based on the assumption that the program would
1893 // never enter Succ and trigger such UB.
1894 if (isa<UnreachableInst>(*Succ->begin()))
1895 continue;
1896 return false;
1897 }
1898 // The second of pair is a SkipFlags bitmask.
1899 using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
1900 SmallVector<SuccIterPair, 8> SuccIterPairs;
1901 for (auto *Succ : UniqueSuccessors) {
1902 BasicBlock::iterator SuccItr = Succ->begin();
1903 if (isa<PHINode>(*SuccItr))
1904 return false;
1905 SuccIterPairs.push_back(SuccIterPair(SuccItr, 0));
1906 }
1907
1908 if (AllInstsEqOnly) {
1909 // Check if all instructions in the successor blocks match. This allows
1910 // hoisting all instructions and removing the blocks we are hoisting from,
1911 // so does not add any new instructions.
1912
1913 // Check if sizes and terminators of all successors match.
1914 unsigned Size0 = UniqueSuccessors[0]->size();
1915 Instruction *Term0 = UniqueSuccessors[0]->getTerminator();
1916 bool AllSame =
1917 all_of(drop_begin(UniqueSuccessors), [Term0, Size0](BasicBlock *Succ) {
1918 return Succ->getTerminator()->isIdenticalTo(Term0) &&
1919 Succ->size() == Size0;
1920 });
1921 if (!AllSame)
1922 return false;
1923 LockstepReverseIterator<true> LRI(UniqueSuccessors.getArrayRef());
1924 while (LRI.isValid()) {
1925 Instruction *I0 = (*LRI)[0];
1926 if (any_of(*LRI, [I0](Instruction *I) {
1927 return !areIdenticalUpToCommutativity(I0, I);
1928 })) {
1929 return false;
1930 }
1931 --LRI;
1932 }
1933 // Now we know that all instructions in all successors can be hoisted. Let
1934 // the loop below handle the hoisting.
1935 }
1936
1937 // Count how many instructions were not hoisted so far. There's a limit on how
1938 // many instructions we skip, serving as a compilation time control as well as
1939 // preventing excessive increase of life ranges.
1940 unsigned NumSkipped = 0;
1941 // If we find an unreachable instruction at the beginning of a basic block, we
1942 // can still hoist instructions from the rest of the basic blocks.
1943 if (SuccIterPairs.size() > 2) {
1944 erase_if(SuccIterPairs,
1945 [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
1946 if (SuccIterPairs.size() < 2)
1947 return false;
1948 }
1949
1950 bool Changed = false;
1951
// Each iteration looks at the current instruction of every successor in
// lockstep; the loop only exits through the return statements inside it.
1952 for (;;) {
1953 auto *SuccIterPairBegin = SuccIterPairs.begin();
1954 auto &BB1ItrPair = *SuccIterPairBegin++;
1955 auto OtherSuccIterPairRange =
1956 iterator_range(SuccIterPairBegin, SuccIterPairs.end());
1957 auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);
1958
1959 Instruction *I1 = &*BB1ItrPair.first;
1960
1961 bool AllInstsAreIdentical = true;
1962 bool HasTerminator = I1->isTerminator();
1963 for (auto &SuccIter : OtherSuccIterRange) {
1964 Instruction *I2 = &*SuccIter;
1965 HasTerminator |= I2->isTerminator();
1966 if (AllInstsAreIdentical && (!areIdenticalUpToCommutativity(I1, I2) ||
1967 MMRAMetadata(*I1) != MMRAMetadata(*I2)))
1968 AllInstsAreIdentical = false;
1969 }
1970
1971 SmallVector<Instruction *, 8> OtherInsts;
1972 for (auto &SuccIter : OtherSuccIterRange)
1973 OtherInsts.push_back(&*SuccIter);
1974
1975 // If we are hoisting the terminator instruction, don't move one (making a
1976 // broken BB), instead clone it, and remove BI.
1977 if (HasTerminator) {
1978 // Even if BB, which contains only one unreachable instruction, is ignored
1979 // at the beginning of the loop, we can hoist the terminator instruction.
1980 // If any instructions remain in the block, we cannot hoist terminators.
1981 if (NumSkipped || !AllInstsAreIdentical) {
1982 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1983 return Changed;
1984 }
1985
1986 return hoistSuccIdenticalTerminatorToSwitchOrIf(
1987 TI, I1, OtherInsts, UniqueSuccessors.getArrayRef()) ||
1988 Changed;
1989 }
1990
1991 if (AllInstsAreIdentical) {
1992 unsigned SkipFlagsBB1 = BB1ItrPair.second;
1993 AllInstsAreIdentical =
1994 isSafeToHoistInstr(I1, SkipFlagsBB1) &&
1995 all_of(OtherSuccIterPairRange, [=](const auto &Pair) {
1996 Instruction *I2 = &*Pair.first;
1997 unsigned SkipFlagsBB2 = Pair.second;
1998 // Even if the instructions are identical, it may not
1999 // be safe to hoist them if we have skipped over
2000 // instructions with side effects or their operands
2001 // weren't hoisted.
2002 return isSafeToHoistInstr(I2, SkipFlagsBB2) &&
// NOTE(review): listing line 2003 (the right-hand operand of the '&&' above)
// is missing from this listing.
2004 });
2005 }
2006
2007 // A musttail call must be immediately followed by a ret, so hoisting is
2008 // only legal if its ret is hoisted with it on the next iteration. That is,
2009 // no instruction has been skipped (the entire successor can be hoisted into
2010 // the predecessor) and the call is directly followed by a ret.
2011 if (auto *CI = dyn_cast<CallInst>(I1);
2012 AllInstsAreIdentical && CI && CI->isMustTailCall()) {
2013 AllInstsAreIdentical =
2014 NumSkipped == 0 && all_of(SuccIterPairs, [](const SuccIterPair &P) {
2015 return isa<ReturnInst>(*std::next(P.first));
2016 });
2017 }
2018
2019 if (AllInstsAreIdentical) {
2020 BB1ItrPair.first++;
2021 // For a normal instruction, we just move one to right before the
2022 // branch, then replace all uses of the other with the first. Finally,
2023 // we remove the now redundant second instruction.
2024 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
2025 // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
2026 // and leave any that were not hoisted behind (by calling moveBefore
2027 // rather than moveBeforePreserving).
2028 I1->moveBefore(TI->getIterator());
2029 for (auto &SuccIter : OtherSuccIterRange) {
2030 Instruction *I2 = &*SuccIter++;
2031 assert(I2 != I1);
2032 if (!I2->use_empty())
2033 I2->replaceAllUsesWith(I1);
2034 I1->andIRFlags(I2);
2035 if (auto *CB = dyn_cast<CallBase>(I1)) {
2036 bool Success = CB->tryIntersectAttributes(cast<CallBase>(I2));
2037 assert(Success && "We should not be trying to hoist callbases "
2038 "with non-intersectable attributes");
2039 // For NDEBUG Compile.
2040 (void)Success;
2041 }
2042
2043 combineMetadataForCSE(I1, I2, true);
2044 // I1 and I2 are being combined into a single instruction. Its debug
2045 // location is the merged locations of the original instructions.
2046 I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
2047 I2->eraseFromParent();
2048 }
// NumHoistCommonCode is bumped only on the first hoist of this call;
// NumHoistCommonInstrs is bumped on every hoisted instruction group.
2049 if (!Changed)
2050 NumHoistCommonCode += SuccIterPairs.size();
2051 Changed = true;
2052 NumHoistCommonInstrs += SuccIterPairs.size();
2053 } else {
2054 if (NumSkipped >= HoistCommonSkipLimit) {
2055 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
2056 return Changed;
2057 }
2058 // We are about to skip over a pair of non-identical instructions. Record
2059 // if any have characteristics that would prevent reordering instructions
2060 // across them.
2061 for (auto &SuccIterPair : SuccIterPairs) {
2062 Instruction *I = &*SuccIterPair.first++;
2063 SuccIterPair.second |= skippedInstrFlags(I);
2064 }
2065 ++NumSkipped;
2066 }
2067 }
2068}
2069
// Clones the terminator I1 (identical to the terminators in OtherSuccTIs) into
// TI's block before TI, redirects uses of the original terminators to the
// clone, and, for a conditional branch, inserts selects so that PHI nodes in
// the common successors receive the right value. Returns false without
// changing anything when one of the early checks below rejects the hoist.
2070bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
2071 Instruction *TI, Instruction *I1,
2072 SmallVectorImpl<Instruction *> &OtherSuccTIs,
2073 ArrayRef<BasicBlock *> UniqueSuccessors) {
2074
2075 auto *BI = dyn_cast<CondBrInst>(TI);
2076
2077 bool Changed = false;
2078 BasicBlock *TIParent = TI->getParent();
2079 BasicBlock *BB1 = I1->getParent();
2080
2081 // Used only for an if statement.
2082 auto *I2 = *OtherSuccTIs.begin();
2083 auto *BB2 = I2->getParent();
2084 if (BI) {
2085 assert(OtherSuccTIs.size() == 1);
2086 assert(BI->getSuccessor(0) == I1->getParent());
2087 assert(BI->getSuccessor(1) == I2->getParent());
2088 }
2089
2090 // In the case of an if statement, we try to hoist an invoke.
2091 // FIXME: Can we define a safety predicate for CallBr?
2092 // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
2093 // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
2094 if (isa<InvokeInst>(I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
2095 return false;
2096
2097 // TODO: callbr hoisting currently disabled pending further study.
2098 if (isa<CallBrInst>(I1))
2099 return false;
2100
2101 for (BasicBlock *Succ : successors(BB1)) {
2102 for (PHINode &PN : Succ->phis()) {
2103 Value *BB1V = PN.getIncomingValueForBlock(BB1);
2104 for (Instruction *OtherSuccTI : OtherSuccTIs) {
2105 Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent());
2106 if (BB1V == BB2V)
2107 continue;
2108
2109 // In the case of an if statement, check for
2110 // passingValueIsAlwaysUndefined here because we would rather eliminate
2111 // undefined control flow than convert it to a select.
2112 if (!BI || passingValueIsAlwaysUndefined(BB1V, &PN) ||
// NOTE(review): listing line 2113 (the last operand of the '||' chain above)
// is missing from this listing.
2114 return false;
2115 }
2116 }
2117 }
2118
2119 // Hoist DbgVariableRecords attached to the terminator to match dbg.*
2120 // intrinsic hoisting behaviour in hoistCommonCodeFromSuccessors.
2121 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherSuccTIs);
2122 // Clone the terminator and hoist it into the pred, without any debug info.
2123 Instruction *NT = I1->clone();
2124 NT->insertInto(TIParent, TI->getIterator());
2125 if (!NT->getType()->isVoidTy()) {
2126 I1->replaceAllUsesWith(NT);
2127 for (Instruction *OtherSuccTI : OtherSuccTIs)
2128 OtherSuccTI->replaceAllUsesWith(NT);
2129 NT->takeName(I1);
2130 }
2131 Changed = true;
2132 NumHoistCommonInstrs += OtherSuccTIs.size() + 1;
2133
2134 // Ensure terminator gets a debug location, even an unknown one, in case
2135 // it involves inlinable calls.
// NOTE(review): listing line 2136, which presumably declares Locs, is missing
// from this listing.
2137 Locs.push_back(I1->getDebugLoc());
2138 for (auto *OtherSuccTI : OtherSuccTIs)
2139 Locs.push_back(OtherSuccTI->getDebugLoc());
2140 NT->setDebugLoc(DebugLoc::getMergedLocations(Locs));
2141
2142 // PHIs created below will adopt NT's merged DebugLoc.
2143 IRBuilder<NoFolder> Builder(NT);
2144
2145 // In the case of an if statement, hoisting one of the terminators from our
2146 // successor is a great thing. Unfortunately, the successors of the if/else
2147 // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2
2148 // must agree for all PHI nodes, so we insert select instruction to compute
2149 // the final result.
2150 if (BI) {
// One select is shared by every PHI that needs the same (BB1V, BB2V) pair.
2151 std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
2152 for (BasicBlock *Succ : successors(BB1)) {
2153 for (PHINode &PN : Succ->phis()) {
2154 Value *BB1V = PN.getIncomingValueForBlock(BB1);
2155 Value *BB2V = PN.getIncomingValueForBlock(BB2);
2156 if (BB1V == BB2V)
2157 continue;
2158
2159 // These values do not agree. Insert a select instruction before NT
2160 // that determines the right value.
2161 SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
2162 if (!SI) {
2163 // Propagate fast-math-flags from phi node to its replacement select.
// NOTE(review): listing line 2164 (the start of the statement that assigns SI)
// is missing from this listing; only its argument list is visible below.
2165 BI->getCondition(), BB1V, BB2V,
2166 isa<FPMathOperator>(PN) ? &PN : nullptr,
2167 BB1V->getName() + "." + BB2V->getName(), BI));
2168 }
2169
2170 // Make the PHI node use the select for all incoming values for BB1/BB2
2171 for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
2172 if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
2173 PN.setIncomingValue(i, SI);
2174 }
2175 }
2176 }
2177
// NOTE(review): listing line 2178, which presumably declares Updates, is
// missing from this listing.
2179
2180 // Update any PHI nodes in our new successors.
2181 for (BasicBlock *Succ : successors(BB1)) {
2182 addPredecessorToBlock(Succ, TIParent, BB1);
2183 if (DTU)
2184 Updates.push_back({DominatorTree::Insert, TIParent, Succ});
2185 }
2186
2187 if (DTU) {
2188 // TI might be a switch with multi-cases destination, so we need to care for
2189 // the duplication of successors.
2190 for (BasicBlock *Succ : UniqueSuccessors)
2191 Updates.push_back({DominatorTree::Delete, TIParent, Succ});
2192 }
2193
// NOTE(review): listing line 2194 is missing from this listing; the removal of
// the old terminator TI is not visible anywhere in this block.
2195 if (DTU)
2196 DTU->applyUpdates(Updates);
2197 return Changed;
2198}
2199
2200// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
2201// into variables.
// NOTE(review): listing line 2202 (return type, function name and first
// parameter) is missing from this listing.
//
// From the visible body: for an integer div/rem instruction only operand 1 is
// reported as expensive to replace; otherwise the answer is true unless I is
// an intrinsic call.
2203 int OpIdx) {
2204 // Divide/Remainder by constant is typically much cheaper than by variable.
2205 if (I->isIntDivRem())
2206 return OpIdx != 1;
2207 return !isa<IntrinsicInst>(I);
2208}
2209
2210// All instructions in Insts belong to different blocks that all unconditionally
2211// branch to a common successor. Analyze each instruction and return true if it
2212// would be possible to sink them into their successor, creating one common
2213// instruction instead. For every value that would be required to be provided by
2214// PHI node (because an operand varies in each input block), add to PHIOperands.
// NOTE(review): listing lines 2215-2216 (return type, function name and the
// Insts parameter) are missing from this listing.
2217 DenseMap<const Use *, SmallVector<Value *, 4>> &PHIOperands) {
2218 // Prune out obviously bad instructions to move. Each instruction must have
2219 // the same number of uses, and we check later that the uses are consistent.
2220 std::optional<unsigned> NumUses;
2221 for (auto *I : Insts) {
2222 // These instructions may change or break semantics if moved.
2223 if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
2224 I->getType()->isTokenTy())
2225 return false;
2226
2227 // Do not try to sink an instruction in an infinite loop - it can cause
2228 // this algorithm to infinite loop.
2229 if (I->getParent()->getSingleSuccessor() == I->getParent())
2230 return false;
2231
2232 // Conservatively return false if I is an inline-asm instruction. Sinking
2233 // and merging inline-asm instructions can potentially create arguments
2234 // that cannot satisfy the inline-asm constraints.
2235 // If the instruction has nomerge or convergent attribute, return false.
2236 if (const auto *C = dyn_cast<CallBase>(I))
2237 if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
2238 return false;
2239
2240 if (!NumUses)
2241 NumUses = I->getNumUses();
2242 else if (NumUses != I->getNumUses())
2243 return false;
2244 }
2245
2246 const Instruction *I0 = Insts.front();
2247 const auto I0MMRA = MMRAMetadata(*I0);
2248 for (auto *I : Insts) {
2249 if (!I->isSameOperationAs(I0, Instruction::CompareUsingIntersectedAttrs))
2250 return false;
2251
2252 // Treat MMRAs conservatively. This pass can be quite aggressive and
2253 // could drop a lot of MMRAs otherwise.
2254 if (MMRAMetadata(*I) != I0MMRA)
2255 return false;
2256 }
2257
2258 // Uses must be consistent: If I0 is used in a phi node in the sink target,
2259 // then the other phi operands must match the instructions from Insts. This
2260 // also has to hold true for any phi nodes that would be created as a result
2261 // of sinking. Both of these cases are represented by PHIOperands.
2262 for (const Use &U : I0->uses()) {
2263 auto It = PHIOperands.find(&U);
2264 if (It == PHIOperands.end())
2265 // There may be uses in other blocks when sinking into a loop header.
2266 return false;
2267 if (!equal(Insts, It->second))
2268 return false;
2269 }
2270
2271 // For calls to be sinkable, they must all be indirect, or have same callee.
2272 // I.e. if we have two direct calls to different callees, we don't want to
2273 // turn that into an indirect call. Likewise, if we have an indirect call,
2274 // and a direct call, we don't actually want to have a single indirect call.
2275 if (isa<CallBase>(I0)) {
2276 auto IsIndirectCall = [](const Instruction *I) {
2277 return cast<CallBase>(I)->isIndirectCall();
2278 };
2279 bool HaveIndirectCalls = any_of(Insts, IsIndirectCall);
2280 bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall);
2281 if (HaveIndirectCalls) {
2282 if (!AllCallsAreIndirect)
2283 return false;
2284 } else {
2285 // All callees must be identical.
2286 Value *Callee = nullptr;
2287 for (const Instruction *I : Insts) {
2288 Value *CurrCallee = cast<CallBase>(I)->getCalledOperand();
2289 if (!Callee)
2290 Callee = CurrCallee;
2291 else if (Callee != CurrCallee)
2292 return false;
2293 }
2294 }
2295 }
2296
// For every operand position where the instructions disagree, record the
// per-instruction operand values in PHIOperands, keyed by I0's use.
2297 for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
2298 Value *Op = I0->getOperand(OI);
2299 auto SameAsI0 = [&I0, OI](const Instruction *I) {
2300 assert(I->getNumOperands() == I0->getNumOperands());
2301 return I->getOperand(OI) == I0->getOperand(OI);
2302 };
2303 if (!all_of(Insts, SameAsI0)) {
// NOTE(review): listing lines 2304-2305 are missing from this listing. They
// presumably hold the condition guarding the 'return false' below; as shown,
// the return looks unconditional, which would make the following lines dead.
2306 // We can't create a PHI from this GEP.
2307 return false;
2308 auto &Ops = PHIOperands[&I0->getOperandUse(OI)];
2309 for (auto *I : Insts)
2310 Ops.push_back(I->getOperand(OI));
2311 }
2312 }
2313 return true;
2314}
2315
2316// Assuming canSinkInstructions(Blocks) has returned true, sink the last
2317// instruction of every block in Blocks to their common successor, commoning
2318// into one instruction.
// NOTE(review): listing line 2319 (the function signature, which introduces
// Blocks) is missing from this listing.
2320 auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
2321
2322 // canSinkInstructions returning true guarantees that every block has at
2323 // least one non-terminator instruction.
// NOTE(review): listing line 2324, which presumably declares Insts, is missing
// from this listing.
2325 for (auto *BB : Blocks) {
2326 Instruction *I = BB->getTerminator();
2327 I = I->getPrevNode();
2328 Insts.push_back(I);
2329 }
2330
2331 // We don't need to do any more checking here; canSinkInstructions should
2332 // have done it all for us.
2333 SmallVector<Value*, 4> NewOperands;
2334 Instruction *I0 = Insts.front();
2335 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
2336 // This check is different to that in canSinkInstructions. There, we
2337 // cared about the global view once simplifycfg (and instcombine) have
2338 // completed - it takes into account PHIs that become trivially
2339 // simplifiable. However here we need a more local view; if an operand
2340 // differs we create a PHI and rely on instcombine to clean up the very
2341 // small mess we may make.
2342 bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
2343 return I->getOperand(O) != I0->getOperand(O);
2344 });
2345 if (!NeedPHI) {
2346 NewOperands.push_back(I0->getOperand(O));
2347 continue;
2348 }
2349
2350 // Create a new PHI in the successor block and populate it.
2351 auto *Op = I0->getOperand(O);
2352 assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
2353 auto *PN =
2354 PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
2355 PN->insertBefore(BBEnd->begin());
2356 for (auto *I : Insts)
2357 PN->addIncoming(I->getOperand(O), I->getParent());
2358 NewOperands.push_back(PN);
2359 }
2360
2361 // Arbitrarily use I0 as the new "common" instruction; remap its operands
2362 // and move it to the start of the successor block.
2363 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
2364 I0->getOperandUse(O).set(NewOperands[O]);
2365
2366 I0->moveBefore(*BBEnd, BBEnd->getFirstInsertionPt());
2367
2368 // Update metadata and IR flags, and merge debug locations.
2369 for (auto *I : Insts)
2370 if (I != I0) {
2371 // The debug location for the "common" instruction is the merged locations
2372 // of all the commoned instructions. We start with the original location
2373 // of the "common" instruction and iteratively merge each location in the
2374 // loop below.
2375 // This is an N-way merge, which will be inefficient if I0 is a CallInst.
2376 // However, since an N-way merge for CallInst is rare, we use the simplified
2377 // API instead of the more complex API for N-way merges.
2378 I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
2379 combineMetadataForCSE(I0, I, true);
2380 I0->andIRFlags(I);
2381 if (auto *CB = dyn_cast<CallBase>(I0)) {
2382 bool Success = CB->tryIntersectAttributes(cast<CallBase>(I));
2383 assert(Success && "We should not be trying to sink callbases "
2384 "with non-intersectable attributes");
2385 // For NDEBUG Compile.
2386 (void)Success;
2387 }
2388 }
2389
2390 for (User *U : make_early_inc_range(I0->users())) {
2391 // canSinkInstructions checked that all instructions are only used by
2392 // phi nodes in a way that allows replacing the phi node with the common
2393 // instruction.
2394 auto *PN = cast<PHINode>(U);
2395 PN->replaceAllUsesWith(I0);
2396 PN->eraseFromParent();
2397 }
2398
2399 // Finally nuke all instructions apart from the common instruction.
2400 for (auto *I : Insts) {
2401 if (I == I0)
2402 continue;
2403 // The remaining uses are debug users, replace those with the common inst.
2404 // In most (all?) cases this just introduces a use-before-def.
2405 assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
2406 I->replaceAllUsesWith(I0);
2407 I->eraseFromParent();
2408 }
2409}
2410
2411/// Check whether BB's predecessors end with unconditional branches. If it is
2412/// true, sink any common code from the predecessors to BB.
2414 DomTreeUpdater *DTU) {
2415 // We support two situations:
2416 // (1) all incoming arcs are unconditional
2417 // (2) there are non-unconditional incoming arcs
2418 //
2419 // (2) is very common in switch defaults and
2420 // else-if patterns;
2421 //
2422 // if (a) f(1);
2423 // else if (b) f(2);
2424 //
2425 // produces:
2426 //
2427 // [if]
2428 // / \
2429 // [f(1)] [if]
2430 // | | \
2431 // | | |
2432 // | [f(2)]|
2433 // \ | /
2434 // [ end ]
2435 //
2436 // [end] has two unconditional predecessor arcs and one conditional. The
2437 // conditional refers to the implicit empty 'else' arc. This conditional
2438 // arc can also be caused by an empty default block in a switch.
2439 //
2440 // In this case, we attempt to sink code from all *unconditional* arcs.
2441 // If we can sink instructions from these arcs (determined during the scan
2442 // phase below) we insert a common successor for all unconditional arcs and
2443 // connect that to [end], to enable sinking:
2444 //
2445 // [if]
2446 // / \
2447 // [x(1)] [if]
2448 // | | \
2449 // | | \
2450 // | [x(2)] |
2451 // \ / |
2452 // [sink.split] |
2453 // \ /
2454 // [ end ]
2455 //
2456 SmallVector<BasicBlock*,4> UnconditionalPreds;
2457 bool HaveNonUnconditionalPredecessors = false;
2458 for (auto *PredBB : predecessors(BB)) {
2459 auto *PredBr = dyn_cast<UncondBrInst>(PredBB->getTerminator());
2460 if (PredBr)
2461 UnconditionalPreds.push_back(PredBB);
2462 else
2463 HaveNonUnconditionalPredecessors = true;
2464 }
2465 if (UnconditionalPreds.size() < 2)
2466 return false;
2467
2468 // We take a two-step approach to tail sinking. First we scan from the end of
2469 // each block upwards in lockstep. If the n'th instruction from the end of each
2470 // block can be sunk, those instructions are added to ValuesToSink and we
2471 // carry on. If we can sink an instruction but need to PHI-merge some operands
2472 // (because they're not identical in each instruction) we add these to
2473 // PHIOperands.
2474 // We prepopulate PHIOperands with the phis that already exist in BB.
2476 for (PHINode &PN : BB->phis()) {
2478 for (const Use &U : PN.incoming_values())
2479 IncomingVals.insert({PN.getIncomingBlock(U), &U});
2480 auto &Ops = PHIOperands[IncomingVals[UnconditionalPreds[0]]];
2481 for (BasicBlock *Pred : UnconditionalPreds)
2482 Ops.push_back(*IncomingVals[Pred]);
2483 }
2484
2485 int ScanIdx = 0;
2486 SmallPtrSet<Value*,4> InstructionsToSink;
2487 LockstepReverseIterator<true> LRI(UnconditionalPreds);
2488 while (LRI.isValid() &&
2489 canSinkInstructions(*LRI, PHIOperands)) {
2490 LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
2491 << "\n");
2492 InstructionsToSink.insert_range(*LRI);
2493 ++ScanIdx;
2494 --LRI;
2495 }
2496
2497 // If no instructions can be sunk, early-return.
2498 if (ScanIdx == 0)
2499 return false;
2500
2501 bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);
2502
2503 if (!followedByDeoptOrUnreachable) {
2504 // Check whether this is the pointer operand of a load/store.
2505 auto IsMemOperand = [](Use &U) {
2506 auto *I = cast<Instruction>(U.getUser());
2507 if (isa<LoadInst>(I))
2508 return U.getOperandNo() == LoadInst::getPointerOperandIndex();
2509 if (isa<StoreInst>(I))
2510 return U.getOperandNo() == StoreInst::getPointerOperandIndex();
2511 return false;
2512 };
2513
2514 // Okay, we *could* sink last ScanIdx instructions. But how many can we
2515 // actually sink before encountering instruction that is unprofitable to
2516 // sink?
2517 auto ProfitableToSinkInstruction = [&](LockstepReverseIterator<true> &LRI) {
2518 unsigned NumPHIInsts = 0;
2519 for (Use &U : (*LRI)[0]->operands()) {
2520 auto It = PHIOperands.find(&U);
2521 if (It != PHIOperands.end() && !all_of(It->second, [&](Value *V) {
2522 return InstructionsToSink.contains(V);
2523 })) {
2524 ++NumPHIInsts;
2525 // Do not separate a load/store from the gep producing the address.
2526 // The gep can likely be folded into the load/store as an addressing
2527 // mode. Additionally, a load of a gep is easier to analyze than a
2528 // load of a phi.
2529 if (IsMemOperand(U) &&
2530 any_of(It->second, [](Value *V) { return isa<GEPOperator>(V); }))
2531 return false;
2532 // FIXME: this check is overly optimistic. We may end up not sinking
2533 // said instruction, due to the very same profitability check.
2534 // See @creating_too_many_phis in sink-common-code.ll.
2535 }
2536 }
2537 LLVM_DEBUG(dbgs() << "SINK: #phi insts: " << NumPHIInsts << "\n");
2538 return NumPHIInsts <= 1;
2539 };
2540
2541 // We've determined that we are going to sink last ScanIdx instructions,
2542 // and recorded them in InstructionsToSink. Now, some instructions may be
2543 // unprofitable to sink. But that determination depends on the instructions
2544 // that we are going to sink.
2545
2546 // First, forward scan: find the first instruction unprofitable to sink,
2547 // recording all the ones that are profitable to sink.
2548 // FIXME: would it be better, after we detect that not all are profitable.
2549 // to either record the profitable ones, or erase the unprofitable ones?
2550 // Maybe we need to choose (at runtime) the one that will touch least
2551 // instrs?
2552 LRI.reset();
2553 int Idx = 0;
2554 SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
2555 while (Idx < ScanIdx) {
2556 if (!ProfitableToSinkInstruction(LRI)) {
2557 // Too many PHIs would be created.
2558 LLVM_DEBUG(
2559 dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
2560 break;
2561 }
2562 InstructionsProfitableToSink.insert_range(*LRI);
2563 --LRI;
2564 ++Idx;
2565 }
2566
2567 // If no instructions can be sunk, early-return.
2568 if (Idx == 0)
2569 return false;
2570
2571 // Did we determine that (only) some instructions are unprofitable to sink?
2572 if (Idx < ScanIdx) {
2573 // Okay, some instructions are unprofitable.
2574 ScanIdx = Idx;
2575 InstructionsToSink = InstructionsProfitableToSink;
2576
2577 // But, that may make other instructions unprofitable, too.
2578 // So, do a backward scan, do any earlier instructions become
2579 // unprofitable?
2580 assert(
2581 !ProfitableToSinkInstruction(LRI) &&
2582 "We already know that the last instruction is unprofitable to sink");
2583 ++LRI;
2584 --Idx;
2585 while (Idx >= 0) {
2586 // If we detect that an instruction becomes unprofitable to sink,
2587 // all earlier instructions won't be sunk either,
2588 // so preemptively keep InstructionsProfitableToSink in sync.
2589 // FIXME: is this the most performant approach?
2590 for (auto *I : *LRI)
2591 InstructionsProfitableToSink.erase(I);
2592 if (!ProfitableToSinkInstruction(LRI)) {
2593 // Everything starting with this instruction won't be sunk.
2594 ScanIdx = Idx;
2595 InstructionsToSink = InstructionsProfitableToSink;
2596 }
2597 ++LRI;
2598 --Idx;
2599 }
2600 }
2601
2602 // If no instructions can be sunk, early-return.
2603 if (ScanIdx == 0)
2604 return false;
2605 }
2606
2607 bool Changed = false;
2608
2609 if (HaveNonUnconditionalPredecessors) {
2610 if (!followedByDeoptOrUnreachable) {
2611 // It is always legal to sink common instructions from unconditional
2612 // predecessors. However, if not all predecessors are unconditional,
2613 // this transformation might be pessimizing. So as a rule of thumb,
2614 // don't do it unless we'd sink at least one non-speculatable instruction.
2615 // See https://bugs.llvm.org/show_bug.cgi?id=30244
2616 LRI.reset();
2617 int Idx = 0;
2618 bool Profitable = false;
2619 while (Idx < ScanIdx) {
2620 if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
2621 Profitable = true;
2622 break;
2623 }
2624 --LRI;
2625 ++Idx;
2626 }
2627 if (!Profitable)
2628 return false;
2629 }
2630
2631 LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
2632 // We have a conditional edge and we're going to sink some instructions.
2633 // Insert a new block postdominating all blocks we're going to sink from.
2634 if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
2635 // Edges couldn't be split.
2636 return false;
2637 Changed = true;
2638 }
2639
2640 // Now that we've analyzed all potential sinking candidates, perform the
2641 // actual sink. We iteratively sink the last non-terminator of the source
2642 // blocks into their common successor unless doing so would require too
2643 // many PHI instructions to be generated (currently only one PHI is allowed
2644 // per sunk instruction).
2645 //
2646 // We can use InstructionsToSink to discount values needing PHI-merging that will
2647 // actually be sunk in a later iteration. This allows us to be more
2648 // aggressive in what we sink. This does allow a false positive where we
2649 // sink presuming a later value will also be sunk, but stop half way through
2650 // and never actually sink it which means we produce more PHIs than intended.
2651 // This is unlikely in practice though.
2652 int SinkIdx = 0;
2653 for (; SinkIdx != ScanIdx; ++SinkIdx) {
2654 LLVM_DEBUG(dbgs() << "SINK: Sink: "
2655 << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
2656 << "\n");
2657
2658 // Because we've sunk every instruction in turn, the current instruction to
2659 // sink is always at index 0.
2660 LRI.reset();
2661
2662 sinkLastInstruction(UnconditionalPreds);
2663 NumSinkCommonInstrs++;
2664 Changed = true;
2665 }
2666 if (SinkIdx != 0)
2667 ++NumSinkCommonCode;
2668 return Changed;
2669}
2670
2671namespace {
2672
/// Groups `invoke` instructions into sets whose members are pairwise
/// "compatible" (as decided by shouldBelongToSameSet()), so that each set can
/// later be merged into a single `invoke`.
///
/// NOTE(review): the member functions refer to a `Sets` container whose
/// declaration is not visible in this listing -- confirm against the full file.
2673struct CompatibleSets {
// One group of `invoke`s that were judged compatible with each other.
2674 using SetTy = SmallVector<InvokeInst *, 2>;
2675
2677
// Returns true if the two given `invoke`s may live in the same set.
// Asserts that exactly two candidates are passed.
2678 static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);
2679
// Returns the first existing set whose front element is compatible with II,
// or a freshly appended empty set if there is none.
2680 SetTy &getCompatibleSet(InvokeInst *II);
2681
// Appends II to the set chosen by getCompatibleSet().
2682 void insert(InvokeInst *II);
2683};
2684
2685CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
2686 // Perform a linear scan over all the existing sets, see if the new `invoke`
2687 // is compatible with any particular set. Since we know that all the `invokes`
2688 // within a set are compatible, only check the first `invoke` in each set.
2689 // WARNING: at worst, this has quadratic complexity.
2690 for (CompatibleSets::SetTy &Set : Sets) {
2691 if (CompatibleSets::shouldBelongToSameSet({Set.front(), II}))
2692 return Set;
2693 }
2694
2695 // Otherwise, we either had no sets yet, or this invoke forms a new set.
2696 return Sets.emplace_back();
2697}
2698
2699void CompatibleSets::insert(InvokeInst *II) {
2700 getCompatibleSet(II).emplace_back(II);
2701}
2702
/// Decide whether two `invoke`s are compatible enough to share a merge set.
///
/// \param Invokes The two candidate `invoke`s; exactly two are required
///        (asserted below).
/// \returns true only if every check below passes. The checks are, in order:
///          neither invoke is `cannotMerge()` or inline asm; both are direct
///          (with the same called operand) or both are indirect; both have a
///          normal destination or neither does, and if they do it is the same
///          block with compatible incoming values; incoming values in the
///          shared unwind destination are compatible; the two instructions
///          are the same operation (using intersected attributes); and no
///          pair of differing data operands is illegal to merge.
///
/// NOTE(review): three calls in this listing are missing the line that opens
/// them (the normal-destination check, the unwind-destination check, and the
/// `return` inside `IsIllegalToMergeArguments`); only their trailing
/// arguments are visible. Confirm the callee names against the full file.
2703bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
2704 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2705
2706 // Can we theoretically merge these `invoke`s?
2707 auto IsIllegalToMerge = [](InvokeInst *II) {
2708 return II->cannotMerge() || II->isInlineAsm();
2709 };
2710 if (any_of(Invokes, IsIllegalToMerge))
2711 return false;
2712
2713 // Either both `invoke`s must be direct,
2714 // or both `invoke`s must be indirect.
2715 auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
2716 bool HaveIndirectCalls = any_of(Invokes, IsIndirectCall);
2717 bool AllCallsAreIndirect = all_of(Invokes, IsIndirectCall);
2718 if (HaveIndirectCalls) {
2719 if (!AllCallsAreIndirect)
2720 return false;
2721 } else {
2722 // All callees must be identical.
2723 Value *Callee = nullptr;
2724 for (InvokeInst *II : Invokes) {
2725 Value *CurrCallee = II->getCalledOperand();
2726 assert(CurrCallee && "There is always a called operand.");
2727 if (!Callee)
2728 Callee = CurrCallee;
2729 else if (Callee != CurrCallee)
2730 return false;
2731 }
2732 }
2733
2734 // Either both `invoke`s must not have a normal destination,
2735 // or both `invoke`s must have a normal destination,
 // Here "has a normal destination" means the normal successor does not
 // start (after PHIs and debug instructions) with `unreachable`.
2736 auto HasNormalDest = [](InvokeInst *II) {
2737 return !isa<UnreachableInst>(II->getNormalDest()->getFirstNonPHIOrDbg());
2738 };
2739 if (any_of(Invokes, HasNormalDest)) {
2740 // Do not merge `invoke` that does not have a normal destination with one
2741 // that does have a normal destination, even though doing so would be legal.
2742 if (!all_of(Invokes, HasNormalDest))
2743 return false;
2744
2745 // All normal destinations must be identical.
2746 BasicBlock *NormalBB = nullptr;
2747 for (InvokeInst *II : Invokes) {
2748 BasicBlock *CurrNormalBB = II->getNormalDest();
2749 assert(CurrNormalBB && "There is always a 'continue to' basic block.");
2750 if (!NormalBB)
2751 NormalBB = CurrNormalBB;
2752 else if (NormalBB != CurrNormalBB)
2753 return false;
2754 }
2755
2756 // In the normal destination, the incoming values for these two `invoke`s
2757 // must be compatible.
 // The two invokes themselves are seeded into the equivalence set that is
 // passed to the check below.
2758 SmallPtrSet<Value *, 16> EquivalenceSet(llvm::from_range, Invokes);
2760 NormalBB, {Invokes[0]->getParent(), Invokes[1]->getParent()},
2761 &EquivalenceSet))
2762 return false;
2763 }
2764
2765#ifndef NDEBUG
2766 // All unwind destinations must be identical.
2767 // We know that because we have started from said unwind destination.
2768 BasicBlock *UnwindBB = nullptr;
2769 for (InvokeInst *II : Invokes) {
2770 BasicBlock *CurrUnwindBB = II->getUnwindDest();
2771 assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
2772 if (!UnwindBB)
2773 UnwindBB = CurrUnwindBB;
2774 else
2775 assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
2776 }
2777#endif
2778
2779 // In the unwind destination, the incoming values for these two `invoke`s
2780 // must be compatible.
2782 Invokes.front()->getUnwindDest(),
2783 {Invokes[0]->getParent(), Invokes[1]->getParent()}))
2784 return false;
2785
2786 // Ignoring arguments, these `invoke`s must be identical,
2787 // including operand bundles.
2788 const InvokeInst *II0 = Invokes.front();
2789 for (auto *II : Invokes.drop_front())
2790 if (!II->isSameOperationAs(II0, Instruction::CompareUsingIntersectedAttrs))
2791 return false;
2792
2793 // Can we theoretically form the data operands for the merged `invoke`?
 // Operands that are already identical never block the merge; only pairs
 // that differ are subjected to the (truncated) legality query below.
2794 auto IsIllegalToMergeArguments = [](auto Ops) {
2795 Use &U0 = std::get<0>(Ops);
2796 Use &U1 = std::get<1>(Ops);
2797 if (U0 == U1)
2798 return false;
2800 U0.getOperandNo());
2801 };
2802 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2803 if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()),
2804 IsIllegalToMergeArguments))
2805 return false;
2806
2807 return true;
2808}
2809
2810} // namespace
2811
2812// Merge all invokes in the provided set, all of which are compatible
2813// as per the `CompatibleSets::shouldBelongToSameSet()`.
//
// Steps performed, in order:
//  1. Clone the first invoke into a new block inserted after its parent; if
//     the set has no normal destination, also create a new block holding only
//     `unreachable` and use it as the clone's normal destination.
//  2. If a DomTreeUpdater was supplied, queue the CFG edge insertions and
//     deletions implied by the rewrite.
//  3. For each callee/data operand that differs between the invokes, create a
//     PHI in the merged block and use it as the merged operand.
//  4. Register the merged block as a predecessor in the successors' PHIs.
//  5. Replace every original invoke with an unconditional branch to the
//     merged block, intersecting attributes and merging debug locations.
//
// `Invokes` must contain at least two entries (asserted). `DTU` may be null,
// in which case no dominator tree updates are recorded or applied.
//
// NOTE(review): the first line of the signature, the declaration of
// `Updates`, and the line that creates the PHI node `PN` are not visible in
// this listing -- confirm against the full file.
2815 DomTreeUpdater *DTU) {
2816 assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");
2817
 // Capacity estimate: per invoke one inserted edge plus two deleted edges,
 // plus the edges leaving the merged block.
2819 if (DTU)
2820 Updates.reserve(2 + 3 * Invokes.size());
2821
 // Same "normal destination" notion as in shouldBelongToSameSet(): the normal
 // successor does not start with `unreachable`.
2822 bool HasNormalDest =
2823 !isa<UnreachableInst>(Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());
2824
2825 // Clone one of the invokes into a new basic block.
2826 // Since they are all compatible, it doesn't matter which invoke is cloned.
2827 InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
2828 InvokeInst *II0 = Invokes.front();
2829 BasicBlock *II0BB = II0->getParent();
2830 BasicBlock *InsertBeforeBlock =
2831 II0->getParent()->getIterator()->getNextNode();
2832 Function *Func = II0BB->getParent();
2833 LLVMContext &Ctx = II0->getContext();
2834
2835 BasicBlock *MergedInvokeBB = BasicBlock::Create(
2836 Ctx, II0BB->getName() + ".invoke", Func, InsertBeforeBlock);
2837
2838 auto *MergedInvoke = cast<InvokeInst>(II0->clone());
2839 // NOTE: all invokes have the same attributes, so no handling needed.
2840 MergedInvoke->insertInto(MergedInvokeBB, MergedInvokeBB->end());
2841
2842 if (!HasNormalDest) {
2843 // This set does not have a normal destination,
2844 // so just form a new block with unreachable terminator.
2845 BasicBlock *MergedNormalDest = BasicBlock::Create(
2846 Ctx, II0BB->getName() + ".cont", Func, InsertBeforeBlock);
2847 auto *UI = new UnreachableInst(Ctx, MergedNormalDest);
2848 UI->setDebugLoc(DebugLoc::getTemporary());
2849 MergedInvoke->setNormalDest(MergedNormalDest);
2850 }
2851
2852 // The unwind destination, however, remains identical for all invokes here.
2853
2854 return MergedInvoke;
2855 }();
2856
2857 if (DTU) {
2858 // Predecessor blocks that contained these invokes will now branch to
2859 // the new block that contains the merged invoke, ...
2860 for (InvokeInst *II : Invokes)
2861 Updates.push_back(
2862 {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});
2863
2864 // ... which has the new `unreachable` block as normal destination,
2865 // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
2866 for (BasicBlock *SuccBBOfMergedInvoke : successors(MergedInvoke))
2867 Updates.push_back({DominatorTree::Insert, MergedInvoke->getParent(),
2868 SuccBBOfMergedInvoke});
2869
2870 // Since predecessor blocks now unconditionally branch to a new block,
2871 // they no longer branch to their original successors.
2872 for (InvokeInst *II : Invokes)
2873 for (BasicBlock *SuccOfPredBB : successors(II->getParent()))
2874 Updates.push_back(
2875 {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
2876 }
2877
2878 bool IsIndirectCall = Invokes[0]->isIndirectCall();
2879
2880 // Form the merged operands for the merged invoke.
2881 for (Use &U : MergedInvoke->operands()) {
2882 // Only PHI together the indirect callees and data operands.
2883 if (MergedInvoke->isCallee(&U)) {
2884 if (!IsIndirectCall)
2885 continue;
2886 } else if (!MergedInvoke->isDataOperand(&U))
2887 continue;
2888
2889 // Don't create trivial PHI's with all-identical incoming values.
2890 bool NeedPHI = any_of(Invokes, [&U](InvokeInst *II) {
2891 return II->getOperand(U.getOperandNo()) != U.get();
2892 });
2893 if (!NeedPHI)
2894 continue;
2895
2896 // Form a PHI out of all the data ops under this index.
2898 U->getType(), /*NumReservedValues=*/Invokes.size(), "", MergedInvoke->getIterator());
 // One incoming value per original invoke, keyed by that invoke's block.
2899 for (InvokeInst *II : Invokes)
2900 PN->addIncoming(II->getOperand(U.getOperandNo()), II->getParent());
2901
2902 U.set(PN);
2903 }
2904
2905 // We've ensured that each PHI node has compatible (identical) incoming values
2906 // when coming from each of the `invoke`s in the current merge set,
2907 // so update the PHI nodes accordingly.
2908 for (BasicBlock *Succ : successors(MergedInvoke))
2909 addPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
2910 /*ExistPred=*/Invokes.front()->getParent());
2911
2912 // And finally, replace the original `invoke`s with an unconditional branch
2913 // to the block with the merged `invoke`. Also, give that merged `invoke`
2914 // the merged debugloc of all the original `invoke`s.
2915 DILocation *MergedDebugLoc = nullptr;
2916 for (InvokeInst *II : Invokes) {
2917 // Compute the debug location common to all the original `invoke`s.
2918 if (!MergedDebugLoc)
2919 MergedDebugLoc = II->getDebugLoc();
2920 else
2921 MergedDebugLoc =
2922 DebugLoc::getMergedLocation(MergedDebugLoc, II->getDebugLoc());
2923
2924 // And replace the old `invoke` with an unconditional branch
2925 // to the block with the merged `invoke`.
 // The old successors must forget this block as a predecessor before the
 // invoke (the block's terminator) is replaced.
2926 for (BasicBlock *OrigSuccBB : successors(II->getParent()))
2927 OrigSuccBB->removePredecessor(II->getParent());
2928 auto *BI = UncondBrInst::Create(MergedInvoke->getParent(), II->getParent());
2929 // The unconditional branch is part of the replacement for the original
2930 // invoke, so should use its DebugLoc.
2931 BI->setDebugLoc(II->getDebugLoc());
2932 bool Success = MergedInvoke->tryIntersectAttributes(II);
2933 assert(Success && "Merged invokes with incompatible attributes");
2934 // For NDEBUG Compile
2935 (void)Success;
2936 II->replaceAllUsesWith(MergedInvoke);
2937 II->eraseFromParent();
2938 ++NumInvokesMerged;
2939 }
2940 MergedInvoke->setDebugLoc(MergedDebugLoc);
2941 ++NumInvokeSetsFormed;
2942
 // Apply all queued dominator tree updates in one batch.
2943 if (DTU)
2944 DTU->applyUpdates(Updates);
2945}
2946
2947/// If this block is a `landingpad` exception handling block, categorize all
2948/// the predecessor `invoke`s into sets, with all `invoke`s in each set
2949/// being "mergeable" together, and then merge invokes in each set together.
2950///
2951/// This is a weird mix of hoisting and sinking. Visually, it goes from:
2952/// [...] [...]
2953/// | |
2954/// [invoke0] [invoke1]
2955/// / \ / \
2956/// [cont0] [landingpad] [cont1]
2957/// to:
2958/// [...] [...]
2959/// \ /
2960/// [invoke]
2961/// / \
2962/// [cont] [landingpad]
2963///
2964/// But of course we can only do that if the invokes share the `landingpad`,
2965/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
2966/// and the invoked functions are "compatible".
///
/// Returns true if at least one set of two or more invokes was merged, and
/// false otherwise (including when the block is not a landing pad).
///
/// NOTE(review): the function signature and the condition guarding the first
/// `return false` are not visible in this listing; the body uses `BB` and
/// `DTU`, presumably its parameters -- confirm against the full file.
2969 return false;
2970
2971 bool Changed = false;
2972
2973 // FIXME: generalize to all exception handling blocks?
2974 if (!BB->isLandingPad())
2975 return Changed;
2976
2977 CompatibleSets Grouper;
2978
2979 // Record all the predecessors of this `landingpad`. As per verifier,
2980 // the only allowed predecessor is the unwind edge of an `invoke`.
2981 // We want to group "compatible" `invokes` into the same set to be merged.
2982 for (BasicBlock *PredBB : predecessors(BB))
2983 Grouper.insert(cast<InvokeInst>(PredBB->getTerminator()));
2984
2985 // And now, merge `invoke`s that were grouped together.
 // Sets with a single member have nothing to merge with and are skipped.
2986 for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
2987 if (Invokes.size() < 2)
2988 continue;
2989 Changed = true;
2990 mergeCompatibleInvokesImpl(Invokes, DTU);
2991 }
2992
2993 return Changed;
2994}
2995
2996namespace {
2997/// Track ephemeral values, which should be ignored for cost-modelling
2998/// purposes. Requires walking instructions in reverse order.
2999class EphemeralValueTracker {
3000 SmallPtrSet<const Instruction *, 32> EphValues;
3001
3002 bool isEphemeral(const Instruction *I) {
3003 if (isa<AssumeInst>(I))
3004 return true;
3005 return !I->mayHaveSideEffects() && !I->isTerminator() &&
3006 all_of(I->users(), [&](const User *U) {
3007 return EphValues.count(cast<Instruction>(U));
3008 });
3009 }
3010
3011public:
3012 bool track(const Instruction *I) {
3013 if (isEphemeral(I)) {
3014 EphValues.insert(I);
3015 return true;
3016 }
3017 return false;
3018 }
3019
3020 bool contains(const Instruction *I) const { return EphValues.contains(I); }
3021};
3022} // namespace
3023
3024/// Determine if we can hoist sink a sole store instruction out of a
3025/// conditional block.
3026///
3027/// We are looking for code like the following:
3028/// BrBB:
3029/// store i32 %add, i32* %arrayidx2
3030/// ... // No other stores or function calls (we could be calling a memory
3031/// ... // function).
3032/// %cmp = icmp ult %x, %y
3033/// br i1 %cmp, label %EndBB, label %ThenBB
3034/// ThenBB:
3035/// store i32 %add5, i32* %arrayidx2
3036/// br label EndBB
3037/// EndBB:
3038/// ...
3039/// We are going to transform this into:
3040/// BrBB:
3041/// store i32 %add, i32* %arrayidx2
3042/// ... //
3043/// %cmp = icmp ult %x, %y
3044/// %add.add5 = select i1 %cmp, i32 %add, %add5
3045/// store i32 %add.add5, i32* %arrayidx2
3046/// ...
3047///
3048/// \return The value operand of the previous store to the same pointer, or
3049/// the previous load of the same pointer, if the store can be hoisted into
/// the predecessor block; nullptr otherwise.
///
/// The candidate store must be simple (not volatile or atomic). BrBB is
/// scanned backwards over a bounded number of instructions; any instruction
/// that may write to memory and is not a store aborts the search.
///
/// NOTE(review): the first line of the signature (which, judging by the body,
/// declares `I` and `BrBB`) and part of the condition that accepts a previous
/// load are not visible in this listing -- confirm against the full file.
3051 BasicBlock *StoreBB, BasicBlock *EndBB) {
3052 StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
3053 if (!StoreToHoist)
3054 return nullptr;
3055
3056 // Volatile or atomic.
3057 if (!StoreToHoist->isSimple())
3058 return nullptr;
3059
3060 Value *StorePtr = StoreToHoist->getPointerOperand();
3061 Type *StoreTy = StoreToHoist->getValueOperand()->getType();
3062
3063 // Look for a store to the same pointer in BrBB.
3064 unsigned MaxNumInstToLookAt = 9;
3065 // Skip pseudo probe intrinsic calls which are not really killing any memory
3066 // accesses.
 // Note that the budget is decremented before the pseudo-probe check, so
 // skipped probes still count against MaxNumInstToLookAt.
3067 for (Instruction &CurI : reverse(*BrBB)) {
3068 if (!MaxNumInstToLookAt)
3069 break;
3070 --MaxNumInstToLookAt;
3071
3072 if (isa<PseudoProbeInst>(CurI))
3073 continue;
3074
3075 // Could be calling an instruction that affects memory like free().
3076 if (CurI.mayWriteToMemory() && !isa<StoreInst>(CurI))
3077 return nullptr;
3078
3079 if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
3080 // Found the previous store to same location and type. Make sure it is
3081 // simple, to avoid introducing a spurious non-atomic write after an
3082 // atomic write.
 // The previous store must also be at least as aligned as the store being
 // hoisted.
3083 if (SI->getPointerOperand() == StorePtr &&
3084 SI->getValueOperand()->getType() == StoreTy && SI->isSimple() &&
3085 SI->getAlign() >= StoreToHoist->getAlign())
3086 // Found the previous store, return its value operand.
3087 return SI->getValueOperand();
3088 return nullptr; // Unknown store.
3089 }
3090
3091 if (auto *LI = dyn_cast<LoadInst>(&CurI)) {
 // A simple, sufficiently aligned load of the same pointer and type is
 // also acceptable, provided the underlying object is writable.
3092 if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
3093 LI->isSimple() && LI->getAlign() >= StoreToHoist->getAlign()) {
3094 Value *Obj = getUnderlyingObject(StorePtr);
3095 bool ExplicitlyDereferenceableOnly;
3096 // The dereferenceability query here is only required to satisfy the
3097 // writable contract, actual dereferenceability is proven by the
3098 // presence of an access. As such, we can ignore frees.
3099 if (isWritableObject(Obj, ExplicitlyDereferenceableOnly) &&
3102 .WithoutRet) &&
3103 (!ExplicitlyDereferenceableOnly ||
3104 isDereferenceablePointer(StorePtr, StoreTy, LI->getDataLayout(),
3105 /*IgnoreFree=*/true))) {
3106 // Found a previous load, return it.
3107 return LI;
3108 }
3109 }
3110 // The load didn't work out, but we may still find a store.
3111 }
3112 }
3113
 // Budget exhausted or start of block reached without a usable access.
3114 return nullptr;
3115}
3116
3117/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
3118/// converted to selects.
///
/// For every PHI in \p EndBB whose incoming values from BB and ThenBB differ,
/// the cost of one select is added to \p Cost. ConstantExpr incoming values
/// are additionally costed, and each such PHI counts as one more speculated
/// instruction in \p SpeculatedInstructions.
///
/// \returns false if converting would hide undefined behavior that could be
/// removed instead, if the constant-expression cost exceeds the limit, or if
/// more than one instruction would be speculated; otherwise true iff at
/// least one non-trivial PHI was found.
///
/// NOTE(review): the first line of the signature (the body uses `BB` and
/// `ThenBB`), the declaration opened by the `hasMinSize()` line, the tail of
/// the select cost call, the second half of the undefined-value check and the
/// initializer of `MaxCost` are not visible in this listing -- confirm
/// against the full file.
3120 BasicBlock *EndBB,
3121 unsigned &SpeculatedInstructions,
3122 InstructionCost &Cost,
3123 const TargetTransformInfo &TTI) {
3125 BB->getParent()->hasMinSize()
3128
3129 bool HaveRewritablePHIs = false;
3130 for (PHINode &PN : EndBB->phis()) {
3131 Value *OrigV = PN.getIncomingValueForBlock(BB);
3132 Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
3133
3134 // FIXME: Try to remove some of the duplication with
3135 // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial.
3136 if (ThenV == OrigV)
3137 continue;
3138
 // Each non-trivial PHI will need one select; account for it up front.
3139 Cost += TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(),
3140 CmpInst::makeCmpResultType(PN.getType()),
3142
3143 // Don't convert to selects if we could remove undefined behavior instead.
3144 if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
3146 return false;
3147
3148 HaveRewritablePHIs = true;
3149 ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
3150 ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
3151 if (!OrigCE && !ThenCE)
3152 continue; // Known cheap (FIXME: Maybe not true for aggregates).
3153
 // A side that is not a ConstantExpr contributes zero to the cost.
3154 InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0;
3155 InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0;
3156 InstructionCost MaxCost =
3158 if (OrigCost + ThenCost > MaxCost)
3159 return false;
3160
3161 // Account for the cost of an unfolded ConstantExpr which could end up
3162 // getting expanded into Instructions.
3163 // FIXME: This doesn't account for how many operations are combined in the
3164 // constant expression.
3165 ++SpeculatedInstructions;
3166 if (SpeculatedInstructions > 1)
3167 return false;
3168 }
3169
3170 return HaveRewritablePHIs;
3171}
3172
/// Decide, from branch metadata, whether speculating across the conditional
/// branch `BI` is considered profitable.
///
/// \param Invert When set, selects which branch weight is treated as the
///        weight of the edge to the end block: the true weight if `*Invert`
///        is true, the false weight otherwise.
/// \param TTI Supplies the predictable-branch probability threshold.
/// \returns true if `BI` carries `unpredictable` metadata, or has no usable
///          branch weights (none could be extracted, or they sum to zero).
///          With usable weights, returns false if \p Invert is unset;
///          otherwise returns true iff the probability of the end-block edge
///          is below the threshold.
///
/// NOTE(review): the first line of the signature (function name and the `BI`
/// parameter) is not visible in this listing -- confirm against the full file.
3174 std::optional<bool> Invert,
3175 const TargetTransformInfo &TTI) {
3176 // If the branch is non-unpredictable, and is predicted to *not* branch to
3177 // the `then` block, then avoid speculating it.
3178 if (BI->getMetadata(LLVMContext::MD_unpredictable))
3179 return true;
3180
3181 uint64_t TWeight, FWeight;
3182 if (!extractBranchWeights(*BI, TWeight, FWeight) || (TWeight + FWeight) == 0)
3183 return true;
3184
 // Weights exist, but the caller did not say which edge leads to the end
 // block, so the probability cannot be evaluated.
3185 if (!Invert.has_value())
3186 return false;
3187
3188 uint64_t EndWeight = *Invert ? TWeight : FWeight;
3189 BranchProbability BIEndProb =
3190 BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight);
3191 BranchProbability Likely = TTI.getPredictableBranchThreshold();
3192 return BIEndProb < Likely;
3193}
3194
3195/// Speculate a conditional basic block flattening the CFG.
3196///
3197/// Note that this is a very risky transform currently. Speculating
3198/// instructions like this is most often not desirable. Instead, there is an MI
3199/// pass which can do it with full awareness of the resource constraints.
3200/// However, some cases are "obvious" and we should do directly. An example of
3201/// this is speculating a single, reasonably cheap instruction.
3202///
3203/// There is only one distinct advantage to flattening the CFG at the IR level:
3204/// it makes very common but simplistic optimizations such as are common in
3205/// instcombine and the DAG combiner more powerful by removing CFG edges and
3206/// modeling their effects with easier to reason about SSA value graphs.
3207///
3208///
3209/// An illustration of this transform is turning this IR:
3210/// \code
3211/// BB:
3212/// %cmp = icmp ult %x, %y
3213/// br i1 %cmp, label %EndBB, label %ThenBB
3214/// ThenBB:
3215/// %sub = sub %x, %y
3216/// br label %EndBB
3217/// EndBB:
3218/// %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
3219/// ...
3220/// \endcode
3221///
3222/// Into this IR:
3223/// \code
3224/// BB:
3225/// %cmp = icmp ult %x, %y
3226/// %sub = sub %x, %y
3227/// %cond = select i1 %cmp, 0, %sub
3228/// ...
3229/// \endcode
3230///
/// \param BI The conditional branch terminating the block that guards
///        \p ThenBB.
/// \param ThenBB The conditionally executed block; its terminator's first
///        successor is taken as the end block, which must be the other
///        successor of \p BI (asserted).
3231/// \returns true if the conditional block is removed.
///
/// NOTE(review): several statements in this listing are visibly truncated:
/// the initializer of `Budget`, parts of the `IsSafeCheapLoadStore`
/// expression and of the two hoisting-safety conditions, and the declarations
/// of `OpI` and `Cost`. Consult the full file before relying on those details.
3232bool SimplifyCFGOpt::speculativelyExecuteBB(CondBrInst *BI,
3233 BasicBlock *ThenBB) {
3234 if (!Options.SpeculateBlocks)
3235 return false;
3236
3237 // Be conservative for now. FP select instruction can often be expensive.
3238 Value *BrCond = BI->getCondition();
3239 if (isa<FCmpInst>(BrCond))
3240 return false;
3241
3242 BasicBlock *BB = BI->getParent();
3243 BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
3244 InstructionCost Budget =
3246
3247 // If ThenBB is actually on the false edge of the conditional branch, remember
3248 // to swap the select operands later.
3249 bool Invert = false;
3250 if (ThenBB != BI->getSuccessor(0)) {
3251 assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
3252 Invert = true;
3253 }
3254 assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
3255
3256 if (!isProfitableToSpeculate(BI, Invert, TTI))
3257 return false;
3258
3259 // Keep a count of how many times instructions are used within ThenBB when
3260 // they are candidates for sinking into ThenBB. Specifically:
3261 // - They are defined in BB, and
3262 // - They have no side effects, and
3263 // - All of their uses are in ThenBB.
3264 SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
3265
3266 SmallVector<Instruction *, 4> SpeculatedPseudoProbes;
3267
3268 unsigned SpeculatedInstructions = 0;
3269 bool HoistLoadsStores = Options.HoistLoadsStoresWithCondFaulting;
3270 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
3271 Value *SpeculatedStoreValue = nullptr;
3272 StoreInst *SpeculatedStore = nullptr;
3273 EphemeralValueTracker EphTracker;
 // Walk ThenBB backwards, excluding its terminator; the reverse order is what
 // lets EphTracker recognise ephemeral values.
3274 for (Instruction &I : reverse(drop_end(*ThenBB))) {
3275 // Skip pseudo probes. The consequence is we lose track of the branch
3276 // probability for ThenBB, which is fine since the optimization here takes
3277 // place regardless of the branch probability.
3278 if (isa<PseudoProbeInst>(I)) {
3279 // The probe should be deleted so that it will not be over-counted when
3280 // the samples collected on the non-conditional path are counted towards
3281 // the conditional path. We leave it for the counts inference algorithm to
3282 // figure out a proper count for an unknown probe.
3283 SpeculatedPseudoProbes.push_back(&I);
3284 continue;
3285 }
3286
3287 // Ignore ephemeral values, they will be dropped by the transform.
3288 if (EphTracker.track(&I))
3289 continue;
3290
3291 // Only speculatively execute a single instruction (not counting the
3292 // terminator) for now.
3293 bool IsSafeCheapLoadStore = HoistLoadsStores &&
3295 SpeculatedConditionalLoadsStores.size() <
3297 // Not count load/store into cost if target supports conditional faulting
3298 // b/c it's cheap to speculate it.
3299 if (IsSafeCheapLoadStore)
3300 SpeculatedConditionalLoadsStores.push_back(&I);
3301 else
3302 ++SpeculatedInstructions;
3303
3304 if (SpeculatedInstructions > 1)
3305 return false;
3306
3307 // Don't hoist the instruction if it's unsafe or expensive.
3308 if (!IsSafeCheapLoadStore &&
3310 !(HoistCondStores && !SpeculatedStoreValue &&
3311 (SpeculatedStoreValue =
3312 isSafeToSpeculateStore(&I, BB, ThenBB, EndBB))))
3313 return false;
3314 if (!IsSafeCheapLoadStore && !SpeculatedStoreValue &&
3317 return false;
3318
3319 // Store the store speculation candidate.
3320 if (!SpeculatedStore && SpeculatedStoreValue)
3321 SpeculatedStore = cast<StoreInst>(&I);
3322
3323 // Do not hoist the instruction if any of its operands are defined but not
3324 // used in BB. The transformation will prevent the operand from
3325 // being sunk into the use block.
3326 for (Use &Op : I.operands()) {
3328 if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
3329 continue; // Not a candidate for sinking.
3330
3331 ++SinkCandidateUseCounts[OpI];
3332 }
3333 }
3334
3335 // Consider any sink candidates which are only used in ThenBB as costs for
3336 // speculation. Note, while we iterate over a DenseMap here, we are summing
3337 // and so iteration order isn't significant.
3338 for (const auto &[Inst, Count] : SinkCandidateUseCounts)
3339 if (Inst->hasNUses(Count)) {
3340 ++SpeculatedInstructions;
3341 if (SpeculatedInstructions > 1)
3342 return false;
3343 }
3344
3345 // Check that we can insert the selects and that it's not too expensive to do
3346 // so.
 // A speculated store or any conditional load/store already justifies the
 // conversion; otherwise at least one PHI must be rewritable.
3347 bool Convert =
3348 SpeculatedStore != nullptr || !SpeculatedConditionalLoadsStores.empty();
3350 Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
3351 SpeculatedInstructions, Cost, TTI);
3352 if (!Convert || Cost > Budget)
3353 return false;
3354
3355 // If we get here, we can hoist the instruction and if-convert.
3356 LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
3357
3358 Instruction *Sel = nullptr;
3359 // Insert a select of the value of the speculated store.
3360 if (SpeculatedStoreValue) {
3361 IRBuilder<NoFolder> Builder(BI);
3362 Value *OrigV = SpeculatedStore->getValueOperand();
3363 Value *TrueV = SpeculatedStore->getValueOperand();
3364 Value *FalseV = SpeculatedStoreValue;
3365 if (Invert)
3366 std::swap(TrueV, FalseV);
3367 Value *S = Builder.CreateSelect(
3368 BrCond, TrueV, FalseV, "spec.store.select", BI);
3369 Sel = cast<Instruction>(S);
3370 SpeculatedStore->setOperand(0, S);
3371 SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
3372 SpeculatedStore->getDebugLoc());
3373 // The value stored is still conditional, but the store itself is now
3374 // unconditionally executed, so we must be sure that any linked dbg.assign
3375 // intrinsics are tracking the new stored value (the result of the
3376 // select). If we don't, and the store were to be removed by another pass
3377 // (e.g. DSE), then we'd eventually end up emitting a location describing
3378 // the conditional value, unconditionally.
3379 //
3380 // === Before this transformation ===
3381 // pred:
3382 // store %one, %x.dest, !DIAssignID !1
3383 // dbg.assign %one, "x", ..., !1, ...
3384 // br %cond if.then
3385 //
3386 // if.then:
3387 // store %two, %x.dest, !DIAssignID !2
3388 // dbg.assign %two, "x", ..., !2, ...
3389 //
3390 // === After this transformation ===
3391 // pred:
3392 // store %one, %x.dest, !DIAssignID !1
3393 // dbg.assign %one, "x", ..., !1
3394 /// ...
3395 // %merge = select %cond, %two, %one
3396 // store %merge, %x.dest, !DIAssignID !2
3397 // dbg.assign %merge, "x", ..., !2
3398 for (DbgVariableRecord *DbgAssign :
3399 at::getDVRAssignmentMarkers(SpeculatedStore))
3400 if (llvm::is_contained(DbgAssign->location_ops(), OrigV))
3401 DbgAssign->replaceVariableLocationOp(OrigV, S);
3402 }
3403
3404 // Metadata can be dependent on the condition we are hoisting above.
3405 // Strip all UB-implying metadata on the instruction. Drop the debug loc
3406 // to avoid making it appear as if the condition is a constant, which would
3407 // be misleading while debugging.
3408 // Similarly strip attributes that maybe dependent on condition we are
3409 // hoisting above.
 // The speculated store keeps its location: it was given a merged location
 // above.
3410 for (auto &I : make_early_inc_range(*ThenBB)) {
3411 if (!SpeculatedStoreValue || &I != SpeculatedStore) {
3412 I.dropLocation();
3413 }
3414 I.dropUBImplyingAttrsAndMetadata();
3415
3416 // Drop ephemeral values.
3417 if (EphTracker.contains(&I)) {
3418 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
3419 I.eraseFromParent();
3420 }
3421 }
3422
3423 // Hoist the instructions.
3424 // Drop DbgVariableRecords attached to these instructions.
3425 for (auto &It : *ThenBB)
3426 for (DbgRecord &DR : make_early_inc_range(It.getDbgRecordRange()))
3427 // Drop all records except assign-kind DbgVariableRecords (dbg.assign
3428 // equivalent).
3429 if (DbgVariableRecord *DVR = dyn_cast<DbgVariableRecord>(&DR);
3430 !DVR || !DVR->isDbgAssign())
3431 It.dropOneDbgRecord(&DR);
 // Move everything except ThenBB's terminator to just before BI in BB.
3432 BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
3433 std::prev(ThenBB->end()));
3434
3435 if (!SpeculatedConditionalLoadsStores.empty())
3436 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert,
3437 Sel);
3438
3439 // Insert selects and rewrite the PHI operands.
3440 IRBuilder<NoFolder> Builder(BI);
3441 for (PHINode &PN : EndBB->phis()) {
3442 unsigned OrigI = PN.getBasicBlockIndex(BB);
3443 unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
3444 Value *OrigV = PN.getIncomingValue(OrigI);
3445 Value *ThenV = PN.getIncomingValue(ThenI);
3446
3447 // Skip PHIs which are trivial.
3448 if (OrigV == ThenV)
3449 continue;
3450
3451 // Create a select whose true value is the speculatively executed value and
3452 // false value is the pre-existing value. Swap them if the branch
3453 // destinations were inverted.
3454 Value *TrueV = ThenV, *FalseV = OrigV;
3455 if (Invert)
3456 std::swap(TrueV, FalseV);
3457 Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
 // Both incoming edges now carry the same select result.
3458 PN.setIncomingValue(OrigI, V);
3459 PN.setIncomingValue(ThenI, V);
3460 }
3461
3462 // Remove speculated pseudo probes.
3463 for (Instruction *I : SpeculatedPseudoProbes)
3464 I->eraseFromParent();
3465
3466 ++NumSpeculations;
3467 return true;
3468}
3469
3471
3472// Return false if number of blocks searched is too much.
3473static bool findReaching(BasicBlock *BB, BasicBlock *DefBB,
3474 BlocksSet &ReachesNonLocalUses) {
3475 if (BB == DefBB)
3476 return true;
3477 if (!ReachesNonLocalUses.insert(BB).second)
3478 return true;
3479
3480 if (ReachesNonLocalUses.size() > MaxJumpThreadingLiveBlocks)
3481 return false;
3482 for (BasicBlock *Pred : predecessors(BB))
3483 if (!findReaching(Pred, DefBB, ReachesNonLocalUses))
3484 return false;
3485 return true;
3486}
3487
3488/// Return true if we can thread a branch across this block.
3490 BlocksSet &NonLocalUseBlocks) {
3491 int Size = 0;
3492 EphemeralValueTracker EphTracker;
3493
3494 // Walk the block in reverse so that we can identify ephemeral values
3495 // properly (values only feeding assumes).
3496 for (Instruction &I : reverse(*BB)) {
3497 // Can't fold blocks that contain noduplicate or convergent calls.
3498 if (CallInst *CI = dyn_cast<CallInst>(&I))
3499 if (CI->cannotDuplicate() || CI->isConvergent())
3500 return false;
3501
3502 // Ignore ephemeral values which are deleted during codegen.
3503 // We will delete Phis while threading, so Phis should not be accounted in
3504 // block's size.
3505 if (!EphTracker.track(&I) && !isa<PHINode>(I)) {
3506 if (Size++ > MaxSmallBlockSize)
3507 return false; // Don't clone large BB's.
3508 }
3509
3510 // Record blocks with non-local uses of values defined in the current basic
3511 // block.
3512 for (User *U : I.users()) {
3514 BasicBlock *UsedInBB = UI->getParent();
3515 if (UsedInBB == BB) {
3516 if (isa<PHINode>(UI))
3517 return false;
3518 } else
3519 NonLocalUseBlocks.insert(UsedInBB);
3520 }
3521
3522 // Looks ok, continue checking.
3523 }
3524
3525 return true;
3526}
3527
3529 BasicBlock *To) {
3530 // Don't look past the block defining the value, we might get the value from
3531 // a previous loop iteration.
3532 auto *I = dyn_cast<Instruction>(V);
3533 if (I && I->getParent() == To)
3534 return nullptr;
3535
3536 // We know the value if the From block branches on it.
3537 auto *BI = dyn_cast<CondBrInst>(From->getTerminator());
3538 if (BI && BI->getCondition() == V &&
3539 BI->getSuccessor(0) != BI->getSuccessor(1))
3540 return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext())
3541 : ConstantInt::getFalse(BI->getContext());
3542
3543 return nullptr;
3544}
3545
3546/// If we have a conditional branch on something for which we know the constant
3547/// value in predecessors (e.g. a phi node in the current block), thread edges
3548/// from the predecessor to their ultimate destination.
3549static std::optional<bool>
3551 const DataLayout &DL,
3552 AssumptionCache *AC) {
3554 BasicBlock *BB = BI->getParent();
3555 Value *Cond = BI->getCondition();
3557 if (PN && PN->getParent() == BB) {
3558 // Degenerate case of a single entry PHI.
3559 if (PN->getNumIncomingValues() == 1) {
3561 return true;
3562 }
3563
3564 for (Use &U : PN->incoming_values())
3565 if (auto *CB = dyn_cast<ConstantInt>(U))
3566 KnownValues[CB].insert(PN->getIncomingBlock(U));
3567 } else {
3568 for (BasicBlock *Pred : predecessors(BB)) {
3569 if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB))
3570 KnownValues[CB].insert(Pred);
3571 }
3572 }
3573
3574 if (KnownValues.empty())
3575 return false;
3576
3577 // Now we know that this block has multiple preds and two succs.
3578 // Check that the block is small enough and record which non-local blocks use
3579 // values defined in the block.
3580
3581 BlocksSet NonLocalUseBlocks;
3582 BlocksSet ReachesNonLocalUseBlocks;
3583 if (!blockIsSimpleEnoughToThreadThrough(BB, NonLocalUseBlocks))
3584 return false;
3585
3586 // Jump-threading can only be done to destinations where no values defined
3587 // in BB are live.
3588
3589 // Quickly check if both destinations have uses. If so, jump-threading cannot
3590 // be done.
3591 if (NonLocalUseBlocks.contains(BI->getSuccessor(0)) &&
3592 NonLocalUseBlocks.contains(BI->getSuccessor(1)))
3593 return false;
3594
3595 // Search backward from NonLocalUseBlocks to find which blocks
3596 // reach non-local uses.
3597 for (BasicBlock *UseBB : NonLocalUseBlocks)
3598 // Give up if too many blocks are searched.
3599 if (!findReaching(UseBB, BB, ReachesNonLocalUseBlocks))
3600 return false;
3601
3602 for (const auto &Pair : KnownValues) {
3603 ConstantInt *CB = Pair.first;
3604 ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
3605 BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
3606
3607 // Okay, we now know that all edges from PredBB should be revectored to
3608 // branch to RealDest.
3609 if (RealDest == BB)
3610 continue; // Skip self loops.
3611
3612 // Skip if the predecessor's terminator is an indirect branch.
3613 if (any_of(PredBBs, [](BasicBlock *PredBB) {
3614 return isa<IndirectBrInst>(PredBB->getTerminator());
3615 }))
3616 continue;
3617
3618 // Only revector to RealDest if no values defined in BB are live.
3619 if (ReachesNonLocalUseBlocks.contains(RealDest))
3620 continue;
3621
3622 LLVM_DEBUG({
3623 dbgs() << "Condition " << *Cond << " in " << BB->getName()
3624 << " has value " << *Pair.first << " in predecessors:\n";
3625 for (const BasicBlock *PredBB : Pair.second)
3626 dbgs() << " " << PredBB->getName() << "\n";
3627 dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
3628 });
3629
3630 // Split the predecessors we are threading into a new edge block. We'll
3631 // clone the instructions into this block, and then redirect it to RealDest.
3632 BasicBlock *EdgeBB = SplitBlockPredecessors(BB, PredBBs, ".critedge", DTU);
3633 if (!EdgeBB)
3634 continue;
3635
3636 // TODO: These just exist to reduce test diff, we can drop them if we like.
3637 EdgeBB->setName(RealDest->getName() + ".critedge");
3638 EdgeBB->moveBefore(RealDest);
3639
3640 // Update PHI nodes.
3641 addPredecessorToBlock(RealDest, EdgeBB, BB);
3642
3643 // BB may have instructions that are being threaded over. Clone these
3644 // instructions into EdgeBB. We know that there will be no uses of the
3645 // cloned instructions outside of EdgeBB.
3646 BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
3647 ValueToValueMapTy TranslateMap; // Track translated values.
3648 TranslateMap[Cond] = CB;
3649
3650 // RemoveDIs: track instructions that we optimise away while folding, so
3651 // that we can copy DbgVariableRecords from them later.
3652 BasicBlock::iterator SrcDbgCursor = BB->begin();
3653 for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
3654 if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
3655 TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB);
3656 continue;
3657 }
3658 // Clone the instruction.
3659 Instruction *N = BBI->clone();
3660 // Insert the new instruction into its new home.
3661 N->insertInto(EdgeBB, InsertPt);
3662
3663 if (BBI->hasName())
3664 N->setName(BBI->getName() + ".c");
3665
3666 // Update operands due to translation.
3667 // Key Instructions: Remap all the atom groups.
3668 if (const DebugLoc &DL = BBI->getDebugLoc())
3669 mapAtomInstance(DL, TranslateMap);
3670 RemapInstruction(N, TranslateMap,
3672
3673 // Check for trivial simplification.
3674 if (Value *V = simplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
3675 if (!BBI->use_empty())
3676 TranslateMap[&*BBI] = V;
3677 if (!N->mayHaveSideEffects()) {
3678 N->eraseFromParent(); // Instruction folded away, don't need actual
3679 // inst
3680 N = nullptr;
3681 }
3682 } else {
3683 if (!BBI->use_empty())
3684 TranslateMap[&*BBI] = N;
3685 }
3686 if (N) {
3687 // Copy all debug-info attached to the source instructions between the
3688 // last one we successfully cloned and this instruction (the ones in
3689 // between might have been folded away).
3690 for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
3691 N->cloneDebugInfoFrom(&*SrcDbgCursor);
3692 SrcDbgCursor = std::next(BBI);
3693 // Clone debug-info on this instruction too.
3694 N->cloneDebugInfoFrom(&*BBI);
3695
3696 // Register the new instruction with the assumption cache if necessary.
3697 if (auto *Assume = dyn_cast<AssumeInst>(N))
3698 if (AC)
3699 AC->registerAssumption(Assume);
3700 }
3701 }
3702
3703 for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
3704 InsertPt->cloneDebugInfoFrom(&*SrcDbgCursor);
3705 InsertPt->cloneDebugInfoFrom(BI);
3706
3707 BB->removePredecessor(EdgeBB);
3708 UncondBrInst *EdgeBI = cast<UncondBrInst>(EdgeBB->getTerminator());
3709 EdgeBI->setSuccessor(0, RealDest);
3710 EdgeBI->setDebugLoc(BI->getDebugLoc());
3711
3712 if (DTU) {
3714 Updates.push_back({DominatorTree::Delete, EdgeBB, BB});
3715 Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
3716 DTU->applyUpdates(Updates);
3717 }
3718
3719 // For simplicity, we created a separate basic block for the edge. Merge
3720 // it back into the predecessor if possible. This not only avoids
3721 // unnecessary SimplifyCFG iterations, but also makes sure that we don't
3722 // bypass the check for trivial cycles above.
3723 MergeBlockIntoPredecessor(EdgeBB, DTU);
3724
3725 // Signal repeat, simplifying any other constants.
3726 return std::nullopt;
3727 }
3728
3729 return false;
3730}
3731
3732bool SimplifyCFGOpt::foldCondBranchOnValueKnownInPredecessor(CondBrInst *BI) {
3733 // Note: If BB is a loop header then there is a risk that threading introduces
3734 // a non-canonical loop by moving a back edge. So we avoid this optimization
3735 // for loop headers if NeedCanonicalLoop is set.
3736 if (Options.NeedCanonicalLoop && is_contained(LoopHeaders, BI->getParent()))
3737 return false;
3738
3739 std::optional<bool> Result;
3740 bool EverChanged = false;
3741 do {
3742 // Note that std::nullopt means "we changed things, so run again."
3743 Result =
3745 EverChanged |= Result == std::nullopt || *Result;
3746 } while (Result == std::nullopt);
3747 return EverChanged;
3748}
3749
3750/// Given a BB that starts with the specified two-entry PHI node,
3751/// see if we can eliminate it.
3754 const DataLayout &DL,
3755 bool SpeculateUnpredictables) {
3756 // Ok, this is a two entry PHI node. Check to see if this is a simple "if
3757 // statement", which has a very simple dominance structure. Basically, we
3758 // are trying to find the condition that is being branched on, which
3759 // subsequently causes this merge to happen. We really want control
3760 // dependence information for this check, but simplifycfg can't keep it up
3761 // to date, and this catches most of the cases we care about anyway.
3762 BasicBlock *BB = PN->getParent();
3763
3764 BasicBlock *IfTrue, *IfFalse;
3765 CondBrInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
3766 if (!DomBI)
3767 return false;
3768 Value *IfCond = DomBI->getCondition();
3769 // Don't bother if the branch will be constant folded trivially.
3770 if (isa<ConstantInt>(IfCond))
3771 return false;
3772
3773 BasicBlock *DomBlock = DomBI->getParent();
3775 llvm::copy_if(PN->blocks(), std::back_inserter(IfBlocks),
3776 [](BasicBlock *IfBlock) {
3777 return isa<UncondBrInst>(IfBlock->getTerminator());
3778 });
3779 assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
3780 "Will have either one or two blocks to speculate.");
3781
3782 // If the branch is non-unpredictable, see if we either predictably jump to
3783 // the merge bb (if we have only a single 'then' block), or if we predictably
3784 // jump to one specific 'then' block (if we have two of them).
3785 // It isn't beneficial to speculatively execute the code
3786 // from the block that we know is predictably not entered.
3787 bool IsUnpredictable = DomBI->getMetadata(LLVMContext::MD_unpredictable);
3788 if (!IsUnpredictable) {
3789 uint64_t TWeight, FWeight;
3790 if (extractBranchWeights(*DomBI, TWeight, FWeight) &&
3791 (TWeight + FWeight) != 0) {
3792 BranchProbability BITrueProb =
3793 BranchProbability::getBranchProbability(TWeight, TWeight + FWeight);
3794 BranchProbability Likely = TTI.getPredictableBranchThreshold();
3795 BranchProbability BIFalseProb = BITrueProb.getCompl();
3796 if (IfBlocks.size() == 1) {
3797 BranchProbability BIBBProb =
3798 DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb;
3799 if (BIBBProb >= Likely)
3800 return false;
3801 } else {
3802 if (BITrueProb >= Likely || BIFalseProb >= Likely)
3803 return false;
3804 }
3805 }
3806 }
3807
3808 // Don't try to fold an unreachable block. For example, the phi node itself
3809 // can't be the candidate if-condition for a select that we want to form.
3810 if (auto *IfCondPhiInst = dyn_cast<PHINode>(IfCond))
3811 if (IfCondPhiInst->getParent() == BB)
3812 return false;
3813
3814 // Okay, we found that we can merge this two-entry phi node into a select.
3815 // Doing so would require us to fold *all* two entry phi nodes in this block.
3816 // At some point this becomes non-profitable (particularly if the target
3817 // doesn't support cmov's). Only do this transformation if there are two or
3818 // fewer PHI nodes in this block.
3819 unsigned NumPhis = 0;
3820 for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
3821 if (NumPhis > 2)
3822 return false;
3823
3824 // Loop over the PHI's seeing if we can promote them all to select
3825 // instructions. While we are at it, keep track of the instructions
3826 // that need to be moved to the dominating block.
3827 SmallPtrSet<Instruction *, 4> AggressiveInsts;
3828 SmallPtrSet<Instruction *, 2> ZeroCostInstructions;
3829 InstructionCost Cost = 0;
3830 InstructionCost Budget =
3832 if (SpeculateUnpredictables && IsUnpredictable)
3833 Budget += TTI.getBranchMispredictPenalty();
3834
3835 bool Changed = false;
3836 for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
3837 PHINode *PN = cast<PHINode>(II++);
3838 if (Value *V = simplifyInstruction(PN, {DL, PN})) {
3839 PN->replaceAllUsesWith(V);
3840 PN->eraseFromParent();
3841 Changed = true;
3842 continue;
3843 }
3844
3845 if (!dominatesMergePoint(PN->getIncomingValue(0), BB, DomBI,
3846 AggressiveInsts, Cost, Budget, TTI, AC,
3847 ZeroCostInstructions) ||
3848 !dominatesMergePoint(PN->getIncomingValue(1), BB, DomBI,
3849 AggressiveInsts, Cost, Budget, TTI, AC,
3850 ZeroCostInstructions))
3851 return Changed;
3852 }
3853
3854 // If we folded the first phi, PN dangles at this point. Refresh it. If
3855 // we ran out of PHIs then we simplified them all.
3856 PN = dyn_cast<PHINode>(BB->begin());
3857 if (!PN)
3858 return true;
3859
3860 // Don't fold i1 branches on PHIs which contain binary operators or
3861 // (possibly inverted) select form of or/ands if their parameters are
3862 // an equality test.
3863 auto IsBinOpOrAndEq = [](Value *V) {
3864 CmpPredicate Pred;
3865 if (match(V, m_CombineOr(
3867 m_BinOp(m_Cmp(Pred, m_Value(), m_Value()), m_Value()),
3868 m_BinOp(m_Value(), m_Cmp(Pred, m_Value(), m_Value()))),
3870 m_Cmp(Pred, m_Value(), m_Value()))))) {
3871 return CmpInst::isEquality(Pred);
3872 }
3873 return false;
3874 };
3875 if (PN->getType()->isIntegerTy(1) &&
3876 (IsBinOpOrAndEq(PN->getIncomingValue(0)) ||
3877 IsBinOpOrAndEq(PN->getIncomingValue(1)) || IsBinOpOrAndEq(IfCond)))
3878 return Changed;
3879
3880 // If all PHI nodes are promotable, check to make sure that all instructions
3881 // in the predecessor blocks can be promoted as well. If not, we won't be able
3882 // to get rid of the control flow, so it's not worth promoting to select
3883 // instructions.
3884 for (BasicBlock *IfBlock : IfBlocks)
3885 for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
3886 if (!AggressiveInsts.count(&*I) && !I->isDebugOrPseudoInst()) {
3887 // This is not an aggressive instruction that we can promote.
3888 // Because of this, we won't be able to get rid of the control flow, so
3889 // the xform is not worth it.
3890 return Changed;
3891 }
3892
3893 // If either of the blocks has its address taken, we can't do this fold.
3894 if (any_of(IfBlocks,
3895 [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
3896 return Changed;
3897
3898 LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond;
3899 if (IsUnpredictable) dbgs() << " (unpredictable)";
3900 dbgs() << " T: " << IfTrue->getName()
3901 << " F: " << IfFalse->getName() << "\n");
3902
3903 // If we can still promote the PHI nodes after this gauntlet of tests,
3904 // do all of the PHI's now.
3905
3906 // Move all 'aggressive' instructions, which are defined in the
3907 // conditional parts of the if's up to the dominating block.
3908 for (BasicBlock *IfBlock : IfBlocks)
3909 hoistAllInstructionsInto(DomBlock, DomBI, IfBlock);
3910
3911 IRBuilder<NoFolder> Builder(DomBI);
3912 // Propagate fast-math-flags from phi nodes to replacement selects.
3913 while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
3914 // Change the PHI node into a select instruction.
3915 Value *TrueVal = PN->getIncomingValueForBlock(IfTrue);
3916 Value *FalseVal = PN->getIncomingValueForBlock(IfFalse);
3917
3918 Value *Sel = Builder.CreateSelectFMF(IfCond, TrueVal, FalseVal,
3919 isa<FPMathOperator>(PN) ? PN : nullptr,
3920 "", DomBI);
3921 PN->replaceAllUsesWith(Sel);
3922 Sel->takeName(PN);
3923 PN->eraseFromParent();
3924 }
3925
3926 // At this point, all IfBlocks are empty, so our if statement
3927 // has been flattened. Change DomBlock to jump directly to our new block to
3928 // avoid other simplifycfg's kicking in on the diamond.
3929 Builder.CreateBr(BB);
3930
3932 if (DTU) {
3933 Updates.push_back({DominatorTree::Insert, DomBlock, BB});
3934 for (auto *Successor : successors(DomBlock))
3935 Updates.push_back({DominatorTree::Delete, DomBlock, Successor});
3936 }
3937
3938 DomBI->eraseFromParent();
3939 if (DTU)
3940 DTU->applyUpdates(Updates);
3941
3942 return true;
3943}
3944
3947 Value *RHS, const Twine &Name = "") {
3948 // Try to relax logical op to binary op.
3949 if (impliesPoison(RHS, LHS))
3950 return Builder.CreateBinOp(Opc, LHS, RHS, Name);
3951 if (Opc == Instruction::And)
3952 return Builder.CreateLogicalAnd(LHS, RHS, Name);
3953 if (Opc == Instruction::Or)
3954 return Builder.CreateLogicalOr(LHS, RHS, Name);
3955 llvm_unreachable("Invalid logical opcode");
3956}
3957
3958/// Return true if either PBI or BI has branch weight available, and store
3959/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
3960/// not have branch weight, use 1:1 as its weight.
3962 uint64_t &PredTrueWeight,
3963 uint64_t &PredFalseWeight,
3964 uint64_t &SuccTrueWeight,
3965 uint64_t &SuccFalseWeight) {
3966 bool PredHasWeights =
3967 extractBranchWeights(*PBI, PredTrueWeight, PredFalseWeight);
3968 bool SuccHasWeights =
3969 extractBranchWeights(*BI, SuccTrueWeight, SuccFalseWeight);
3970 if (PredHasWeights || SuccHasWeights) {
3971 if (!PredHasWeights)
3972 PredTrueWeight = PredFalseWeight = 1;
3973 if (!SuccHasWeights)
3974 SuccTrueWeight = SuccFalseWeight = 1;
3975 return true;
3976 } else {
3977 return false;
3978 }
3979}
3980
3981/// Determine if the two branches share a common destination and deduce a glue
3982/// that joins the branches' conditions to arrive at the common destination if
3983/// that would be profitable.
3984static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
3986 const TargetTransformInfo *TTI) {
3987 assert(BI && PBI && "Both blocks must end with a conditional branches.");
3988 assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) &&
3989 "PredBB must be a predecessor of BB.");
3990
3991 // We have the potential to fold the conditions together, but if the
3992 // predecessor branch is predictable, we may not want to merge them.
3993 uint64_t PTWeight, PFWeight;
3994 BranchProbability PBITrueProb, Likely;
3995 if (TTI && !PBI->getMetadata(LLVMContext::MD_unpredictable) &&
3996 extractBranchWeights(*PBI, PTWeight, PFWeight) &&
3997 (PTWeight + PFWeight) != 0) {
3998 PBITrueProb =
3999 BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
4000 Likely = TTI->getPredictableBranchThreshold();
4001 }
4002
4003 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
4004 // Speculate the 2nd condition unless the 1st is probably true.
4005 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
4006 return {{BI->getSuccessor(0), Instruction::Or, false}};
4007 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
4008 // Speculate the 2nd condition unless the 1st is probably false.
4009 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
4010 return {{BI->getSuccessor(1), Instruction::And, false}};
4011 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
4012 // Speculate the 2nd condition unless the 1st is probably true.
4013 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
4014 return {{BI->getSuccessor(1), Instruction::And, true}};
4015 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
4016 // Speculate the 2nd condition unless the 1st is probably false.
4017 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
4018 return {{BI->getSuccessor(0), Instruction::Or, true}};
4019 }
4020 return std::nullopt;
4021}
4022
4024 DomTreeUpdater *DTU,
4025 MemorySSAUpdater *MSSAU,
4026 const TargetTransformInfo *TTI) {
4027 BasicBlock *BB = BI->getParent();
4028 BasicBlock *PredBlock = PBI->getParent();
4029
4030 // Determine if the two branches share a common destination.
4031 BasicBlock *CommonSucc;
4033 bool InvertPredCond;
4034 std::tie(CommonSucc, Opc, InvertPredCond) =
4036
4037 LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
4038
4040 BB->getContext(), ConstantFolder{},
4042 // The builder is used to create instructions to eliminate the branch in
4043 // BB. If BB's terminator has !annotation metadata, add it to the new
4044 // instructions.
4045 I->copyMetadata(*BB->getTerminator(), LLVMContext::MD_annotation);
4046 }));
4047 Builder.SetInsertPoint(PBI);
4048
4049 // If we need to invert the condition in the pred block to match, do so now.
4050 if (InvertPredCond) {
4051 InvertBranch(PBI, Builder);
4052 }
4053
4054 BasicBlock *UniqueSucc =
4055 PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1);
4056
4057 // Before cloning instructions, notify the successor basic block that it
4058 // is about to have a new predecessor. This will update PHI nodes,
4059 // which will allow us to update live-out uses of bonus instructions.
4060 addPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU);
4061
4062 // Try to update branch weights.
4063 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4064 SmallVector<uint64_t, 2> MDWeights;
4065 if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4066 SuccTrueWeight, SuccFalseWeight)) {
4067
4068 if (PBI->getSuccessor(0) == BB) {
4069 // PBI: br i1 %x, BB, FalseDest
4070 // BI: br i1 %y, UniqueSucc, FalseDest
4071 // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
4072 MDWeights.push_back(PredTrueWeight * SuccTrueWeight);
4073 // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
4074 // TrueWeight for PBI * FalseWeight for BI.
4075 // We assume that total weights of a CondBrInst can fit into 32 bits.
4076 // Therefore, we will not have overflow using 64-bit arithmetic.
4077 MDWeights.push_back(PredFalseWeight * (SuccFalseWeight + SuccTrueWeight) +
4078 PredTrueWeight * SuccFalseWeight);
4079 } else {
4080 // PBI: br i1 %x, TrueDest, BB
4081 // BI: br i1 %y, TrueDest, UniqueSucc
4082 // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
4083 // FalseWeight for PBI * TrueWeight for BI.
4084 MDWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
4085 PredFalseWeight * SuccTrueWeight);
4086 // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
4087 MDWeights.push_back(PredFalseWeight * SuccFalseWeight);
4088 }
4089
4090 setFittedBranchWeights(*PBI, MDWeights, /*IsExpected=*/false,
4091 /*ElideAllZero=*/true);
4092
4093 // TODO: If BB is reachable from all paths through PredBlock, then we
4094 // could replace PBI's branch probabilities with BI's.
4095 } else
4096 PBI->setMetadata(LLVMContext::MD_prof, nullptr);
4097
4098 // Now, update the CFG.
4099 PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc);
4100
4101 if (DTU)
4102 DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
4103 {DominatorTree::Delete, PredBlock, BB}});
4104
4105 // If BI was a loop latch, it may have had associated loop metadata.
4106 // We need to copy it to the new latch, that is, PBI.
4107 if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
4108 PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
4109
4110 ValueToValueMapTy VMap; // maps original values to cloned values
4112
4113 Module *M = BB->getModule();
4114
4115 PredBlock->getTerminator()->cloneDebugInfoFrom(BB->getTerminator());
4116 for (DbgVariableRecord &DVR :
4118 RemapDbgRecord(M, &DVR, VMap,
4120 }
4121
4122 // Now that the Cond was cloned into the predecessor basic block,
4123 // or/and the two conditions together.
4124 Value *BICond = VMap[BI->getCondition()];
4125 PBI->setCondition(
4126 createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond"));
4128 if (auto *SI = dyn_cast<SelectInst>(PBI->getCondition()))
4129 if (!MDWeights.empty()) {
4130 assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
4131 setFittedBranchWeights(*SI, {MDWeights[0], MDWeights[1]},
4132 /*IsExpected=*/false, /*ElideAllZero=*/true);
4133 }
4134
4135 ++NumFoldBranchToCommonDest;
4136 return true;
4137}
4138
4139/// Return if an instruction's type or any of its operands' types are a vector
4140/// type.
4141static bool isVectorOp(Instruction &I) {
4142 return I.getType()->isVectorTy() || any_of(I.operands(), [](Use &U) {
4143 return U->getType()->isVectorTy();
4144 });
4145}
4146
4147/// If this basic block is simple enough, and if a predecessor branches to us
4148/// and one of our successors, fold the block into the predecessor and use
4149/// logical operations to pick the right destination.
4151 MemorySSAUpdater *MSSAU,
4152 const TargetTransformInfo *TTI,
4153 AssumptionCache *AC,
4154 unsigned BonusInstThreshold) {
4155 BasicBlock *BB = BI->getParent();
4159
4161
4163 Cond->getParent() != BB || !Cond->hasOneUse())
4164 return false;
4165
4166 // Finally, don't infinitely unroll conditional loops.
4167 if (is_contained(successors(BB), BB))
4168 return false;
4169
4170 // Which predecessors do we want to deal with?
4172 for (BasicBlock *PredBlock : predecessors(BB)) {
4173 CondBrInst *PBI = dyn_cast<CondBrInst>(PredBlock->getTerminator());
4174
4175 // Check that we have two conditional branches. If there is a PHI node in
4176 // the common successor, verify that the same value flows in from both
4177 // blocks.
4178 if (!PBI || !safeToMergeTerminators(BI, PBI))
4179 continue;
4180
4181 // Determine if the two branches share a common destination.
4182 BasicBlock *CommonSucc;
4184 bool InvertPredCond;
4185 if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
4186 std::tie(CommonSucc, Opc, InvertPredCond) = *Recipe;
4187 else
4188 continue;
4189
4190 // Check the cost of inserting the necessary logic before performing the
4191 // transformation.
4192 if (TTI) {
4193 Type *Ty = BI->getCondition()->getType();
4194 InstructionCost Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind);
4195 if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
4196 !isa<CmpInst>(PBI->getCondition())))
4197 Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
4198
4200 continue;
4201 }
4202
4203 // Ok, we do want to deal with this predecessor. Record it.
4204 Preds.emplace_back(PredBlock);
4205 }
4206
4207 // If there aren't any predecessors into which we can fold,
4208 // don't bother checking the cost.
4209 if (Preds.empty())
4210 return false;
4211
4212 // Only allow this transformation if computing the condition doesn't involve
4213 // too many instructions and these involved instructions can be executed
4214 // unconditionally. We denote all involved instructions except the condition
4215 // as "bonus instructions", and only allow this transformation when the
4216 // number of the bonus instructions we'll need to create when cloning into
4217 // each predecessor does not exceed a certain threshold.
4218 unsigned NumBonusInsts = 0;
4219 bool SawVectorOp = false;
4220 const unsigned PredCount = Preds.size();
4221 // Speculated instructions will be inserted before the terminator of the
4222 // predecessor. Only handle the simple case of one predecessor.
4223 const Instruction *CxtI =
4224 PredCount == 1 ? Preds[0]->getTerminator() : nullptr;
4225 for (Instruction &I : *BB) {
4226 // Don't check the branch condition comparison itself.
4227 if (&I == Cond)
4228 continue;
4229 // Ignore the terminator.
4231 continue;
4232 // Pseudo probes aren't speculatable but can be dropped on fold.
4234 continue;
4235 // I must be safe to execute unconditionally.
4236 if (!isSafeToSpeculativelyExecute(&I, CxtI, AC))
4237 return false;
4238 SawVectorOp |= isVectorOp(I);
4239
4240 // Account for the cost of duplicating this instruction into each
4241 // predecessor. Ignore free instructions.
4242 if (!TTI || TTI->getInstructionCost(&I, CostKind) !=
4244 NumBonusInsts += PredCount;
4245
4246 // Early exits once we reach the limit.
4247 if (NumBonusInsts >
4248 BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
4249 return false;
4250 }
4251
4252 auto IsBCSSAUse = [BB, &I](Use &U) {
4253 auto *UI = cast<Instruction>(U.getUser());
4254 if (auto *PN = dyn_cast<PHINode>(UI))
4255 return PN->getIncomingBlock(U) == BB;
4256 return UI->getParent() == BB && I.comesBefore(UI);
4257 };
4258
4259 // Does this instruction require rewriting of uses?
4260 if (!all_of(I.uses(), IsBCSSAUse))
4261 return false;
4262 }
4263 if (NumBonusInsts >
4264 BonusInstThreshold *
4265 (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
4266 return false;
4267
4268 // Ok, we have the budget. Perform the transformation.
4269 for (BasicBlock *PredBlock : Preds) {
4270 auto *PBI = cast<CondBrInst>(PredBlock->getTerminator());
4271 return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
4272 }
4273 return false;
4274}
4275
4276// If there is only one store in BB1 and BB2, return it, otherwise return
4277// nullptr.
4279 StoreInst *S = nullptr;
4280 for (auto *BB : {BB1, BB2}) {
4281 if (!BB)
4282 continue;
4283 for (auto &I : *BB)
4284 if (auto *SI = dyn_cast<StoreInst>(&I)) {
4285 if (S)
4286 // Multiple stores seen.
4287 return nullptr;
4288 else
4289 S = SI;
4290 }
4291 }
4292 return S;
4293}
4294
4296 Value *AlternativeV = nullptr) {
4297 // PHI is going to be a PHI node that allows the value V that is defined in
4298 // BB to be referenced in BB's only successor.
4299 //
4300 // If AlternativeV is nullptr, the only value we care about in PHI is V. It
4301 // doesn't matter to us what the other operand is (it'll never get used). We
4302 // could just create a new PHI with a poison incoming value, but that could
4303 // increase register pressure if EarlyCSE/InstCombine can't fold it with some
4304 // other PHI. So here we directly look for some PHI in BB's successor with V
4305 // as an incoming operand. If we find one, we use it, else we create a new
4306 // one.
4307 //
4308 // If AlternativeV is not nullptr, we care about both incoming values in PHI.
4309 // PHI must be exactly: phi <ty> [ %V, %BB ], [ %AlternativeV, %OtherBB ]
4310 // where OtherBB is the single other predecessor of BB's only successor.
4311 PHINode *PHI = nullptr;
4312 BasicBlock *Succ = BB->getSingleSuccessor();
4313
4314 for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
4315 if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
4316 PHI = cast<PHINode>(I);
4317 if (!AlternativeV)
4318 break;
4319
4320 assert(Succ->hasNPredecessors(2));
4321 auto PredI = pred_begin(Succ);
4322 BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
4323 if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
4324 break;
4325 PHI = nullptr;
4326 }
4327 if (PHI)
4328 return PHI;
4329
4330 // If V is not an instruction defined in BB, just return it.
4331 if (!AlternativeV &&
4332 (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
4333 return V;
4334
4335 PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge");
4336 PHI->insertBefore(Succ->begin());
4337 PHI->addIncoming(V, BB);
4338 for (BasicBlock *PredBB : predecessors(Succ))
4339 if (PredBB != BB)
4340 PHI->addIncoming(
4341 AlternativeV ? AlternativeV : PoisonValue::get(V->getType()), PredBB);
4342 return PHI;
4343}
4344
4346 BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
4347 BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
4348 DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
// Try to replace the unique store in the P blocks (PTB/PFB) and the unique
// store in the Q blocks (QTB/QFB) with a single store to Address that is
// placed after PostBB's insertion point and guarded by the OR of the two
// (possibly inverted) branch conditions. PTB and QTB may be null (they are
// null-checked below). Returns true once the stores have been merged and
// false on every bail-out.
//
4349 // For every pointer, there must be exactly two stores, one coming from
4350 // PTB or PFB, and the other from QTB or QFB. We don't support more than one
4351 // store (to any address) in PTB,PFB or QTB,QFB.
4352 // FIXME: We could relax this restriction with a bit more work and performance
4353 // testing.
4354 StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
4355 StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
4356 if (!PStore || !QStore)
4357 return false;
4358
4359 // Now check the stores are compatible.
// Both must be unordered and agree on ordering, sync scope and stored type.
4360 if (!QStore->isUnordered() || !PStore->isUnordered() ||
4361 PStore->getOrdering() != QStore->getOrdering() ||
4362 PStore->getSyncScopeID() != QStore->getSyncScopeID() ||
4363 PStore->getValueOperand()->getType() !=
4364 QStore->getValueOperand()->getType())
4365 return false;
4366
4367 // Check that sinking the store won't cause program behavior changes. Sinking
4368 // the store out of the Q blocks won't change any behavior as we're sinking
4369 // from a block to its unconditional successor. But we're moving a store from
4370 // the P blocks down through the middle block (QBI) and past both QFB and QTB.
4371 // So we need to check that there are no aliasing loads or stores in
4372 // QBI, QTB and QFB. We also need to check there are no conflicting memory
4373 // operations between PStore and the end of its parent block.
4374 //
4375 // The ideal way to do this is to query AliasAnalysis, but we don't
4376 // preserve AA currently so that is dangerous. Be super safe and just
4377 // check there are no other memory operations at all.
// The single predecessor of QFB is the middle block that ends in QBI.
4378 for (auto &I : *QFB->getSinglePredecessor())
4379 if (I.mayReadOrWriteMemory())
4380 return false;
4381 for (auto &I : *QFB)
4382 if (&I != QStore && I.mayReadOrWriteMemory())
4383 return false;
4384 if (QTB)
4385 for (auto &I : *QTB)
4386 if (&I != QStore && I.mayReadOrWriteMemory())
4387 return false;
// Only the instructions from PStore to the end of its block are inspected.
4388 for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
4389 I != E; ++I)
4390 if (&*I != PStore && I->mayReadOrWriteMemory())
4391 return false;
4392
4393 // If we're not in aggressive mode, we only optimize if we have some
4394 // confidence that by optimizing we'll allow P and/or Q to be if-converted.
4395 auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
// A null block models a fallthrough edge and costs nothing.
4396 if (!BB)
4397 return true;
4398 // Heuristic: if the block can be if-converted/phi-folded and the
4399 // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
4400 // thread this store.
4401 InstructionCost Cost = 0;
4402 InstructionCost Budget =
4404 for (auto &I : *BB) {
4405 // Consider terminator instruction to be free.
4406 if (I.isTerminator())
4407 continue;
4408 // If this is one of the stores that we want to speculate out of this BB,
4409 // then don't count its cost, consider it to be free.
// NOTE(review): the result of llvm::find is used directly as the condition
// instead of being compared against the end of FreeStores; presumably
// llvm::is_contained(FreeStores, S) was intended -- confirm.
4410 if (auto *S = dyn_cast<StoreInst>(&I))
4411 if (llvm::find(FreeStores, S))
4412 continue;
4413 // Else, we have a white-list of instructions that we are okay speculating.
4415 return false; // Not in white-list - not worthwhile folding.
4416 // And finally, if this is a non-free instruction that we are okay
4417 // speculating, ensure that we consider the speculation budget.
4418 Cost +=
4419 TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
4420 if (Cost > Budget)
4421 return false; // Eagerly refuse to fold as soon as we're out of budget.
4422 }
4423 assert(Cost <= Budget &&
4424 "When we run out of budget we will eagerly return from within the "
4425 "per-instruction loop.");
4426 return true;
4427 };
4428
// The two stores being merged are not charged against the budget.
4429 const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
4431 (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
4432 !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
4433 return false;
4434
4435 // If PostBB has more than two predecessors, we need to split it so we can
4436 // sink the store.
4437 if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
4438 // We know that QFB's only successor is PostBB. And QFB has a single
4439 // predecessor. If QTB exists, then its only successor is also PostBB.
4440 // If QTB does not exist, then QFB's only predecessor has a conditional
4441 // branch to QFB and PostBB.
4442 BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
4443 BasicBlock *NewBB =
4444 SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU);
4445 if (!NewBB)
4446 return false;
// From here on the merged store is built relative to the new split block.
4447 PostBB = NewBB;
4448 }
4449
4450 // OK, we're going to sink the stores to PostBB. The store has to be
4451 // conditional though, so first create the predicate.
4452 CondBrInst *PBranch =
4454 CondBrInst *QBranch =
4456 Value *PCond = PBranch->getCondition();
4457 Value *QCond = QBranch->getCondition();
4458
4460 PStore->getParent());
4462 QStore->getParent(), PPHI);
4463
4464 BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt();
4465 IRBuilder<> QB(PostBB, PostBBFirst);
4466 QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc());
4467
// A store that lives in the "false" block executes when its branch condition
// is false, so flip the requested inversion for it.
4468 InvertPCond ^= (PStore->getParent() != PTB);
4469 InvertQCond ^= (QStore->getParent() != QTB);
4470 Value *PPred = InvertPCond ? QB.CreateNot(PCond) : PCond;
4471 Value *QPred = InvertQCond ? QB.CreateNot(QCond) : QCond;
4472
// The merged store must run if either of the original stores would have run.
4473 Value *CombinedPred = QB.CreateOr(PPred, QPred);
4474
4475 BasicBlock::iterator InsertPt = QB.GetInsertPoint();
4476 auto *T = SplitBlockAndInsertIfThen(CombinedPred, InsertPt,
4477 /*Unreachable=*/false,
4478 /*BranchWeights=*/nullptr, DTU);
// When both original branches carry weights, derive weights for the new
// guard from them, swapping each pair if its condition was inverted.
4479 if (hasBranchWeightMD(*PBranch) && hasBranchWeightMD(*QBranch) &&
4481 SmallVector<uint32_t, 2> PWeights, QWeights;
4482 extractBranchWeights(*PBranch, PWeights);
4483 extractBranchWeights(*QBranch, QWeights);
4484 if (InvertPCond)
4485 std::swap(PWeights[0], PWeights[1]);
4486 if (InvertQCond)
4487 std::swap(QWeights[0], QWeights[1]);
4488 auto CombinedWeights = getDisjunctionWeights(PWeights, QWeights);
4490 {CombinedWeights[0], CombinedWeights[1]},
4491 /*IsExpected=*/false, /*ElideAllZero=*/true);
4492 }
4493
// Emit the merged store in front of T, the terminator returned by the split.
4494 QB.SetInsertPoint(T);
4495 StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
4496 combineMetadataForCSE(QStore, PStore, true);
4497 SI->copyMetadata(*QStore);
4498 // Update any dbg.assign intrinsics to track the merged value (QPHI) instead
4499 // of the original constant values, likely making these identical.
4500 for (auto *DbgAssign : at::getDVRAssignmentMarkers(SI)) {
4501 if (llvm::is_contained(DbgAssign->location_ops(),
4502 PStore->getValueOperand()))
4503 DbgAssign->replaceVariableLocationOp(PStore->getValueOperand(), QPHI);
4504 if (llvm::is_contained(DbgAssign->location_ops(),
4505 QStore->getValueOperand()))
4506 DbgAssign->replaceVariableLocationOp(QStore->getValueOperand(), QPHI);
4507 }
4508
4509 // Choose the minimum alignment. If we could prove both stores execute, we
4510 // could use biggest one. In this case, though, we only know that one of the
4511 // stores executes. And we don't know it's safe to take the alignment from a
4512 // store that doesn't execute.
4513 SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign()));
4514
// Ordering and sync scope were checked to be equal for both stores above, so
// taking them from QStore also matches PStore.
4515 if (QStore->isAtomic())
4516 SI->setAtomic(QStore->getOrdering(), QStore->getSyncScopeID());
4517
// The original stores are now redundant.
4518 QStore->eraseFromParent();
4519 PStore->eraseFromParent();
4520
4521 return true;
4522}
4523
4525 DomTreeUpdater *DTU, const DataLayout &DL,
4526 const TargetTransformInfo &TTI) {
4527 // The intention here is to find diamonds or triangles (see below) where each
4528 // conditional block contains a store to the same address. Both of these
4529 // stores are conditional, so they can't be unconditionally sunk. But it may
4530 // be profitable to speculatively sink the stores into one merged store at the
4531 // end, and predicate the merged store on the union of the two conditions of
4532 // PBI and QBI.
4533 //
4534 // This can reduce the number of stores executed if both of the conditions are
4535 // true, and can allow the blocks to become small enough to be if-converted.
4536 // This optimization will also chain, so that ladders of test-and-set
4537 // sequences can be if-converted away.
4538 //
4539 // We only deal with simple diamonds or triangles:
4540 //
4541 //     PBI       or      PBI        or a combination of the two
4542 //    /   \               | \
4543 //   PTB  PFB             |  PFB
4544 //    \   /               | /
4545 //     QBI                QBI
4546 //    /  \                | \
4547 //   QTB  QFB             |  QFB
4548 //    \  /                | /
4549 //    PostBB            PostBB
4550 //
4551 // We model triangles as a type of diamond with a nullptr "true" block.
4552 // Triangles are canonicalized so that the fallthrough edge is represented by
4553 // a true condition, as in the diagram above.
//
// Returns true if at least one pair of stores was merged.
4554 BasicBlock *PTB = PBI->getSuccessor(0);
4555 BasicBlock *PFB = PBI->getSuccessor(1);
4556 BasicBlock *QTB = QBI->getSuccessor(0);
4557 BasicBlock *QFB = QBI->getSuccessor(1);
4558 BasicBlock *PostBB = QFB->getSingleSuccessor();
4559
4560 // Make sure we have a good guess for PostBB. If QTB's only successor is
4561 // QFB, then QFB is a better PostBB.
4562 if (QTB->getSingleSuccessor() == QFB)
4563 PostBB = QFB;
4564
4565 // If we couldn't find a good PostBB, stop.
4566 if (!PostBB)
4567 return false;
4568
4569 bool InvertPCond = false, InvertQCond = false;
4570 // Canonicalize fallthroughs to the true branches.
// Swapping the successors is recorded in the Invert flags so the callee can
// negate the corresponding branch condition.
4571 if (PFB == QBI->getParent()) {
4572 std::swap(PFB, PTB);
4573 InvertPCond = true;
4574 }
4575 if (QFB == PostBB) {
4576 std::swap(QFB, QTB);
4577 InvertQCond = true;
4578 }
4579
4580 // From this point on we can assume PTB or QTB may be fallthroughs but PFB
4581 // and QFB may not. Model fallthroughs as a nullptr block.
4582 if (PTB == QBI->getParent())
4583 PTB = nullptr;
4584 if (QTB == PostBB)
4585 QTB = nullptr;
4586
4587 // Legality bailouts. We must have at least the non-fallthrough blocks and
4588 // the post-dominating block, and the non-fallthroughs must only have one
4589 // predecessor.
4590 auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
4591 return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
4592 };
4593 if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
4594 !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
4595 return false;
4596 if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
4597 (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
4598 return false;
// NOTE(review): presumably the two uses are the two edges arriving from the
// P side, ruling out any other reference to QBI's block -- confirm.
4599 if (!QBI->getParent()->hasNUses(2))
4600 return false;
4601
4602 // OK, this is a sequence of two diamonds or triangles.
4603 // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
4604 SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
4605 for (auto *BB : {PTB, PFB}) {
4606 if (!BB)
4607 continue;
4608 for (auto &I : *BB)
4610 PStoreAddresses.insert(SI->getPointerOperand());
4611 }
4612 for (auto *BB : {QTB, QFB}) {
4613 if (!BB)
4614 continue;
4615 for (auto &I : *BB)
4617 QStoreAddresses.insert(SI->getPointerOperand());
4618 }
4619
4620 set_intersect(PStoreAddresses, QStoreAddresses);
4621 // set_intersect mutates PStoreAddresses in place. Rename it here to make it
4622 // clear what it contains.
4623 auto &CommonAddresses = PStoreAddresses;
4624
// Attempt the merge once per address that is stored to on both sides.
4625 bool Changed = false;
4626 for (auto *Address : CommonAddresses)
4627 Changed |=
4628 mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
4629 InvertPCond, InvertQCond, DTU, DL, TTI);
4630 return Changed;
4631}
4632
4633/// If the previous block ended with a widenable branch, determine if reusing
4634/// the target block is profitable and legal. This will have the effect of
4635/// "widening" PBI, but doesn't require us to reason about hoisting safety.
/// Returns true if one of BI's successors was redirected to PBI's false
/// successor, false if nothing was changed.
4637 DomTreeUpdater *DTU) {
4638 // TODO: This can be generalized in two important ways:
4639 // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
4640 // values from the PBI edge.
4641 // 2) We can sink side effecting instructions into BI's fallthrough
4642 // successor provided they don't contribute to computation of
4643 // BI's condition.
4644 BasicBlock *IfTrueBB = PBI->getSuccessor(0);
4645 BasicBlock *IfFalseBB = PBI->getSuccessor(1);
// PBI must be a widenable branch whose true edge leads to BI's block, and
// that block must have no other predecessor.
4646 if (!isWidenableBranch(PBI) || IfTrueBB != BI->getParent() ||
4647 !BI->getParent()->getSinglePredecessor())
4648 return false;
4649 if (!IfFalseBB->phis().empty())
4650 return false; // TODO
4651 // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which
4652 // may undo the transform done here.
4653 // TODO: There might be a more fine-grained solution to this.
4654 if (!llvm::succ_empty(IfFalseBB))
4655 return false;
4656 // Use lambda to lazily compute expensive condition after cheap ones.
4657 auto NoSideEffects = [](BasicBlock &BB) {
4658 return llvm::none_of(BB, [](const Instruction &I) {
4659 return I.mayWriteToMemory() || I.mayHaveSideEffects();
4660 });
4661 };
// Case 1: BI's false successor ends in a deoptimize call; retarget that edge
// to IfFalseBB.
4662 if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
4663 BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
4664 NoSideEffects(*BI->getParent())) {
4665 auto *OldSuccessor = BI->getSuccessor(1);
4666 OldSuccessor->removePredecessor(BI->getParent());
4667 BI->setSuccessor(1, IfFalseBB);
4668 if (DTU)
4669 DTU->applyUpdates(
4670 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4671 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4672 return true;
4673 }
// Case 2: the same rewrite, applied to BI's true successor.
4674 if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
4675 BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
4676 NoSideEffects(*BI->getParent())) {
4677 auto *OldSuccessor = BI->getSuccessor(0);
4678 OldSuccessor->removePredecessor(BI->getParent());
4679 BI->setSuccessor(0, IfFalseBB);
4680 if (DTU)
4681 DTU->applyUpdates(
4682 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4683 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4684 return true;
4685 }
4686 return false;
4687}
4688
4689/// If we have a conditional branch as a predecessor of another block,
4690/// this function tries to simplify it. We know
4691/// that PBI and BI are both conditional branches, and BI is in one of the
4692/// successor blocks of PBI - PBI branches to BI.
/// Returns true if any of the attempted simplifications changed the IR.
4694 DomTreeUpdater *DTU,
4695 const DataLayout &DL,
4696 const TargetTransformInfo &TTI) {
4697 BasicBlock *BB = BI->getParent();
4698
4699 // If this block ends with a branch instruction, and if there is a
4700 // predecessor that ends on a branch of the same condition, make
4701 // this conditional branch redundant.
4702 if (PBI->getCondition() == BI->getCondition() &&
4703 PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
4704 // Okay, the outcome of this conditional branch is statically
4705 // knowable. If this block had a single pred, handle specially, otherwise
4706 // foldCondBranchOnValueKnownInPredecessor() will handle it.
4707 if (BB->getSinglePredecessor()) {
4708 // Turn this into a branch on constant.
4709 bool CondIsTrue = PBI->getSuccessor(0) == BB;
4710 BI->setCondition(
4711 ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
4712 return true; // Nuke the branch on constant.
4713 }
4714 }
4715
4716 // If the previous block ended with a widenable branch, determine if reusing
4717 // the target block is profitable and legal. This will have the effect of
4718 // "widening" PBI, but doesn't require us to reason about hoisting safety.
4719 if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
4720 return true;
4721
4722 // If both branches are conditional and both contain stores to the same
4723 // address, remove the stores from the conditionals and create a conditional
4724 // merged store at the end.
4725 if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI))
4726 return true;
4727
4728 // If this is a conditional branch in an empty block, and if any
4729 // predecessors are a conditional branch to one of our destinations,
4730 // fold the conditions into logical ops and one cond br.
4731
4732 // BI must be the first instruction of BB, i.e. BB holds only the branch.
4733 if (&*BB->begin() != BI)
4734 return false;
4735
// Find a destination shared by PBI and BI. PBIOp and BIOp are the successor
// indices under which each branch reaches that common destination.
4736 int PBIOp, BIOp;
4737 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
4738 PBIOp = 0;
4739 BIOp = 0;
4740 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
4741 PBIOp = 0;
4742 BIOp = 1;
4743 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
4744 PBIOp = 1;
4745 BIOp = 0;
4746 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
4747 PBIOp = 1;
4748 BIOp = 1;
4749 } else {
4750 return false;
4751 }
4752
4753 // Check to make sure that the other destination of this branch
4754 // isn't BB itself. If so, this is an infinite loop that will
4755 // keep getting unwound.
4756 if (PBI->getSuccessor(PBIOp) == BB)
4757 return false;
4758
4759 // If predecessor's branch probability to BB is too low don't merge branches.
// Equivalently: bail out when PBI already reaches the common destination with
// at least the target's "predictable" probability.
4760 SmallVector<uint32_t, 2> PredWeights;
4761 if (!PBI->getMetadata(LLVMContext::MD_unpredictable) &&
4762 extractBranchWeights(*PBI, PredWeights) &&
4763 (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {
4764
4766 PredWeights[PBIOp],
4767 static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);
4768
4769 BranchProbability Likely = TTI.getPredictableBranchThreshold();
4770 if (CommonDestProb >= Likely)
4771 return false;
4772 }
4773
4774 // Do not perform this transformation if it would require
4775 // insertion of a large number of select instructions. For targets
4776 // without predication/cmovs, this is a big pessimization.
4777
4778 BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
4779 BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
// The check runs before the increment, so up to three PHIs are tolerated and
// the transform is abandoned when a fourth one is reached.
4780 unsigned NumPhis = 0;
4781 for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
4782 ++II, ++NumPhis) {
4783 if (NumPhis > 2) // Disable this xform.
4784 return false;
4785 }
4786
4787 // Finally, if everything is ok, fold the branches to logical ops.
4788 BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
4789
4790 LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
4791 << "AND: " << *BI->getParent());
4792
4794
4795 // If OtherDest *is* BB, then BB is a basic block with a single conditional
4796 // branch in it, where one edge (OtherDest) goes back to itself but the other
4797 // exits. We don't *know* that the program avoids the infinite loop
4798 // (even though that seems likely). If we do this xform naively, we'll end up
4799 // recursively unpeeling the loop. Since we know that (after the xform is
4800 // done) that the block *is* infinite if reached, we just make it an obviously
4801 // infinite loop with no cond branch.
4802 if (OtherDest == BB) {
4803 // Insert it at the end of the function, because it's either cold,
4804 // or it won't matter if it's hot. :)
4805 BasicBlock *InfLoopBlock =
4806 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
4807 UncondBrInst::Create(InfLoopBlock, InfLoopBlock);
4808 if (DTU)
4809 Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
4810 OtherDest = InfLoopBlock;
4811 }
4812
4813 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4814
4815 // BI may have other predecessors. Because of this, we leave
4816 // it alone, but modify PBI.
4817
4818 // Make sure we get to CommonDest on True&True directions.
// That is, negate a condition whose *false* edge leads to CommonDest.
4819 Value *PBICond = PBI->getCondition();
4820 IRBuilder<NoFolder> Builder(PBI);
4821 if (PBIOp)
4822 PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");
4823
4824 Value *BICond = BI->getCondition();
4825 if (BIOp)
4826 BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");
4827
4828 // Merge the conditions.
4829 Value *Cond =
4830 createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge");
4831
4832 // Modify PBI to branch on the new condition to the new dests.
4833 PBI->setCondition(Cond);
4834 PBI->setSuccessor(0, CommonDest);
4835 PBI->setSuccessor(1, OtherDest);
4836
4837 if (DTU) {
4838 Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
4839 Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});
4840
4841 DTU->applyUpdates(Updates);
4842 }
4843
4844 // Update branch weight for PBI.
4845 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4846 uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
4847 bool HasWeights =
4848 extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4849 SuccTrueWeight, SuccFalseWeight);
4850 if (HasWeights) {
// Re-express the weights relative to the common/other destinations rather
// than the true/false edges.
4851 PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4852 PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4853 SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4854 SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4855 // The weight to CommonDest should be PredCommon * SuccTotal +
4856 // PredOther * SuccCommon.
4857 // The weight to OtherDest should be PredOther * SuccOther.
4858 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
4859 PredOther * SuccCommon,
4860 PredOther * SuccOther};
4861
4862 setFittedBranchWeights(*PBI, NewWeights, /*IsExpected=*/false,
4863 /*ElideAllZero=*/true);
4864 // Cond may be a select instruction with the first operand set to "true", or
4865 // the second to "false" (see how createLogicalOp works for `and` and `or`)
4867 if (auto *SI = dyn_cast<SelectInst>(Cond)) {
4868 assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
4869 // The select is predicated on PBICond
4870 assert(SI->getCondition() == PBICond);
4871 // The corresponding probabilities are what was referred to above as
4872 // PredCommon and PredOther.
4873 setFittedBranchWeights(*SI, {PredCommon, PredOther},
4874 /*IsExpected=*/false, /*ElideAllZero=*/true);
4875 }
4876 }
4877
4878 // OtherDest may have phi nodes. If so, add an entry from PBI's
4879 // block that are identical to the entries for BI's block.
4880 addPredecessorToBlock(OtherDest, PBI->getParent(), BB);
4881
4882 // We know that the CommonDest already had an edge from PBI to
4883 // it. If it has PHIs though, the PHIs may have different
4884 // entries for BB and PBI's BB. If so, insert a select to make
4885 // them agree.
4886 for (PHINode &PN : CommonDest->phis()) {
4887 Value *BIV = PN.getIncomingValueForBlock(BB);
4888 unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
4889 Value *PBIV = PN.getIncomingValue(PBBIdx);
4890 if (BIV != PBIV) {
4891 // Insert a select in PBI to pick the right value.
4893 Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
4894 PN.setIncomingValue(PBBIdx, NV);
4895 // The select has the same condition as PBI, in the same BB. The
4896 // probabilities don't change.
4897 if (HasWeights) {
4898 uint64_t TrueWeight = PBIOp ? PredFalseWeight : PredTrueWeight;
4899 uint64_t FalseWeight = PBIOp ? PredTrueWeight : PredFalseWeight;
4900 setFittedBranchWeights(*NV, {TrueWeight, FalseWeight},
4901 /*IsExpected=*/false, /*ElideAllZero=*/true);
4902 }
4903 }
4904 }
4905
4906 LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
4907 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4908
4909 // This basic block is probably dead. We know it has at least
4910 // one fewer predecessor.
4911 return true;
4912}
4913
4914// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
4915// true or to FalseBB if Cond is false.
4916// Takes care of updating the successors and removing the old terminator.
4917// Also makes sure not to introduce new successors by assuming that edges to
4918// non-successor TrueBBs and FalseBBs aren't reachable.
// TrueWeight and FalseWeight are attached as branch weights when a new
// conditional branch is created. Always returns true.
4919bool SimplifyCFGOpt::simplifyTerminatorOnSelect(Instruction *OldTerm,
4920 Value *Cond, BasicBlock *TrueBB,
4921 BasicBlock *FalseBB,
4922 uint32_t TrueWeight,
4923 uint32_t FalseWeight) {
4924 auto *BB = OldTerm->getParent();
4925 // Remove any superfluous successor edges from the CFG.
4926 // First, figure out which successors to preserve.
4927 // If TrueBB and FalseBB are equal, only try to preserve one copy of that
4928 // successor.
// Each KeepEdge pointer is reset to nullptr once its block has been found
// among OldTerm's successors, so a non-null value after the loop means
// "wanted but not a successor".
4929 BasicBlock *KeepEdge1 = TrueBB;
4930 BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
4931
4932 SmallSetVector<BasicBlock *, 2> RemovedSuccessors;
4933
4934 // Then remove the rest.
4935 for (BasicBlock *Succ : successors(OldTerm)) {
4936 // Make sure only to keep exactly one copy of each edge.
4937 if (Succ == KeepEdge1)
4938 KeepEdge1 = nullptr;
4939 else if (Succ == KeepEdge2)
4940 KeepEdge2 = nullptr;
4941 else {
4942 Succ->removePredecessor(BB,
4943 /*KeepOneInputPHIs=*/true);
4944
// Duplicate edges to TrueBB/FalseBB are dropped above but the block itself
// stays a successor, so only record blocks that go away completely.
4945 if (Succ != TrueBB && Succ != FalseBB)
4946 RemovedSuccessors.insert(Succ);
4947 }
4948 }
4949
4950 IRBuilder<> Builder(OldTerm);
4951 Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
4952
4953 // Insert an appropriate new terminator.
4954 if (!KeepEdge1 && !KeepEdge2) {
4955 if (TrueBB == FalseBB) {
4956 // We were only looking for one successor, and it was present.
4957 // Create an unconditional branch to it.
4958 Builder.CreateBr(TrueBB);
4959 } else {
4960 // We found both of the successors we were looking for.
4961 // Create a conditional branch sharing the condition of the select.
4962 CondBrInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
4963 setBranchWeights(*NewBI, {TrueWeight, FalseWeight},
4964 /*IsExpected=*/false, /*ElideAllZero=*/true);
4965 }
4966 } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
4967 // Neither of the selected blocks were successors, so this
4968 // terminator must be unreachable.
4969 new UnreachableInst(OldTerm->getContext(), OldTerm->getIterator());
4970 } else {
4971 // One of the selected values was a successor, but the other wasn't.
4972 // Insert an unconditional branch to the one that was found;
4973 // the edge to the one that wasn't must be unreachable.
4974 if (!KeepEdge1) {
4975 // Only TrueBB was found.
4976 Builder.CreateBr(TrueBB);
4977 } else {
4978 // Only FalseBB was found.
4979 Builder.CreateBr(FalseBB);
4980 }
4981 }
4982
4984
// Tell the dominator tree updater about every successor edge that is gone.
4985 if (DTU) {
4986 SmallVector<DominatorTree::UpdateType, 2> Updates;
4987 Updates.reserve(RemovedSuccessors.size());
4988 for (auto *RemovedSuccessor : RemovedSuccessors)
4989 Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
4990 DTU->applyUpdates(Updates);
4991 }
4992
4993 return true;
4994}
4995
4996// Replaces
4997// (switch (select cond, X, Y)) on constant X, Y
4998// with a branch - conditional if X and Y lead to distinct BBs,
4999// unconditional otherwise.
// Returns false without changing anything unless both select arms are
// ConstantInts; otherwise returns the result of simplifyTerminatorOnSelect.
5000bool SimplifyCFGOpt::simplifySwitchOnSelect(SwitchInst *SI,
5001 SelectInst *Select) {
5002 // Check for constant integer values in the select.
5003 ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
5004 ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
5005 if (!TrueVal || !FalseVal)
5006 return false;
5007
5008 // Find the relevant condition and destinations.
// NOTE(review): presumably findCaseValue yields the default case when no
// explicit case matches the constant -- confirm against SwitchInst.
5009 Value *Condition = Select->getCondition();
5010 BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
5011 BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();
5012
5013 // Get weight for TrueBB and FalseBB.
// Both weights stay 0 when the switch has no weight metadata or when the
// number of weights does not equal the number of cases plus one.
5014 uint32_t TrueWeight = 0, FalseWeight = 0;
5015 SmallVector<uint64_t, 8> Weights;
5016 bool HasWeights = hasBranchWeightMD(*SI);
5017 if (HasWeights) {
5018 getBranchWeights(SI, Weights);
5019 if (Weights.size() == 1 + SI->getNumCases()) {
// The 64-bit weights are narrowed to 32 bits by the casts below.
5020 TrueWeight =
5021 (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
5022 FalseWeight =
5023 (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
5024 }
5025 }
5026
5027 // Perform the actual simplification.
5028 return simplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
5029 FalseWeight);
5030}
5031
5032// Replaces
5033// (indirectbr (select cond, blockaddress(@fn, BlockA),
5034// blockaddress(@fn, BlockB)))
5035// with
5036// (br cond, BlockA, BlockB).
// Returns false without changing anything unless both select arms are
// BlockAddress constants; otherwise returns the result of
// simplifyTerminatorOnSelect.
5037bool SimplifyCFGOpt::simplifyIndirectBrOnSelect(IndirectBrInst *IBI,
5038 SelectInst *SI) {
5039 // Check that both operands of the select are block addresses.
5040 BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
5041 BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
5042 if (!TBA || !FBA)
5043 return false;
5044
5045 // Extract the actual blocks.
5046 BasicBlock *TrueBB = TBA->getBasicBlock();
5047 BasicBlock *FalseBB = FBA->getBasicBlock();
5048
5049 // The select's profile becomes the profile of the conditional branch that
5050 // replaces the indirect branch.
// The vector is created with two elements so that indices 0 and 1 below are
// always valid.
5051 SmallVector<uint32_t> SelectBranchWeights(2);
5053 extractBranchWeights(*SI, SelectBranchWeights);
5054 // Perform the actual simplification.
5055 return simplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB,
5056 SelectBranchWeights[0],
5057 SelectBranchWeights[1]);
5058}
5059
5060/// This is called when we find an icmp instruction
5061/// (a seteq/setne with a constant) as the only instruction in a
5062/// block that ends with an uncond branch. We are looking for a very specific
5063/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
5064/// this case, we merge the first two "or's of icmp" into a switch, but then the
5065/// default value goes to an uncond block with a seteq in it, we get something
5066/// like:
5067///
5068/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
5069/// DEFAULT:
5070/// %tmp = icmp eq i8 %A, 92
5071/// br label %end
5072/// end:
5073/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
5074///
5075/// We prefer to split the edge to 'end' so that there is a true/false entry to
5076/// the PHI, merging the third icmp into the switch.
///
/// This is a thin wrapper: it forwards to
/// tryToSimplifyUncondBranchWithICmpSelectInIt with a null select and returns
/// that function's result.
5077bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
5078 ICmpInst *ICI, IRBuilder<> &Builder) {
5079 // Select == nullptr means we assume that there is a hidden no-op select
5080 // instruction of `_ = select %icmp, true, false` after `%icmp = icmp ...`
5081 return tryToSimplifyUncondBranchWithICmpSelectInIt(ICI, nullptr, Builder);
5082}
5083
5084/// Similar to tryToSimplifyUncondBranchWithICmpInIt, but handle a more generic
5085/// case. This is called when we find an icmp instruction (a seteq/setne with a
5086/// constant) and its following select instruction as the only TWO instructions
5087/// in a block that ends with an uncond branch. We are looking for a very
5088/// specific pattern that occurs when "
5089/// if (A == 1) return C1;
5090/// if (A == 2) return C2;
5091/// if (A < 3) return C3;
5092/// return C4;
5093/// " gets simplified. In this case, we merge the first two "branches of icmp"
5094/// into a switch, but then the default value goes to an uncond block with a lt
5095/// icmp and select in it, as InstCombine can not simplify "A < 3" as "A == 2".
5096/// After SimplifyCFG and other subsequent optimizations (e.g., SCCP), we might
5097/// get something like:
5098///
5099/// case1:
5100/// switch i8 %A, label %DEFAULT [ i8 0, label %end i8 1, label %case2 ]
5101/// case2:
5102/// br label %end
5103/// DEFAULT:
5104/// %tmp = icmp eq i8 %A, 2
5105/// %val = select i1 %tmp, i8 C3, i8 C4
5106/// br label %end
5107/// end:
5108/// _ = phi i8 [ C1, %case1 ], [ C2, %case2 ], [ %val, %DEFAULT ]
5109///
5110/// We prefer to split the edge to 'end' so that there are TWO entries of C3/C4
5111/// to the PHI, merging the icmp & select into the switch, as follows:
5112///
5113/// case1:
5114/// switch i8 %A, label %DEFAULT [
5115/// i8 0, label %end
5116/// i8 1, label %case2
5117/// i8 2, label %case3
5118/// ]
5119/// case2:
5120/// br label %end
5121/// case3:
5122/// br label %end
5123/// DEFAULT:
5124/// br label %end
5125/// end:
5126/// _ = phi i8 [ C1, %case1 ], [ C2, %case2 ], [ C3, %case2 ], [ C4, %DEFAULT]
5127bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpSelectInIt(
5128 ICmpInst *ICI, SelectInst *Select, IRBuilder<> &Builder) {
5129 BasicBlock *BB = ICI->getParent();
5130
5131 // If the block has any PHIs in it or the icmp/select has multiple uses, it is
5132 // too complex.
5133 /// TODO: support multi-phis in succ BB of select's BB.
5134 if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse() ||
5135 (Select && !Select->hasOneUse()))
5136 return false;
5137
5138 // The pattern we're looking for is where our only predecessor is a switch on
5139 // 'V' and this block is the default case for the switch. In this case we can
5140 // fold the compared value into the switch to simplify things.
5141 BasicBlock *Pred = BB->getSinglePredecessor();
5142 if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
5143 return false;
5144
5145 Value *IcmpCond;
5146 ConstantInt *NewCaseVal;
5147 CmpPredicate Predicate;
5148
5149 // Match icmp X, C
5150 if (!match(ICI,
5151 m_ICmp(Predicate, m_Value(IcmpCond), m_ConstantInt(NewCaseVal))))
5152 return false;
5153
5154 Value *SelectCond, *SelectTrueVal, *SelectFalseVal;
5156 if (!Select) {
5157 // If Select == nullptr, we can assume that there is a hidden no-op select
5158 // just after icmp
5159 SelectCond = ICI;
5160 SelectTrueVal = Builder.getTrue();
5161 SelectFalseVal = Builder.getFalse();
5162 User = ICI->user_back();
5163 } else {
5164 SelectCond = Select->getCondition();
5165 // Check if the select condition is the same as the icmp condition.
5166 if (SelectCond != ICI)
5167 return false;
5168 SelectTrueVal = Select->getTrueValue();
5169 SelectFalseVal = Select->getFalseValue();
5170 User = Select->user_back();
5171 }
5172
5173 SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
5174 if (SI->getCondition() != IcmpCond)
5175 return false;
5176
5177 // If BB is reachable on a non-default case, then we simply know the value of
5178 // V in this block. Substitute it and constant fold the icmp instruction
5179 // away.
5180 if (SI->getDefaultDest() != BB) {
5181 ConstantInt *VVal = SI->findCaseDest(BB);
5182 assert(VVal && "Should have a unique destination value");
5183 ICI->setOperand(0, VVal);
5184
5185 if (Value *V = simplifyInstruction(ICI, {DL, ICI})) {
5186 ICI->replaceAllUsesWith(V);
5187 ICI->eraseFromParent();
5188 }
5189 // BB is now empty, so it is likely to simplify away.
5190 return requestResimplify();
5191 }
5192
5193 // Ok, the block is reachable from the default dest. If the constant we're
5194 // comparing exists in one of the other edges, then we can constant fold ICI
5195 // and zap it.
5196 if (SI->findCaseValue(NewCaseVal) != SI->case_default()) {
5197 Value *V;
5198 if (Predicate == ICmpInst::ICMP_EQ)
5200 else
5202
5203 ICI->replaceAllUsesWith(V);
5204 ICI->eraseFromParent();
5205 // BB is now empty, so it is likely to simplify away.
5206 return requestResimplify();
5207 }
5208
5209 // The use of the select has to be in the 'end' block, by the only PHI node in
5210 // the block.
5211 BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
5212 PHINode *PHIUse = dyn_cast<PHINode>(User);
5213 if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
5215 return false;
5216
5217 // If the icmp is a SETEQ, then the default dest gets SelectFalseVal, the new
5218 // edge gets SelectTrueVal in the PHI.
5219 Value *DefaultCst = SelectFalseVal;
5220 Value *NewCst = SelectTrueVal;
5221
5222 if (ICI->getPredicate() == ICmpInst::ICMP_NE)
5223 std::swap(DefaultCst, NewCst);
5224
5225 // Replace Select (which is used by the PHI for the default value) with
5226 // SelectFalseVal or SelectTrueVal depending on if ICI is EQ or NE.
5227 if (Select) {
5228 Select->replaceAllUsesWith(DefaultCst);
5229 Select->eraseFromParent();
5230 } else {
5231 ICI->replaceAllUsesWith(DefaultCst);
5232 }
5233 ICI->eraseFromParent();
5234
5235 SmallVector<DominatorTree::UpdateType, 2> Updates;
5236
5237 // Okay, the switch goes to this block on a default value. Add an edge from
5238 // the switch to the merge point on the compared value.
5239 BasicBlock *NewBB =
5240 BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
5241 {
5242 SwitchInstProfUpdateWrapper SIW(*SI);
5243 auto W0 = SIW.getSuccessorWeight(0);
5245 if (W0) {
5246 NewW = ((uint64_t(*W0) + 1) >> 1);
5247 SIW.setSuccessorWeight(0, *NewW);
5248 }
5249 SIW.addCase(NewCaseVal, NewBB, NewW);
5250 if (DTU)
5251 Updates.push_back({DominatorTree::Insert, Pred, NewBB});
5252 }
5253
5254 // NewBB branches to the phi block, add the uncond branch and the phi entry.
5255 Builder.SetInsertPoint(NewBB);
5256 Builder.SetCurrentDebugLocation(SI->getDebugLoc());
5257 Builder.CreateBr(SuccBlock);
5258 PHIUse->addIncoming(NewCst, NewBB);
5259 if (DTU) {
5260 Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
5261 DTU->applyUpdates(Updates);
5262 }
5263 return true;
5264}
5265
5266/// Check to see if it is branching on an or/and chain of icmp instructions, and
5267/// fold it into a switch instruction if so.
///
/// When the gathered constants form one contiguous run, a range check plus a
/// conditional branch is emitted instead of a switch. A condition that could
/// not be folded into the comparison set ("extra" case) is tested first in a
/// block split off in front of the original branch.
///
/// \returns true if the branch was rewritten, false if the pattern did not
///          match.
5268bool SimplifyCFGOpt::simplifyBranchOnICmpChain(CondBrInst *BI,
5269 IRBuilder<> &Builder,
5270 const DataLayout &DL) {
 // NOTE(review): the definition of `Cond` is not visible in this listing (a
 // line appears to have been dropped before the null check below).
5272 if (!Cond)
5273 return false;
5274
5275 // Change br (X == 0 | X == 1), T, F into a switch instruction.
5276 // If this is a bunch of seteq's or'd together, or if it's a bunch of
5277 // 'setne's and'ed together, collect them.
5278
5279 // Try to gather values from a chain of and/or to be turned into a switch
5280 ConstantComparesGatherer ConstantCompare(Cond, DL);
5281 // Unpack the result
5282 SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
5283 Value *CompVal = ConstantCompare.CompValue;
5284 unsigned UsedICmps = ConstantCompare.UsedICmps;
5285 Value *ExtraCase = ConstantCompare.Extra;
5286 bool TrueWhenEqual = ConstantCompare.IsEq;
5287
5288 // If we didn't have a multiply compared value, fail.
5289 if (!CompVal)
5290 return false;
5291
5292 // Avoid turning single icmps into a switch.
5293 if (UsedICmps <= 1)
5294 return false;
5295
5296 // There might be duplicate constants in the list, which the switch
5297 // instruction can't handle, remove them now.
 // NOTE(review): llvm::unique only drops adjacent duplicates, and the range
 // check further down treats Values.front() as the maximum and Values.back()
 // as the minimum, so a sort presumably precedes this line; it is not visible
 // in this listing -- confirm.
5299 Values.erase(llvm::unique(Values), Values.end());
5300
5301 // If Extra was used, we require at least two switch values to do the
5302 // transformation. A switch with one value is just a conditional branch.
5303 if (ExtraCase && Values.size() < 2)
5304 return false;
5305
5306 SmallVector<uint32_t> BranchWeights;
5307 const bool HasProfile = !ProfcheckDisableMetadataFixes &&
5308 extractBranchWeights(*BI, BranchWeights);
5309
5310 // Figure out which block is which destination.
5311 BasicBlock *DefaultBB = BI->getSuccessor(1);
5312 BasicBlock *EdgeBB = BI->getSuccessor(0);
 // For an and-of-setne chain the roles are reversed: the "equal" edge is the
 // false successor, so swap the blocks (and their weights, if any).
5313 if (!TrueWhenEqual) {
5314 std::swap(DefaultBB, EdgeBB);
5315 if (HasProfile)
5316 std::swap(BranchWeights[0], BranchWeights[1]);
5317 }
5318
5319 BasicBlock *BB = BI->getParent();
5320
5321 LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
5322 << " cases into SWITCH. BB is:\n"
5323 << *BB);
5324
5325 SmallVector<DominatorTree::UpdateType, 2> Updates;
5326
5327 // If there are any extra values that couldn't be folded into the switch
5328 // then we evaluate them with an explicit branch first. Split the block
5329 // right before the condbr to handle it.
5330 if (ExtraCase) {
5331 BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr,
5332 /*MSSAU=*/nullptr, "switch.early.test");
5333
5334 // Remove the uncond branch added to the old block.
5335 Instruction *OldTI = BB->getTerminator();
5336 Builder.SetInsertPoint(OldTI);
5337
5338 // There can be an unintended UB if extra values are Poison. Before the
5339 // transformation, extra values may not be evaluated according to the
5340 // condition, and it will not raise UB. But after transformation, we are
5341 // evaluating extra values before checking the condition, and it will raise
5342 // UB. It can be solved by adding freeze instruction to extra values.
5343 AssumptionCache *AC = Options.AC;
5344
5345 if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
5346 ExtraCase = Builder.CreateFreeze(ExtraCase);
5347
5348 // We don't have any info about this condition.
5349 auto *Br = TrueWhenEqual ? Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB)
5350 : Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
 // NOTE(review): `Br` is not used by any line visible in this listing; a
 // statement consuming it appears to have been dropped here.
5352
5353 OldTI->eraseFromParent();
5354
5355 if (DTU)
5356 Updates.push_back({DominatorTree::Insert, BB, EdgeBB});
5357
5358 // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
5359 // for the edge we just added.
5360 addPredecessorToBlock(EdgeBB, BB, NewBB);
5361
5362 LLVM_DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
5363 << "\nEXTRABB = " << *BB);
 // From here on the switch/range check is built in the split-off block.
5364 BB = NewBB;
5365 }
5366
5367 Builder.SetInsertPoint(BI);
5368 // Convert pointer to int before we switch.
5369 if (CompVal->getType()->isPointerTy()) {
5370 assert(!DL.hasUnstableRepresentation(CompVal->getType()) &&
5371 "Should not end up here with unstable pointers");
5372 CompVal = Builder.CreatePtrToInt(
5373 CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
5374 }
5375
5376 // Check if we can represent the values as a contiguous range. If so, we use a
5377 // range check + conditional branch instead of a switch.
5378 if (Values.front()->getValue() - Values.back()->getValue() ==
5379 Values.size() - 1) {
5380 ConstantRange RangeToCheck = ConstantRange::getNonEmpty(
5381 Values.back()->getValue(), Values.front()->getValue() + 1);
5382 APInt Offset, RHS;
5383 ICmpInst::Predicate Pred;
5384 RangeToCheck.getEquivalentICmp(Pred, RHS, Offset);
5385 Value *X = CompVal;
5386 if (!Offset.isZero())
5387 X = Builder.CreateAdd(X, ConstantInt::get(CompVal->getType(), Offset));
5388 Value *Cond =
5389 Builder.CreateICmp(Pred, X, ConstantInt::get(CompVal->getType(), RHS));
5390 CondBrInst *NewBI = Builder.CreateCondBr(Cond, EdgeBB, DefaultBB);
5391 if (HasProfile)
5392 setBranchWeights(*NewBI, BranchWeights, /*IsExpected=*/false);
5393 // We don't need to update PHI nodes since we don't add any new edges.
5394 } else {
5395 // Create the new switch instruction now.
5396 SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
5397 if (HasProfile) {
5398 // We know the weight of the default case. We don't know the weight of the
5399 // other cases, but rather than completely lose profiling info, we split
5400 // the remaining probability equally over them.
5401 SmallVector<uint32_t> NewWeights(Values.size() + 1);
5402 NewWeights[0] = BranchWeights[1]; // this is the default, and we swapped
5403 // if TrueWhenEqual.
5404 for (auto &V : drop_begin(NewWeights))
5405 V = BranchWeights[0] / Values.size();
5406 setBranchWeights(*New, NewWeights, /*IsExpected=*/false);
5407 }
5408
5409 // Add all of the 'cases' to the switch instruction.
5410 for (ConstantInt *Val : Values)
5411 New->addCase(Val, EdgeBB);
5412
5413 // We added edges from BB to the EdgeBB. As such, if there were any
5414 // PHI nodes in EdgeBB, they need entries to be added corresponding to
5415 // the number of edges added.
5416 for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
5417 PHINode *PN = cast<PHINode>(BBI);
5418 Value *InVal = PN->getIncomingValueForBlock(BB);
 // One entry for BB already exists, so only Values.size() - 1 more are
 // appended.
5419 for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
5420 PN->addIncoming(InVal, BB);
5421 }
5422 }
5423
5424 // Erase the old branch instruction.
 // NOTE(review): the statement that performs the erase is not visible in this
 // listing.
5426 if (DTU)
5427 DTU->applyUpdates(Updates);
5428
5429 LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
5430 return true;
5431}
5432
5433bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
5434 if (isa<PHINode>(RI->getValue()))
5435 return simplifyCommonResume(RI);
5436 else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHIIt()) &&
5437 RI->getValue() == &*RI->getParent()->getFirstNonPHIIt())
5438 // The resume must unwind the exception that caused control to branch here.
5439 return simplifySingleResume(RI);
5440
5441 return false;
5442}
5443
5444// Check if cleanup block is empty
// Returns true only when every instruction in the range R is an intrinsic
// call whose ID is dbg_declare, dbg_value, dbg_label or lifetime_end; any
// other instruction makes the range non-empty. An empty range yields true.
// NOTE(review): the signature line is not visible in this listing; callers in
// this file invoke it as isCleanupBlockEmpty(make_range(...)).
5446 for (Instruction &I : R) {
5447 auto *II = dyn_cast<IntrinsicInst>(&I);
5448 if (!II)
5449 return false;
5450
5451 Intrinsic::ID IntrinsicID = II->getIntrinsicID();
5452 switch (IntrinsicID) {
5453 case Intrinsic::dbg_declare:
5454 case Intrinsic::dbg_value:
5455 case Intrinsic::dbg_label:
5456 case Intrinsic::lifetime_end:
5457 break;
5458 default:
5459 return false;
5460 }
5461 }
5462 return true;
5463}
5464
5465// Simplify resume that is shared by several landing pads (phi of landing pad).
// The resume operand is a PHI. Incoming blocks whose only successor is the
// resume block and whose incoming value is their own landingpad are collected
// as "trivial" unwind blocks. For each of those, the unwind edges of its
// predecessors are removed and its terminator is replaced with `unreachable`.
// Returns true if at least one trivial block was found.
5466bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
5467 BasicBlock *BB = RI->getParent();
5468
5469 // Check that there are no other instructions except for debug and lifetime
5470 // intrinsics between the phi's and resume instruction.
5471 if (!isCleanupBlockEmpty(make_range(RI->getParent()->getFirstNonPHIIt(),
5472 BB->getTerminator()->getIterator())))
5473 return false;
5474
5475 SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
5476 auto *PhiLPInst = cast<PHINode>(RI->getValue());
5477
5478 // Check incoming blocks to see if any of them are trivial.
5479 for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
5480 Idx++) {
5481 auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
5482 auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);
5483
5484 // If the block has other successors, we can not delete it because
5485 // it has other dependents.
5486 if (IncomingBB->getUniqueSuccessor() != BB)
5487 continue;
5488
5489 auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHIIt());
5490 // Not the landing pad that caused the control to branch here.
5491 if (IncomingValue != LandingPad)
5492 continue;
5493
 // NOTE(review): the opening of this condition is not visible in this
 // listing; the visible argument is the instruction range between the
 // landingpad and the terminator of IncomingBB.
5495 make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
5496 TrivialUnwindBlocks.insert(IncomingBB);
5497 }
5498
5499 // If no trivial unwind blocks, don't do any simplifications.
5500 if (TrivialUnwindBlocks.empty())
5501 return false;
5502
5503 // Turn all invokes that unwind here into calls.
5504 for (auto *TrivialBB : TrivialUnwindBlocks) {
5505 // Blocks that will be simplified should be removed from the phi node.
5506 // Note there could be multiple edges to the resume block, and we need
5507 // to remove them all.
5508 while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
5509 BB->removePredecessor(TrivialBB, true);
5510
 // NOTE(review): the range expression of this loop is not visible in this
 // listing.
5511 for (BasicBlock *Pred :
5513 removeUnwindEdge(Pred, DTU);
5514 ++NumInvokes;
5515 }
5516
5517 // In each SimplifyCFG run, only the current processed block can be erased.
5518 // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
5519 // of erasing TrivialBB, we only remove the branch to the common resume
5520 // block so that we can later erase the resume block since it has no
5521 // predecessors.
5522 TrivialBB->getTerminator()->eraseFromParent();
5523 new UnreachableInst(RI->getContext(), TrivialBB);
5524 if (DTU)
5525 DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}});
5526 }
5527
5528 // Delete the resume block if all its predecessors have been removed.
5529 if (pred_empty(BB))
5530 DeleteDeadBlock(BB, DTU);
5531
 // The set is known to be non-empty here (see the early return above).
5532 return !TrivialUnwindBlocks.empty();
5533}
5534
5535// Simplify resume that is only used by a single (non-phi) landing pad.
// Precondition (asserted): the resume operand is the landingpad that is the
// first non-PHI instruction of the resume's block. On success every
// predecessor has its unwind edge removed and the block is deleted.
5536bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
5537 BasicBlock *BB = RI->getParent();
5538 auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHIIt());
5539 assert(RI->getValue() == LPInst &&
5540 "Resume must unwind the exception that caused control to here");
5541
5542 // Check that there are no other instructions except for debug intrinsics.
 // NOTE(review): the opening of this condition is not visible in this
 // listing; the visible argument is the range from just after the landingpad
 // up to the resume.
5544 make_range<Instruction *>(LPInst->getNextNode(), RI)))
5545 return false;
5546
5547 // Turn all invokes that unwind here into calls and delete the basic block.
5548 for (BasicBlock *Pred : llvm::make_early_inc_range(predecessors(BB))) {
5549 removeUnwindEdge(Pred, DTU);
5550 ++NumInvokes;
5551 }
5552
5553 // The landingpad is now unreachable. Zap it.
5554 DeleteDeadBlock(BB, DTU);
5555 return true;
5556}
5557
// NOTE(review): the signature line of this function is not visible in this
// listing; simplifyCleanupReturn calls it as removeEmptyCleanup(RI, DTU).
// Returns false when the cleanuppad is in a different block than the
// cleanupret or has more than one use; returns true after the block has been
// removed from the CFG and deleted.
5559 // If this is a trivial cleanup pad that executes no instructions, it can be
5560 // eliminated. If the cleanup pad continues to the caller, any predecessor
5561 // that is an EH pad will be updated to continue to the caller and any
5562 // predecessor that terminates with an invoke instruction will have its invoke
5563 // instruction converted to a call instruction. If the cleanup pad being
5564 // simplified does not continue to the caller, each predecessor will be
5565 // updated to continue to the unwind destination of the cleanup pad being
5566 // simplified.
5567 BasicBlock *BB = RI->getParent();
5568 CleanupPadInst *CPInst = RI->getCleanupPad();
5569 if (CPInst->getParent() != BB)
5570 // This isn't an empty cleanup.
5571 return false;
5572
5573 // We cannot kill the pad if it has multiple uses. This typically arises
5574 // from unreachable basic blocks.
5575 if (!CPInst->hasOneUse())
5576 return false;
5577
5578 // Check that there are no other instructions except for benign intrinsics.
 // NOTE(review): the opening of this condition is not visible in this
 // listing; the visible argument is the range from just after the cleanuppad
 // up to the cleanupret.
5580 make_range<Instruction *>(CPInst->getNextNode(), RI)))
5581 return false;
5582
5583 // If the cleanup return we are simplifying unwinds to the caller, this will
5584 // set UnwindDest to nullptr.
5585 BasicBlock *UnwindDest = RI->getUnwindDest();
5586
5587 // We're about to remove BB from the control flow. Before we do, sink any
5588 // PHINodes into the unwind destination. Doing this before changing the
5589 // control flow avoids some potentially slow checks, since we can currently
5590 // be certain that UnwindDest and BB have no common predecessors (since they
5591 // are both EH pads).
5592 if (UnwindDest) {
5593 // First, go through the PHI nodes in UnwindDest and update any nodes that
5594 // reference the block we are removing
5595 for (PHINode &DestPN : UnwindDest->phis()) {
5596 int Idx = DestPN.getBasicBlockIndex(BB);
5597 // Since BB unwinds to UnwindDest, it has to be in the PHI node.
5598 assert(Idx != -1);
5599 // This PHI node has an incoming value that corresponds to a control
5600 // path through the cleanup pad we are removing. If the incoming
5601 // value is in the cleanup pad, it must be a PHINode (because we
5602 // verified above that the block is otherwise empty). Otherwise, the
5603 // value is either a constant or a value that dominates the cleanup
5604 // pad being removed.
5605 //
5606 // Because BB and UnwindDest are both EH pads, all of their
5607 // predecessors must unwind to these blocks, and since no instruction
5608 // can have multiple unwind destinations, there will be no overlap in
5609 // incoming blocks between SrcPN and DestPN.
5610 Value *SrcVal = DestPN.getIncomingValue(Idx);
5611 PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);
5612
 // Translate through SrcPN only when that PHI lives in the block being
 // removed; otherwise every predecessor contributes SrcVal unchanged.
5613 bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
5614 for (auto *Pred : predecessors(BB)) {
5615 Value *Incoming =
5616 NeedPHITranslation ? SrcPN->getIncomingValueForBlock(Pred) : SrcVal;
5617 DestPN.addIncoming(Incoming, Pred);
5618 }
5619 }
5620
5621 // Sink any remaining PHI nodes directly into UnwindDest.
5622 BasicBlock::iterator InsertPt = UnwindDest->getFirstNonPHIIt();
5623 for (PHINode &PN : make_early_inc_range(BB->phis())) {
5624 if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
5625 // If the PHI node has no uses or all of its uses are in this basic
5626 // block (meaning they are debug or lifetime intrinsics), just leave
5627 // it. It will be erased when we erase BB below.
5628 continue;
5629
5630 // Otherwise, sink this PHI node into UnwindDest.
5631 // Any predecessors to UnwindDest which are not already represented
5632 // must be back edges which inherit the value from the path through
5633 // BB. In this case, the PHI value must reference itself.
5634 for (auto *pred : predecessors(UnwindDest))
5635 if (pred != BB)
5636 PN.addIncoming(&PN, pred);
5637 PN.moveBefore(InsertPt);
5638 // Also, add a dummy incoming value for the original BB itself,
5639 // so that the PHI is well-formed until we drop said predecessor.
5640 PN.addIncoming(PoisonValue::get(PN.getType()), BB);
5641 }
5642 }
5643
5644 std::vector<DominatorTree::UpdateType> Updates;
5645
5646 // We use make_early_inc_range here because we will remove all predecessors.
 // NOTE(review): the loop header is not visible in this listing; the body
 // below uses a per-predecessor variable named PredBB.
5648 if (UnwindDest == nullptr) {
 // Pending updates are applied before removeUnwindEdge is called with the
 // same DTU.
5649 if (DTU) {
5650 DTU->applyUpdates(Updates);
5651 Updates.clear();
5652 }
5653 removeUnwindEdge(PredBB, DTU);
5654 ++NumInvokes;
5655 } else {
5656 BB->removePredecessor(PredBB);
5657 Instruction *TI = PredBB->getTerminator();
5658 TI->replaceUsesOfWith(BB, UnwindDest);
5659 if (DTU) {
5660 Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
5661 Updates.push_back({DominatorTree::Delete, PredBB, BB});
5662 }
5663 }
5664 }
5665
5666 if (DTU)
5667 DTU->applyUpdates(Updates);
5668
5669 DeleteDeadBlock(BB, DTU);
5670
5671 return true;
5672}
5673
5674// Try to merge two cleanuppads together.
// Applies when the cleanupret unwinds to a block whose single predecessor is
// the cleanupret's block and whose first instruction is another cleanuppad.
// The successor pad is replaced by the predecessor pad and the cleanupret
// becomes an unconditional branch. Returns true if the merge happened.
// NOTE(review): the signature line is not visible in this listing;
// simplifyCleanupReturn calls it as mergeCleanupPad(RI).
5676 // Skip any cleanuprets which unwind to caller, there is nothing to merge
5677 // with.
5678 BasicBlock *UnwindDest = RI->getUnwindDest();
5679 if (!UnwindDest)
5680 return false;
5681
5682 // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't
5683 // be safe to merge without code duplication.
5684 if (UnwindDest->getSinglePredecessor() != RI->getParent())
5685 return false;
5686
5687 // Verify that our cleanuppad's unwind destination is another cleanuppad.
5688 auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
5689 if (!SuccessorCleanupPad)
5690 return false;
5691
5692 CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
5693 // Replace any uses of the successor cleanuppad with the predecessor pad
5694 // The only cleanuppad uses should be this cleanupret, its cleanupret and
5695 // funclet bundle operands.
5696 SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
5697 // Remove the old cleanuppad.
5698 SuccessorCleanupPad->eraseFromParent();
5699 // Now, we simply replace the cleanupret with a branch to the unwind
5700 // destination.
5701 UncondBrInst::Create(UnwindDest, RI->getParent());
5702 RI->eraseFromParent();
5703
5704 return true;
5705}
5706
5707bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
5708 // It is possible to transiantly have an undef cleanuppad operand because we
5709 // have deleted some, but not all, dead blocks.
5710 // Eventually, this block will be deleted.
5711 if (isa<UndefValue>(RI->getOperand(0)))
5712 return false;
5713
5714 if (mergeCleanupPad(RI))
5715 return true;
5716
5717 if (removeEmptyCleanup(RI, DTU))
5718 return true;
5719
5720 return false;
5721}
5722
5723// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
// First erases instructions that immediately precede the `unreachable`. If
// the `unreachable` then is the first instruction of its block, each
// predecessor's terminator is rewritten so that it no longer targets the
// block, and the block is deleted once it has no predecessors left (unless it
// is the entry block). Returns true if anything changed.
5724bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
5725 BasicBlock *BB = UI->getParent();
5726
5727 bool Changed = false;
5728
5729 // Ensure that any debug-info records that used to occur after the Unreachable
5730 // are moved to in front of it -- otherwise they'll "dangle" at the end of
5731 // the block.
 // NOTE(review): the statement implementing the comment above is not visible
 // in this listing.
5733
5734 // Debug-info records on the unreachable inst itself should be deleted, as
5735 // below we delete everything past the final executable instruction.
5736 UI->dropDbgRecords();
5737
5738 // If there are any instructions immediately before the unreachable that can
5739 // be removed, do so.
5740 while (UI->getIterator() != BB->begin()) {
 // NOTE(review): the declaration of `BBI` and the condition guarding the
 // `break` below are not visible in this listing.
5742 --BBI;
5743
5745 break; // Can not drop any more instructions. We're done here.
5746 // Otherwise, this instruction can be freely erased,
5747 // even if it is not side-effect free.
5748
5749 // Note that deleting EH's here is in fact okay, although it involves a bit
5750 // of subtle reasoning. If this inst is an EH, all the predecessors of this
5751 // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
5752 // and we can therefore guarantee this block will be erased.
5753
5754 // If we're deleting this, we're deleting any subsequent debug info, so
5755 // delete DbgRecords.
5756 BBI->dropDbgRecords();
5757
5758 // Delete this instruction (any uses are guaranteed to be dead)
5759 BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
5760 BBI->eraseFromParent();
5761 Changed = true;
5762 }
5763
5764 // If the unreachable instruction is the first in the block, take a gander
5765 // at all of the predecessors of this instruction, and simplify them.
5766 if (&BB->front() != UI)
5767 return Changed;
5768
5769 std::vector<DominatorTree::UpdateType> Updates;
5770
 // Snapshot the predecessors into a set first: the loop body rewrites their
 // terminators, and a predecessor reaching BB via several edges is visited
 // only once.
5771 SmallSetVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
5772 for (BasicBlock *Predecessor : Preds) {
5773 Instruction *TI = Predecessor->getTerminator();
5774 IRBuilder<> Builder(TI);
5775 if (isa<UncondBrInst>(TI)) {
 // An unconditional branch to an unreachable block is itself unreachable.
5776 new UnreachableInst(TI->getContext(), TI->getIterator());
5777 TI->eraseFromParent();
5778 Changed = true;
5779 if (DTU)
5780 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5781 } else if (auto *BI = dyn_cast<CondBrInst>(TI)) {
5782 // We could either have a proper unconditional branch,
5783 // or a degenerate conditional branch with matching destinations.
5784 if (BI->getSuccessor(0) == BI->getSuccessor(1)) {
5785 new UnreachableInst(TI->getContext(), TI->getIterator());
5786 TI->eraseFromParent();
5787 Changed = true;
5788 } else {
5789 Value* Cond = BI->getCondition();
5790 assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
5791 "The destinations are guaranteed to be different here.");
 // Record, as an assumption, the condition value that avoids BB, then
 // branch unconditionally to the surviving successor.
5792 CallInst *Assumption;
5793 if (BI->getSuccessor(0) == BB) {
5794 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
5795 Builder.CreateBr(BI->getSuccessor(1));
5796 } else {
5797 assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
5798 Assumption = Builder.CreateAssumption(Cond);
5799 Builder.CreateBr(BI->getSuccessor(0));
5800 }
5801 if (Options.AC)
5802 Options.AC->registerAssumption(cast<AssumeInst>(Assumption));
5803
 // NOTE(review): the statement that removes the old conditional branch is
 // not visible in this listing.
5805 Changed = true;
5806 }
5807 if (DTU)
5808 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5809 } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
5810 SwitchInstProfUpdateWrapper SU(*SI);
 // Drop every case that targets BB; the iterator and end are refreshed after
 // each removal.
5811 for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
5812 if (i->getCaseSuccessor() != BB) {
5813 ++i;
5814 continue;
5815 }
5816 BB->removePredecessor(SU->getParent());
5817 i = SU.removeCase(i);
5818 e = SU->case_end();
5819 Changed = true;
5820 }
5821 // Note that the default destination can't be removed!
5822 if (DTU && SI->getDefaultDest() != BB)
5823 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5824 } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
5825 if (II->getUnwindDest() == BB) {
 // Pending updates are applied before removeUnwindEdge is called with the
 // same DTU.
5826 if (DTU) {
5827 DTU->applyUpdates(Updates);
5828 Updates.clear();
5829 }
5830 auto *CI = cast<CallInst>(removeUnwindEdge(TI->getParent(), DTU));
5831 if (!CI->doesNotThrow())
5832 CI->setDoesNotThrow();
5833 Changed = true;
5834 }
5835 } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
5836 if (CSI->getUnwindDest() == BB) {
5837 if (DTU) {
5838 DTU->applyUpdates(Updates);
5839 Updates.clear();
5840 }
5841 removeUnwindEdge(TI->getParent(), DTU);
5842 Changed = true;
5843 continue;
5844 }
5845
 // Remove BB from the handler list; I and E are stepped back after each
 // removal so the following ++I continues with the next handler.
5846 for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
5847 E = CSI->handler_end();
5848 I != E; ++I) {
5849 if (*I == BB) {
5850 CSI->removeHandler(I);
5851 --I;
5852 --E;
5853 Changed = true;
5854 }
5855 }
5856 if (DTU)
5857 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5858 if (CSI->getNumHandlers() == 0) {
5859 if (CSI->hasUnwindDest()) {
5860 // Redirect all predecessors of the block containing CatchSwitchInst
5861 // to instead branch to the CatchSwitchInst's unwind destination.
5862 if (DTU) {
5863 for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
5864 Updates.push_back({DominatorTree::Insert,
5865 PredecessorOfPredecessor,
5866 CSI->getUnwindDest()});
5867 Updates.push_back({DominatorTree::Delete,
5868 PredecessorOfPredecessor, Predecessor});
5869 }
5870 }
5871 Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
5872 } else {
5873 // Rewrite all preds to unwind to caller (or from invoke to call).
5874 if (DTU) {
5875 DTU->applyUpdates(Updates);
5876 Updates.clear();
5877 }
5878 SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor));
5879 for (BasicBlock *EHPred : EHPreds)
5880 removeUnwindEdge(EHPred, DTU);
5881 }
5882 // The catchswitch is no longer reachable.
5883 new UnreachableInst(CSI->getContext(), CSI->getIterator());
5884 CSI->eraseFromParent();
5885 Changed = true;
5886 }
5887 } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
 // CRI is only read inside the assert; the void cast keeps it "used" when
 // asserts are compiled out.
5888 (void)CRI;
5889 assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
5890 "Expected to always have an unwind to BB.");
5891 if (DTU)
5892 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5893 new UnreachableInst(TI->getContext(), TI->getIterator());
5894 TI->eraseFromParent();
5895 Changed = true;
5896 }
5897 }
5898
5899 if (DTU)
5900 DTU->applyUpdates(Updates);
5901
5902 // If this block is now dead, remove it.
5903 if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
5904 DeleteDeadBlock(BB, DTU);
5905 return true;
5906 }
5907
5908 return Changed;
5909}
5910
5919
// Decide whether the case values going to Dest, or the complementary values
// going to OtherDest, form one contiguous range.
// NOTE(review): the function name, its leading parameters (the body uses
// `Condition`, `Cases` and `OtherCases`) and the definition of
// ContiguousCasesResult are not visible in this listing. The body reads
// Cases.back() as the minimum and Cases.front() as the maximum, so Cases is
// presumably sorted in descending order -- confirm.
// Returns std::nullopt when neither set is recognized as contiguous.
5920static std::optional<ContiguousCasesResult>
5923 BasicBlock *Dest, BasicBlock *OtherDest) {
5924 assert(Cases.size() >= 1);
5925
5927 const APInt &Min = Cases.back()->getValue();
5928 const APInt &Max = Cases.front()->getValue();
5929 APInt Offset = Max - Min;
5930 size_t ContiguousOffset = Cases.size() - 1;
 // N values spanning exactly Max - Min == N - 1 leave no gaps: Cases itself
 // is the contiguous range.
5931 if (Offset == ContiguousOffset) {
5932 return ContiguousCasesResult{
5933 /*Min=*/Cases.back(),
5934 /*Max=*/Cases.front(),
5935 /*Dest=*/Dest,
5936 /*OtherDest=*/OtherDest,
5937 /*Cases=*/&Cases,
5938 /*OtherCases=*/&OtherCases,
5939 };
5940 }
5941 ConstantRange CR = computeConstantRange(Condition, /*ForSigned=*/false,
5942 SimplifyQuery(Dest->getDataLayout()));
5943 // If this is a wrapping contiguous range, that is, [Min, OtherMin] +
5944 // [OtherMax, Max] (also [OtherMax, OtherMin]), [OtherMin+1, OtherMax-1] is a
5945 // contiguous range for the other destination. N.B. If CR is not a full range,
5946 // Max+1 is not equal to Min. It's not continuous in arithmetic.
5947 if (Max == CR.getUnsignedMax() && Min == CR.getUnsignedMin()) {
5948 assert(Cases.size() >= 2);
 // Locate the first adjacent pair that is not "previous == next + 1", i.e.
 // the gap between the upper and the lower run of Cases.
5949 auto *It =
5950 std::adjacent_find(Cases.begin(), Cases.end(), [](auto L, auto R) {
5951 return L->getValue() != R->getValue() + 1;
5952 });
5953 if (It == Cases.end())
5954 return std::nullopt;
5955 auto [OtherMax, OtherMin] = std::make_pair(*It, *std::next(It));
 // Exactly one gap exists iff the two runs together account for every case.
5956 if ((Max - OtherMax->getValue()) + (OtherMin->getValue() - Min) ==
5957 Cases.size() - 2) {
 // The values strictly inside the gap belong to OtherDest, so the roles of
 // Dest/OtherDest and Cases/OtherCases are swapped in the result.
5958 return ContiguousCasesResult{
5959 /*Min=*/cast<ConstantInt>(
5960 ConstantInt::get(OtherMin->getType(), OtherMin->getValue() + 1)),
5961 /*Max=*/
 // NOTE(review): the opening of the Max expression is not visible in this
 // listing.
5963 ConstantInt::get(OtherMax->getType(), OtherMax->getValue() - 1)),
5964 /*Dest=*/OtherDest,
5965 /*OtherDest=*/Dest,
5966 /*Cases=*/&OtherCases,
5967 /*OtherCases=*/&Cases,
5968 };
5969 }
5970 }
5971 return std::nullopt;
5972}
5973
5975 DomTreeUpdater *DTU,
5976 bool RemoveOrigDefaultBlock = true) {
 // NOTE(review): the first line of this signature (function name and the
 // `Switch` parameter) is not visible in this listing.
 // Redirects the switch's default destination to a freshly created block that
 // contains only `unreachable`. When RemoveOrigDefaultBlock is set, the switch
 // block is also removed as a predecessor of the original default block.
5977 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
5978 auto *BB = Switch->getParent();
5979 auto *OrigDefaultBlock = Switch->getDefaultDest();
5980 if (RemoveOrigDefaultBlock)
5981 OrigDefaultBlock->removePredecessor(BB);
 // The new block is named "<BB>.unreachabledefault" and is inserted before
 // the original default block in the function.
5982 BasicBlock *NewDefaultBlock = BasicBlock::Create(
5983 BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
5984 OrigDefaultBlock);
5985 auto *UI = new UnreachableInst(Switch->getContext(), NewDefaultBlock);
 // NOTE(review): `UI` is not used by any line visible in this listing; a
 // statement appears to have been dropped here.
5987 Switch->setDefaultDest(&*NewDefaultBlock);
5988 if (DTU) {
 // NOTE(review): the declaration of `Updates` is not visible in this listing.
5990 Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
 // The old edge is deleted from the dominator tree only if no remaining case
 // of the switch still targets the original default block.
5991 if (RemoveOrigDefaultBlock &&
5992 !is_contained(successors(BB), OrigDefaultBlock))
5993 Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
5994 DTU->applyUpdates(Updates);
5995 }
5996}
5997
5998 /// Turn a switch into an integer range comparison and branch.
5999 /// Switches with more than 2 destinations are ignored.
6000 /// Switches with 1 destination are also ignored.
///
/// The cases are partitioned by destination into two groups. If one group is
/// a single value or forms a contiguous range, the switch is replaced by an
/// equality or (optionally offset) unsigned less-than compare feeding a
/// conditional branch, or by an unconditional branch when the range covers
/// every value. Branch weights are folded into the new conditional branch and
/// the PHI nodes of both destinations lose their now-redundant entries.
///
/// \returns true if the switch was replaced, false if it was left untouched.
6001bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
6002 IRBuilder<> &Builder) {
6003 assert(SI->getNumCases() > 1 && "Degenerate switch?");
6004
6005 bool HasDefault = !SI->defaultDestUnreachable();
6006
6007 auto *BB = SI->getParent();
6008 // Partition the cases into two sets with different destinations.
// A reachable default destination always seeds group A.
6009 BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
6010 BasicBlock *DestB = nullptr;
// NOTE(review): the declarations of `CasesA` and `CasesB` are not present in
// this listing; they are used below as containers of case values -- confirm.
6013
6014 for (auto Case : SI->cases()) {
6015 BasicBlock *Dest = Case.getCaseSuccessor();
6016 if (!DestA)
6017 DestA = Dest;
6018 if (Dest == DestA) {
6019 CasesA.push_back(Case.getCaseValue());
6020 continue;
6021 }
6022 if (!DestB)
6023 DestB = Dest;
6024 if (Dest == DestB) {
6025 CasesB.push_back(Case.getCaseValue());
6026 continue;
6027 }
6028 return false; // More than two destinations.
6029 }
6030 if (!DestB)
6031 return false; // All destinations are the same and the default is unreachable
6032
6033 assert(DestA && DestB &&
6034 "Single-destination switch should have been folded.");
6035 assert(DestA != DestB);
6036 assert(DestB != SI->getDefaultDest());
6037 assert(!CasesB.empty() && "There must be non-default cases.");
6038 assert(!CasesA.empty() || HasDefault);
6039
6040 // Figure out if one of the sets of cases form a contiguous range.
6041 std::optional<ContiguousCasesResult> ContiguousCases;
6042
6043 // Only one icmp is needed when there is only one case.
6044 if (!HasDefault && CasesA.size() == 1)
6045 ContiguousCases = ContiguousCasesResult{
6046 /*Min=*/CasesA[0],
6047 /*Max=*/CasesA[0],
6048 /*Dest=*/DestA,
6049 /*OtherDest=*/DestB,
6050 /*Cases=*/&CasesA,
6051 /*OtherCases=*/&CasesB,
6052 };
6053 else if (CasesB.size() == 1)
6054 ContiguousCases = ContiguousCasesResult{
6055 /*Min=*/CasesB[0],
6056 /*Max=*/CasesB[0],
6057 /*Dest=*/DestB,
6058 /*OtherDest=*/DestA,
6059 /*Cases=*/&CasesB,
6060 /*OtherCases=*/&CasesA,
6061 };
6062 // Correctness: Cases to the default destination cannot be contiguous cases.
6063 else if (!HasDefault)
6064 ContiguousCases =
6065 findContiguousCases(SI->getCondition(), CasesA, CasesB, DestA, DestB);
6066
// Group B never targets the default destination (asserted above), so it is
// tried whenever nothing has been found yet.
6067 if (!ContiguousCases)
6068 ContiguousCases =
6069 findContiguousCases(SI->getCondition(), CasesB, CasesA, DestB, DestA);
6070
6071 if (!ContiguousCases)
6072 return false;
6073
6074 auto [Min, Max, Dest, OtherDest, Cases, OtherCases] = *ContiguousCases;
6075
6076 // Start building the compare and branch.
6077
// NOTE(review): the declaration of `Offset` is not present in this listing.
// It is added to the condition before the unsigned compare below, so it is
// presumably the negated lower bound -- confirm.
6079 Constant *NumCases = ConstantInt::get(Offset->getType(),
6080 Max->getValue() - Min->getValue() + 1);
6081 Instruction *NewBI;
6082 if (NumCases->isOneValue()) {
6083 assert(Max->getValue() == Min->getValue());
6084 Value *Cmp = Builder.CreateICmpEQ(SI->getCondition(), Min);
6085 NewBI = Builder.CreateCondBr(Cmp, Dest, OtherDest);
6086 }
6087 // If NumCases overflowed, then all possible values jump to the successor.
6088 else if (NumCases->isNullValue() && !Cases->empty()) {
6089 NewBI = Builder.CreateBr(Dest);
6090 } else {
6091 Value *Sub = SI->getCondition();
6092 if (!Offset->isNullValue())
6093 Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");
6094 Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
6095 NewBI = Builder.CreateCondBr(Cmp, Dest, OtherDest);
6096 }
6097
6098 // Update weight for the newly-created conditional branch.
6099 if (hasBranchWeightMD(*SI) && isa<CondBrInst>(NewBI)) {
6100 SmallVector<uint64_t, 8> Weights;
6101 getBranchWeights(SI, Weights);
6102 if (Weights.size() == 1 + SI->getNumCases()) {
// Sum the weight of every switch successor into the side of the new
// branch it ends up on.
6103 uint64_t TrueWeight = 0;
6104 uint64_t FalseWeight = 0;
6105 for (size_t I = 0, E = Weights.size(); I != E; ++I) {
6106 if (SI->getSuccessor(I) == Dest)
6107 TrueWeight += Weights[I];
6108 else
6109 FalseWeight += Weights[I];
6110 }
// Halve both sums together until each fits in 32 bits.
6111 while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
6112 TrueWeight /= 2;
6113 FalseWeight /= 2;
6114 }
6115 setFittedBranchWeights(*NewBI, {TrueWeight, FalseWeight},
6116 /*IsExpected=*/false, /*ElideAllZero=*/true);
6117 }
6118 }
6119
6120 // Prune obsolete incoming values off the successors' PHI nodes.
// The switch contributed one entry per case (plus one for the default edge);
// all but one are removed, presumably leaving the entry for the new branch.
6121 for (auto &PHI : make_early_inc_range(Dest->phis())) {
6122 unsigned PreviousEdges = Cases->size();
6123 if (Dest == SI->getDefaultDest())
6124 ++PreviousEdges;
6125 for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
6126 PHI.removeIncomingValue(SI->getParent());
6127 }
6128 for (auto &PHI : make_early_inc_range(OtherDest->phis())) {
6129 unsigned PreviousEdges = OtherCases->size();
6130 if (OtherDest == SI->getDefaultDest())
6131 ++PreviousEdges;
6132 unsigned E = PreviousEdges - 1;
6133 // Remove all incoming values from OtherDest if OtherDest is unreachable.
6134 if (isa<UncondBrInst>(NewBI))
6135 ++E;
6136 for (unsigned I = 0; I != E; ++I)
6137 PHI.removeIncomingValue(SI->getParent());
6138 }
6139
6140 // Clean up the default block - it may have phis or other instructions before
6141 // the unreachable terminator.
6142 if (!HasDefault)
// NOTE(review): the statement guarded by the `if` above is not present in
// this listing; as shown, the guard would attach to the next declaration.
6144
6145 auto *UnreachableDefault = SI->getDefaultDest();
6146
6147 // Drop the switch.
6148 SI->eraseFromParent();
6149
6150 if (!HasDefault && DTU)
6151 DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}});
6152
6153 return true;
6154}
6155
6156 /// Compute masked bits for the condition of a switch
6157 /// and use it to remove dead cases.
///
/// A case is treated as dead when its value contradicts the known bits of the
/// condition, needs more significant bits than the condition can have, or is
/// absent from a successfully collected set of possible condition values.
/// When no case is dead and the default is still reachable, the function
/// instead tries to prove the default dead, or to turn the single uncovered
/// value into an explicit case.
///
/// \returns true if the switch was changed.
///
/// NOTE(review): the first line of the signature (function name and leading
/// parameters, including `SI` and `DTU`) is not present in this listing.
6159 AssumptionCache *AC,
6160 const DataLayout &DL) {
6161 Value *Cond = SI->getCondition();
6162 KnownBits Known = computeKnownBits(Cond, DL, AC, SI);
// NOTE(review): the declaration of `KnownValues` is not present in this
// listing.
6164 bool IsKnownValuesValid = collectPossibleValues(Cond, KnownValues, 4);
6165
6166 // We can also eliminate cases by determining that their values are outside of
6167 // the limited range of the condition based on how many significant (non-sign)
6168 // bits are in the condition value.
6169 unsigned MaxSignificantBitsInCond =
// NOTE(review): the initializer expression for the declaration above is not
// present in this listing.
6171
6172 // Gather dead cases.
// NOTE(review): the declaration of `DeadCases` is not present in this listing.
// The per-successor counters below are only maintained when a DomTreeUpdater
// is available; they determine which CFG edges disappear entirely.
6174 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
6175 SmallVector<BasicBlock *, 8> UniqueSuccessors;
6176 for (const auto &Case : SI->cases()) {
6177 auto *Successor = Case.getCaseSuccessor();
6178 if (DTU) {
6179 auto [It, Inserted] = NumPerSuccessorCases.try_emplace(Successor);
6180 if (Inserted)
6181 UniqueSuccessors.push_back(Successor);
6182 ++It->second;
6183 }
6184 ConstantInt *CaseC = Case.getCaseValue();
6185 const APInt &CaseVal = CaseC->getValue();
6186 if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
6187 (CaseVal.getSignificantBits() > MaxSignificantBitsInCond) ||
6188 (IsKnownValuesValid && !KnownValues.contains(CaseC))) {
6189 DeadCases.push_back(CaseC);
6190 if (DTU)
6191 --NumPerSuccessorCases[Successor];
6192 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
6193 << " is dead.\n");
6194 } else if (IsKnownValuesValid)
// A live case accounts for one possible value; what is left in
// KnownValues afterwards is not covered by any case.
6195 KnownValues.erase(CaseC);
6196 }
6197
6198 // If we can prove that the cases must cover all possible values, the
6199 // default destination becomes dead and we can remove it. If we know some
6200 // of the bits in the value, we can use that to more precisely compute the
6201 // number of possible unique case values.
6202 bool HasDefault = !SI->defaultDestUnreachable();
6203 const unsigned NumUnknownBits =
6204 Known.getBitWidth() - (Known.Zero | Known.One).popcount();
6205 assert(NumUnknownBits <= Known.getBitWidth());
6206 if (HasDefault && DeadCases.empty()) {
6207 if (IsKnownValuesValid && all_of(KnownValues, IsaPred<UndefValue>)) {
// NOTE(review): the statement executed here is not present in this
// listing; given the debug message of the helper above it presumably
// makes the default unreachable -- confirm.
6209 return true;
6210 }
6211
6212 if (NumUnknownBits < 64 /* avoid overflow */) {
6213 uint64_t AllNumCases = 1ULL << NumUnknownBits;
6214 if (SI->getNumCases() == AllNumCases) {
// NOTE(review): the statement executed here is not present in this
// listing.
6216 return true;
6217 }
6218 // When only one case value is missing, replace default with that case.
6219 // Eliminating the default branch will provide more opportunities for
6220 // optimization, such as lookup tables.
6221 if (SI->getNumCases() == AllNumCases - 1) {
6222 assert(NumUnknownBits > 1 && "Should be canonicalized to a branch");
6223 IntegerType *CondTy = cast<IntegerType>(Cond->getType());
6224 if (CondTy->getIntegerBitWidth() > 64 ||
6225 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
6226 return false;
6227
// XOR-ing all present case values yields the missing one: with more
// than one unknown bit, every bit is set in an even number of the
// possible values, so the XOR over the full set is zero.
6228 uint64_t MissingCaseVal = 0;
6229 for (const auto &Case : SI->cases())
6230 MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue();
6231 auto *MissingCase = cast<ConstantInt>(
6232 ConstantInt::get(Cond->getType(), MissingCaseVal));
// NOTE(review): the declaration of `SIW` and the head of the call whose
// last argument appears below are not present in this listing.
6234 SIW.addCase(MissingCase, SI->getDefaultDest(),
6235 SIW.getSuccessorWeight(0));
6237 /*RemoveOrigDefaultBlock*/ false);
// The new explicit case took over successor 0's weight just above.
6238 SIW.setSuccessorWeight(0, 0);
6239 return true;
6240 }
6241 }
6242 }
6243
6244 if (DeadCases.empty())
6245 return false;
6246
// NOTE(review): the declaration of `SIW` used in this loop is not present in
// this listing.
6248 for (ConstantInt *DeadCase : DeadCases) {
6249 SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
6250 assert(CaseI != SI->case_default() &&
6251 "Case was not found. Probably mistake in DeadCases forming.");
6252 // Prune unused values from PHI nodes.
6253 CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
6254 SIW.removeCase(CaseI);
6255 }
6256
6257 if (DTU) {
// Only successors whose every case was removed lose their CFG edge.
6258 std::vector<DominatorTree::UpdateType> Updates;
6259 for (auto *Successor : UniqueSuccessors)
6260 if (NumPerSuccessorCases[Successor] == 0)
6261 Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor});
6262 DTU->applyUpdates(Updates);
6263 }
6264
6265 return true;
6266}
6267
6268 /// If BB would be eligible for simplification by
6269 /// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
6270 /// by an unconditional branch), look at the phi node for BB in the successor
6271 /// block and see if the incoming value is equal to CaseValue. If so, return
6272 /// the phi node, and set PhiIndex to BB's index in the phi node.
///
/// Returns nullptr (leaving *PhiIndex untouched) when BB is not such a block
/// or when no phi in the successor receives CaseValue from BB. Only the first
/// matching phi is reported.
///
/// NOTE(review): the first line of the signature (function name and the
/// `CaseValue` parameter) is not present in this listing.
6274 BasicBlock *BB, int *PhiIndex) {
6275 if (&*BB->getFirstNonPHIIt() != BB->getTerminator())
6276 return nullptr; // BB must be empty to be a candidate for simplification.
6277 if (!BB->getSinglePredecessor())
6278 return nullptr; // BB must be dominated by the switch.
6279
// NOTE(review): the declaration of `Branch` is not present in this listing;
// the check below implies it is null unless BB's terminator is an
// unconditional branch -- confirm.
6281 if (!Branch)
6282 return nullptr; // Terminator must be unconditional branch.
6283
6284 BasicBlock *Succ = Branch->getSuccessor();
6285
6286 for (PHINode &PHI : Succ->phis()) {
6287 int Idx = PHI.getBasicBlockIndex(BB);
6288 assert(Idx >= 0 && "PHI has no entry for predecessor?");
6289
6290 Value *InValue = PHI.getIncomingValue(Idx);
6291 if (InValue != CaseValue)
6292 continue;
6293
6294 *PhiIndex = Idx;
6295 return &PHI;
6296 }
6297
6298 return nullptr;
6299}
6300
6301 /// Try to forward the condition of a switch instruction to a phi node
6302 /// dominated by the switch, if that would mean that some of the destination
6303 /// blocks of the switch can be folded away. Return true if a change is made.
///
/// Two rewrites are performed: phi operands in a case destination that equal
/// the case constant are replaced directly, and phi operands reached through
/// an empty forwarding block are replaced when that looks profitable.
///
/// NOTE(review): the signature line is not present in this listing; the body
/// uses a switch instruction named `SI`.
6305 using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;
6306
6307 ForwardingNodesMap ForwardingNodes;
6308 BasicBlock *SwitchBlock = SI->getParent();
6309 bool Changed = false;
6310 for (const auto &Case : SI->cases()) {
6311 ConstantInt *CaseValue = Case.getCaseValue();
6312 BasicBlock *CaseDest = Case.getCaseSuccessor();
6313
6314 // Replace phi operands in successor blocks that are using the constant case
6315 // value rather than the switch condition variable:
6316 // switchbb:
6317 // switch i32 %x, label %default [
6318 // i32 17, label %succ
6319 // ...
6320 // succ:
6321 // %r = phi i32 ... [ 17, %switchbb ] ...
6322 // -->
6323 // %r = phi i32 ... [ %x, %switchbb ] ...
6324
6325 for (PHINode &Phi : CaseDest->phis()) {
6326 // This only works if there is exactly 1 incoming edge from the switch to
6327 // a phi. If there is >1, that means multiple cases of the switch map to 1
6328 // value in the phi, and that phi value is not the switch condition. Thus,
6329 // this transform would not make sense (the phi would be invalid because
6330 // a phi can't have different incoming values from the same block).
6331 int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
6332 if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
6333 count(Phi.blocks(), SwitchBlock) == 1) {
6334 Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
6335 Changed = true;
6336 }
6337 }
6338
6339 // Collect phi nodes that are indirectly using this switch's case constants.
6340 int PhiIdx;
6341 if (auto *Phi = findPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
6342 ForwardingNodes[Phi].push_back(PhiIdx);
6343 }
6344
6345 for (auto &ForwardingNode : ForwardingNodes) {
6346 PHINode *Phi = ForwardingNode.first;
6347 SmallVectorImpl<int> &Indexes = ForwardingNode.second;
6348 // Check if it helps to fold PHI.
// Skip phis where fewer than two operands would change and the condition is
// not already one of the incoming values.
6349 if (Indexes.size() < 2 && !llvm::is_contained(Phi->incoming_values(), SI->getCondition()))
6350 continue;
6351
6352 for (int Index : Indexes)
6353 Phi->setIncomingValue(Index, SI->getCondition());
6354 Changed = true;
6355 }
6356
6357 return Changed;
6358}
6359
6360 /// Return true if the backend will be able to handle
6361 /// initializing an array of constants like C.
///
/// Thread-dependent, DLL-import-dependent and scalable-typed constants are
/// rejected, and the target is asked for a final verdict through
/// TTI.shouldBuildLookupTablesForConstant.
///
/// NOTE(review): the signature line is not present in this listing; the body
/// uses a constant `C` and a TargetTransformInfo `TTI`.
6363 if (C->isThreadDependent())
6364 return false;
6365 if (C->isDLLImportDependent())
6366 return false;
6367
// NOTE(review): the condition guarding the `return false` below is not present
// in this listing.
6370 return false;
6371
6372 // Globals cannot contain scalable types.
6373 if (C->getType()->isScalableTy())
6374 return false;
6375
// NOTE(review): the line opening the block closed below (and declaring `CE`)
// is not present in this listing.
6377 // Pointer casts and in-bounds GEPs will not prohibit the backend from
6378 // materializing the array of constants.
6379 Constant *StrippedC = cast<Constant>(CE->stripInBoundsConstantOffsets());
// Stripping must make progress (otherwise the recursion would not
// terminate), and the stripped constant must itself be acceptable.
6380 if (StrippedC == C || !validLookupTableConstant(StrippedC, TTI))
6381 return false;
6382 }
6383
6384 if (!TTI.shouldBuildLookupTablesForConstant(C))
6385 return false;
6386
6387 return true;
6388}
6389
6390 /// If V is a Constant, return it. Otherwise, try to look up
6391 /// its constant value in ConstantPool, returning null if it's not there.
///
/// NOTE(review): the line carrying the function name and parameter list is
/// not present in this listing; callers in this file pass a value and a
/// constant pool, in that order.
6392static Constant *
6395 if (Constant *C = dyn_cast<Constant>(V))
6396 return C;
6397 return ConstantPool.lookup(V);
6398}
6399
6400 /// Try to fold instruction I into a constant. This works for
6401 /// simple instructions such as binary operations where both operands are
6402 /// constant or can be replaced by constants from the ConstantPool. Returns the
6403 /// resulting constant on success, null otherwise.
///
/// NOTE(review): the lines carrying the function name, the parameter list and
/// the opening of the select-handling block are not present in this listing;
/// neither is the declaration of `COps`.
6404static Constant *
6408 Constant *A = lookupConstant(Select->getCondition(), ConstantPool);
6409 if (!A)
6410 return nullptr;
// A select folds to whichever arm its constant condition picks; any other
// condition constant is not handled.
6411 if (A->isAllOnesValue())
6412 return lookupConstant(Select->getTrueValue(), ConstantPool);
6413 if (A->isNullValue())
6414 return lookupConstant(Select->getFalseValue(), ConstantPool);
6415 return nullptr;
6416 }
6417
// Every operand must resolve to a constant, otherwise folding is abandoned.
6419 for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
6420 if (Constant *A = lookupConstant(I->getOperand(N), ConstantPool))
6421 COps.push_back(A);
6422 else
6423 return nullptr;
6424 }
6425
6426 return ConstantFoldInstOperands(I, COps, DL);
6427}
6428
6429 /// Try to determine the resulting constant values in phi nodes
6430 /// at the common destination basic block, *CommonDest, for one of the case
6431 /// destinations CaseDest corresponding to value CaseVal (nullptr for the
6432 /// default case), of a switch instruction SI.
///
/// On success, one (phi, constant) pair per phi of the common destination that
/// has an entry for the entering block is appended to Res. If *CommonDest is
/// null on entry it is set to the destination found for this case.
///
/// \returns true only if at least one pair is in Res and every examined phi
/// value resolved to a constant accepted by validLookupTableConstant.
///
/// NOTE(review): the first line of the signature (function name and the `SI`,
/// `CaseVal`, `CaseDest` parameters) is not present in this listing.
6433static bool
6435 BasicBlock **CommonDest,
6436 SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
6437 const DataLayout &DL, const TargetTransformInfo &TTI) {
6438 // The block from which we enter the common destination.
6439 BasicBlock *Pred = SI->getParent();
6440
6441 // If CaseDest is empty except for some side-effect free instructions through
6442 // which we can constant-propagate the CaseVal, continue to its successor.
// NOTE(review): the declaration of `ConstantPool` is not present in this
// listing. It is seeded with the switch condition mapped to this case's value.
6444 ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
6445 for (Instruction &I : *CaseDest) {
6446 if (I.isTerminator()) {
6447 // If the terminator is a simple branch, continue to the next block.
6448 if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
6449 return false;
6450 Pred = CaseDest;
6451 CaseDest = I.getSuccessor(0);
6452 } else if (Constant *C = constantFold(&I, DL, ConstantPool)) {
6453 // Instruction is side-effect free and constant.
6454
6455 // If the instruction has uses outside this block or a phi node slot for
6456 // the block, it is not safe to bypass the instruction since it would then
6457 // no longer dominate all its uses.
6458 for (auto &Use : I.uses()) {
6459 User *User = Use.getUser();
// NOTE(review): a line is absent from this listing here; the `I` used
// with `->` below is presumably a pointer declared on that line which
// shadows the loop variable -- confirm.
6461 if (I->getParent() == CaseDest)
6462 continue;
6463 if (PHINode *Phi = dyn_cast<PHINode>(User))
6464 if (Phi->getIncomingBlock(Use) == CaseDest)
6465 continue;
6466 return false;
6467 }
6468
6469 ConstantPool.insert(std::make_pair(&I, C));
6470 } else {
6471 break;
6472 }
6473 }
6474
6475 // If we did not have a CommonDest before, use the current one.
6476 if (!*CommonDest)
6477 *CommonDest = CaseDest;
6478 // If the destination isn't the common one, abort.
6479 if (CaseDest != *CommonDest)
6480 return false;
6481
6482 // Get the values for this case from phi nodes in the destination block.
6483 for (PHINode &PHI : (*CommonDest)->phis()) {
6484 int Idx = PHI.getBasicBlockIndex(Pred);
// Phis without an entry for the entering block are ignored.
6485 if (Idx == -1)
6486 continue;
6487
6488 Constant *ConstVal =
6489 lookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
6490 if (!ConstVal)
6491 return false;
6492
6493 // Be conservative about which kinds of constants we support.
6494 if (!validLookupTableConstant(ConstVal, TTI))
6495 return false;
6496
6497 Res.push_back(std::make_pair(&PHI, ConstVal));
6498 }
6499
6500 return Res.size() > 0;
6501}
6502
6503// Helper function used to add CaseVal to the list of cases that generate
6504// Result. Returns the updated number of cases that generate this result.
6505static size_t mapCaseToResult(ConstantInt *CaseVal,
6506 SwitchCaseResultVectorTy &UniqueResults,
6507 Constant *Result) {
6508 for (auto &I : UniqueResults) {
6509 if (I.first == Result) {
6510 I.second.push_back(CaseVal);
6511 return I.second.size();
6512 }
6513 }
6514 UniqueResults.push_back(
6515 std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
6516 return 1;
6517}
6518
6519 // Helper function that initializes a map containing
6520 // results for the PHI node of the common destination block for a switch
6521 // instruction. Returns false if multiple PHI nodes have been found or if
6522 // there is not a common destination block for the switch.
//
// It also returns false when a single result is shared by more than
// MaxSwitchCasesPerResult cases, when more than MaxUniqueResults distinct
// results are found, or when no default result can be determined although the
// default destination is reachable. On success PHI names the single phi fed by
// the switch and DefaultResult holds the default's constant (or null).
//
// NOTE(review): the first line of the signature (function name and the `SI`
// and `PHI` parameters) is not present in this listing.
6524 BasicBlock *&CommonDest,
6525 SwitchCaseResultVectorTy &UniqueResults,
6526 Constant *&DefaultResult,
6527 const DataLayout &DL,
6528 const TargetTransformInfo &TTI,
6529 uintptr_t MaxUniqueResults) {
6530 for (const auto &I : SI->cases()) {
6531 ConstantInt *CaseVal = I.getCaseValue();
6532
6533 // Resulting value at phi nodes for this case value.
6534 SwitchCaseResultsTy Results;
6535 if (!getCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
6536 DL, TTI))
6537 return false;
6538
6539 // Only one value per case is permitted.
6540 if (Results.size() > 1)
6541 return false;
6542
6543 // Add the case->result mapping to UniqueResults.
6544 const size_t NumCasesForResult =
6545 mapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);
6546
6547 // Early out if there are too many cases for this result.
6548 if (NumCasesForResult > MaxSwitchCasesPerResult)
6549 return false;
6550
6551 // Early out if there are too many unique results.
6552 if (UniqueResults.size() > MaxUniqueResults)
6553 return false;
6554
6555 // Check the PHI consistency.
6556 if (!PHI)
6557 PHI = Results[0].first;
6558 else if (PHI != Results[0].first)
6559 return false;
6560 }
6561 // Find the default result value.
// NOTE(review): the declaration of `DefaultResults` is not present in this
// listing. The return value of the call below is deliberately unused; only the
// collected results are inspected.
6563 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
6564 DL, TTI);
6565 // If the default value is not found abort unless the default destination
6566 // is unreachable.
6567 DefaultResult =
6568 DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
6569
6570 return DefaultResult || SI->defaultDestUnreachable();
6571}
6572
6573 // Helper function that checks if it is possible to transform a switch with only
6574 // two cases (or two cases + default) that produces a result into a select.
6575 // TODO: Handle switches with more than 2 cases that map to the same result.
6576 // The branch weights correspond to the provided Condition (i.e. if Condition is
6577 // modified from the original SwitchInst, the caller must adjust the weights)
//
// Returns the value produced by the emitted select chain, or nullptr when none
// of the supported shapes matches (no instructions are emitted in that case).
// When BranchWeights is non-empty and profile fixes are not disabled, weights
// are attached to the created selects; index 0 is treated as the weight of the
// default destination.
//
// NOTE(review): the opening lines of the five weight-setting calls in this
// function are not present in this listing; only their argument lines remain.
6578static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
6579 Constant *DefaultResult, Value *Condition,
6580 IRBuilder<> &Builder, const DataLayout &DL,
6581 ArrayRef<uint32_t> BranchWeights) {
6582 // If we are selecting between only two cases transform into a simple
6583 // select or a two-way select if default is possible.
6584 // Example:
6585 // switch (a) { %0 = icmp eq i32 %a, 10
6586 // case 10: return 42; %1 = select i1 %0, i32 42, i32 4
6587 // case 20: return 2; ----> %2 = icmp eq i32 %a, 20
6588 // default: return 4; %3 = select i1 %2, i32 2, i32 %1
6589 // }
6590
6591 const bool HasBranchWeights =
6592 !BranchWeights.empty() && !ProfcheckDisableMetadataFixes;
6593
6594 if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
6595 ResultVector[1].second.size() == 1) {
6596 ConstantInt *FirstCase = ResultVector[0].second[0];
6597 ConstantInt *SecondCase = ResultVector[1].second[0];
6598 Value *SelectValue = ResultVector[1].first;
6599 if (DefaultResult) {
6600 Value *ValueCompare =
6601 Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
6602 SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
6603 DefaultResult, "switch.select");
6604 if (auto *SI = dyn_cast<SelectInst>(SelectValue);
6605 SI && HasBranchWeights) {
6606 // We start with 3 probabilities, where the numerator is the
6607 // corresponding BranchWeights[i], and the denominator is the sum over
6608 // BranchWeights. We want the probability and negative probability of
6609 // Condition == SecondCase.
6610 assert(BranchWeights.size() == 3);
6612 *SI, {BranchWeights[2], BranchWeights[0] + BranchWeights[1]},
6613 /*IsExpected=*/false, /*ElideAllZero=*/true);
6614 }
6615 }
6616 Value *ValueCompare =
6617 Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
6618 Value *Ret = Builder.CreateSelect(ValueCompare, ResultVector[0].first,
6619 SelectValue, "switch.select");
6620 if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6621 // We may have had a DefaultResult. Base the position of the first and
6622 // second's branch weights accordingly. Also the probability that Condition
6623 // != FirstCase needs to take that into account.
// NOTE(review): both tests below check `Condition`, not `DefaultResult`;
// Condition has already been passed to CreateICmpEQ above, so they look
// always-true here -- confirm this is intended.
6624 assert(BranchWeights.size() >= 2);
6625 size_t FirstCasePos = (Condition != nullptr);
6626 size_t SecondCasePos = FirstCasePos + 1;
6627 uint32_t DefaultCase = (Condition != nullptr) ? BranchWeights[0] : 0;
6629 {BranchWeights[FirstCasePos],
6630 DefaultCase + BranchWeights[SecondCasePos]},
6631 /*IsExpected=*/false, /*ElideAllZero=*/true);
6632 }
6633 return Ret;
6634 }
6635
6636 // Handle the case where every case yields the same result value and a
6637 if (ResultVector.size() == 1 && DefaultResult) {
// (continued from the comment above) distinct default result exists.
6638 ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
6639 unsigned CaseCount = CaseValues.size();
6640 // n bits group cases map to the same result:
6641 // case 0,4 -> Cond & 0b1..1011 == 0 ? result : default
6642 // case 0,2,4,6 -> Cond & 0b1..1001 == 0 ? result : default
6643 // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
6644 if (isPowerOf2_32(CaseCount)) {
6645 ConstantInt *MinCaseVal = CaseValues[0];
6646 // If there are bits that are set exclusively by CaseValues, we
6647 // can transform the switch into a select if the conjunction of
6648 // all the values uniquely identify CaseValues.
6649 APInt AndMask = APInt::getAllOnes(MinCaseVal->getBitWidth());
6650
6651 // Find the minimum value and compute the and of all the case values.
// The minimum is taken with a signed comparison.
6652 for (auto *Case : CaseValues) {
6653 if (Case->getValue().slt(MinCaseVal->getValue()))
6654 MinCaseVal = Case;
6655 AndMask &= Case->getValue();
6656 }
6657 KnownBits Known = computeKnownBits(Condition, DL);
6658
6659 if (!AndMask.isZero() && Known.getMaxValue().uge(AndMask)) {
6660 // Compute the number of bits that are free to vary.
6661 unsigned FreeBits = Known.countMaxActiveBits() - AndMask.popcount();
6662
6663 // Check if the number of values covered by the mask is equal
6664 // to the number of cases.
6665 if (FreeBits == Log2_32(CaseCount)) {
6666 Value *And = Builder.CreateAnd(Condition, AndMask);
6667 Value *Cmp = Builder.CreateICmpEQ(
6668 And, Constant::getIntegerValue(And->getType(), AndMask));
6669 Value *Ret =
6670 Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6671 if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6672 // We know there's a Default case. We base the resulting branch
6673 // weights off its probability.
6674 assert(BranchWeights.size() >= 2);
6676 *SI,
6677 {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
6678 /*IsExpected=*/false, /*ElideAllZero=*/true);
6679 }
6680 return Ret;
6681 }
6682 }
6683
6684 // Mark the bits case number touched.
// Each case value is taken relative to the minimum case value.
6685 APInt BitMask = APInt::getZero(MinCaseVal->getBitWidth());
6686 for (auto *Case : CaseValues)
6687 BitMask |= (Case->getValue() - MinCaseVal->getValue());
6688
6689 // Check if cases with the same result can cover all number
6690 // in touched bits.
6691 if (BitMask.popcount() == Log2_32(CaseCount)) {
6692 if (!MinCaseVal->isNullValue())
6693 Condition = Builder.CreateSub(Condition, MinCaseVal);
6694 Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
6695 Value *Cmp = Builder.CreateICmpEQ(
6696 And, Constant::getNullValue(And->getType()), "switch.selectcmp");
6697 Value *Ret =
6698 Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6699 if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6700 assert(BranchWeights.size() >= 2);
6702 *SI,
6703 {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
6704 /*IsExpected=*/false, /*ElideAllZero=*/true);
6705 }
6706 return Ret;
6707 }
6708 }
6709
6710 // Handle the case where exactly two case values share the result: compare
// against both and OR the comparisons.
6711 if (CaseValues.size() == 2) {
6712 Value *Cmp1 = Builder.CreateICmpEQ(Condition, CaseValues[0],
6713 "switch.selectcmp.case1");
6714 Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1],
6715 "switch.selectcmp.case2");
6716 Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
6717 Value *Ret =
6718 Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6719 if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6720 assert(BranchWeights.size() >= 2);
6722 *SI, {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
6723 /*IsExpected=*/false, /*ElideAllZero=*/true);
6724 }
6725 return Ret;
6726 }
6727 }
6728
6729 return nullptr;
6730}
6731
6732 // Helper function to cleanup a switch instruction that has been converted into
6733 // a select, fixing up PHI nodes and basic blocks.
//
// An unconditional branch to PHI's block is emitted through Builder, PHI's
// entries coming from the switch block are collapsed into a single entry
// carrying SelectValue, every other successor loses the switch block as a
// predecessor, and the switch is erased. Dominator-tree updates are applied
// when DTU is non-null.
//
// NOTE(review): the first line of the signature (function name and the `SI`
// and `PHI` parameters) is not present in this listing.
6735 Value *SelectValue,
6736 IRBuilder<> &Builder,
6737 DomTreeUpdater *DTU) {
6738 std::vector<DominatorTree::UpdateType> Updates;
6739
6740 BasicBlock *SelectBB = SI->getParent();
6741 BasicBlock *DestBB = PHI->getParent();
6742
// The phi's block may not have been a direct successor of the switch block;
// in that case the new branch introduces a fresh edge.
6743 if (DTU && !is_contained(predecessors(DestBB), SelectBB))
6744 Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
6745 Builder.CreateBr(DestBB);
6746
6747 // Remove the switch.
6748
6749 PHI->removeIncomingValueIf(
6750 [&](unsigned Idx) { return PHI->getIncomingBlock(Idx) == SelectBB; });
6751 PHI->addIncoming(SelectValue, SelectBB);
6752
// A successor can appear several times in the switch; report each deleted
// edge only once.
6753 SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
6754 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
6755 BasicBlock *Succ = SI->getSuccessor(i);
6756
6757 if (Succ == DestBB)
6758 continue;
6759 Succ->removePredecessor(SelectBB);
6760 if (DTU && RemovedSuccessors.insert(Succ).second)
6761 Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
6762 }
6763 SI->eraseFromParent();
6764 if (DTU)
6765 DTU->applyUpdates(Updates);
6766}
6767
6768 /// If a switch is only used to initialize one or more phi nodes in a common
6769 /// successor block with only two different constant values, try to replace the
6770 /// switch with a select. Returns true if the fold was made.
///
/// NOTE(review): the first line of the signature (function name and the `SI`
/// and `Builder` parameters) is not present in this listing.
6772 DomTreeUpdater *DTU, const DataLayout &DL,
6773 const TargetTransformInfo &TTI) {
6774 Value *const Cond = SI->getCondition();
6775 PHINode *PHI = nullptr;
6776 BasicBlock *CommonDest = nullptr;
6777 Constant *DefaultResult;
6778 SwitchCaseResultVectorTy UniqueResults;
6779 // Collect all the cases that will deliver the same value from the switch.
6780 if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
6781 DL, TTI, /*MaxUniqueResults*/ 2))
6782 return false;
6783
6784 assert(PHI != nullptr && "PHI for value select not found");
6785 Builder.SetInsertPoint(SI);
6786 SmallVector<uint32_t, 4> BranchWeights;
// NOTE(review): the line opening the block closed below and the initializer of
// `HasWeights` are not present in this listing; the assert implies
// BranchWeights is filled exactly when weights were found -- confirm.
6788 [[maybe_unused]] auto HasWeights =
6790 assert(!HasWeights == (BranchWeights.empty()));
6791 }
6792 assert(BranchWeights.empty() ||
6793 (BranchWeights.size() >=
6794 UniqueResults.size() + (DefaultResult != nullptr)));
6795
6796 Value *SelectValue = foldSwitchToSelect(UniqueResults, DefaultResult, Cond,
6797 Builder, DL, BranchWeights);
6798 if (!SelectValue)
6799 return false;
6800
6801 removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
6802 return true;
6803}
6804
6805namespace {
6806
6807/// This class finds alternatives for switches to ultimately
6808/// replace the switch.
6809class SwitchReplacement {
6810public:
6811 /// Create a helper for optimizations to use as a switch replacement.
6812 /// Find a better representation for the content of Values,
6813 /// using DefaultValue to fill any holes in the table.
6814 SwitchReplacement(
6815 Module &M, uint64_t TableSize, ConstantInt *Offset,
6816 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
6817 Constant *DefaultValue, const DataLayout &DL,
6818 const TargetTransformInfo &TTI, const StringRef &FuncName);
6819
6820 /// Build instructions with Builder to retrieve values using Index
6821 /// and replace the switch.
6822 Value *replaceSwitch(Value *Index, IRBuilder<> &Builder, const DataLayout &DL,
6823 Function *Func);
6824
6825 /// Return true if a table with TableSize elements of
6826 /// type ElementType would fit in a target-legal register.
6827 static bool wouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
6828 Type *ElementType);
6829
6830 /// Return the default value of the switch.
6831 Constant *getDefaultValue();
6832
6833 /// Return true if the replacement is a lookup table.
6834 bool isLookupTable();
6835
6836 /// Return true if the replacement is a bit map.
6837 bool isBitMap();
6838
6839private:
6840 // Depending on the switch, there are different alternatives.
6841 enum {
6842 // For switches where each case contains the same value, we just have to
6843 // store that single value and return it for each lookup.
6844 SingleValueKind,
6845
6846 // For switches where there is a linear relationship between table index
6847 // and values. We calculate the result with a simple multiplication
6848 // and addition instead of a table lookup.
6849 LinearMapKind,
6850
6851 // For small tables with integer elements, we can pack them into a bitmap
6852 // that fits into a target-legal register. Values are retrieved by
6853 // shift and mask operations.
6854 BitMapKind,
6855
6856 // The table is stored as an array of values. Values are retrieved by load
6857 // instructions from the table.
6858 LookupTableKind
6859 } Kind;
6860
6861 // The default value of the switch.
6862 Constant *DefaultValue;
6863
6864 // The type of the output values.
6865 Type *ValueType;
6866
6867 // For SingleValueKind, this is the single value.
6868 Constant *SingleValue = nullptr;
6869
6870 // For BitMapKind, this is the bitmap.
6871 ConstantInt *BitMap = nullptr;
6872 IntegerType *BitMapElementTy = nullptr;
6873
6874 // For LinearMapKind, these are the constants used to derive the value.
6875 ConstantInt *LinearOffset = nullptr;
6876 ConstantInt *LinearMultiplier = nullptr;
6877 bool LinearMapValWrapped = false;
6878
6879 // For LookupTableKind, this is the table.
6880 Constant *Initializer = nullptr;
6881};
6882
6883} // end anonymous namespace
6884
/// Analyze the results one PHI receives from a switch and choose how to
/// replace the switch for that PHI, recording the choice in Kind.
///
/// The strategies are tried in this order: a single shared value, a linear
/// map of the index, a bitmap packed into one integer, and finally an array
/// initializer for a lookup table.
///
/// \param M            Module whose context is used to create constants.
/// \param TableSize    Number of table slots; must be >= Values.size().
/// \param Offset       Case value that maps to table index 0.
/// \param Values       (case value, result) pairs; must not be empty.
/// \param DefaultValue Constant stored in every slot no case maps to.
/// \param DL           Data layout used for the fits-in-register check.
/// \param TTI          Supplies the minimum lookup table entry bit width.
/// \param FuncName     Not referenced in the visible body.
//
// NOTE(review): in this listing the closing brace that ends the linear-map
// section has no visible opening statement, and IT in the bitmap section has
// no visible declaration. The lines introducing them appear to be missing from
// this copy -- confirm against the upstream file before editing the logic.
6885SwitchReplacement::SwitchReplacement(
6886 Module &M, uint64_t TableSize, ConstantInt *Offset,
6887 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
6888 Constant *DefaultValue, const DataLayout &DL,
6889 const TargetTransformInfo &TTI, const StringRef &FuncName)
6890 : DefaultValue(DefaultValue) {
6891 assert(Values.size() && "Can't build lookup table without values!");
6892 assert(TableSize >= Values.size() && "Can't fit values in table!");
6893
6894 // If all values in the table are equal, this is that value.
6895 SingleValue = Values.begin()->second;
6896
6897 ValueType = Values.begin()->second->getType();
6898
6899 // Build up the table contents.
6900 SmallVector<Constant *, 64> TableContents(TableSize);
6901 for (const auto &[CaseVal, CaseRes] : Values) {
6902 assert(CaseRes->getType() == ValueType);
6903
// Each result lands at (case value - Offset).
6904 uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
6905 TableContents[Idx] = CaseRes;
6906
// Poison results never disqualify the single-value candidate. A poison
// candidate is replaced by the first non-poison result; a second, different
// non-poison result clears the candidate.
6907 if (SingleValue && !isa<PoisonValue>(CaseRes) && CaseRes != SingleValue)
6908 SingleValue = isa<PoisonValue>(SingleValue) ? CaseRes : nullptr;
6909 }
6910
6911 // Fill in any holes in the table with the default result.
6912 if (Values.size() < TableSize) {
6913 assert(DefaultValue &&
6914 "Need a default value to fill the lookup table holes.");
6915 assert(DefaultValue->getType() == ValueType);
6916 for (uint64_t I = 0; I < TableSize; ++I) {
6917 if (!TableContents[I])
6918 TableContents[I] = DefaultValue;
6919 }
6920
6921 // If the default value is poison, all the holes are poison.
6922 bool DefaultValueIsPoison = isa<PoisonValue>(DefaultValue);
6923
// A non-poison default that differs from the candidate means the table is
// not single-valued.
6924 if (DefaultValue != SingleValue && !DefaultValueIsPoison)
6925 SingleValue = nullptr;
6926 }
6927
6928 // If each element in the table contains the same value, we only need to store
6929 // that single value.
6930 if (SingleValue) {
6931 Kind = SingleValueKind;
6932 return;
6933 }
6934
6935 // Check if we can derive the value with a linear transformation from the
6936 // table index.
6938 bool LinearMappingPossible = true;
6939 APInt PrevVal;
6940 APInt DistToPrev;
6941 // When linear map is monotonic and signed overflow doesn't happen on
6942 // maximum index, we can attach nsw on Add and Mul.
6943 bool NonMonotonic = false;
6944 assert(TableSize >= 2 && "Should be a SingleValue table.");
6945 // Check if there is the same distance between two consecutive values.
6946 for (uint64_t I = 0; I < TableSize; ++I) {
6947 ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);
6948
6949 if (!ConstVal && isa<PoisonValue>(TableContents[I])) {
6950 // This is a poison, so it's (probably) a lookup table hole.
6951 // To prevent any regressions from before we switched to using poison as
6952 // the default value, holes will fall back to using the first value.
6953 // This can be removed once we add proper handling for poisons in lookup
6954 // tables.
6955 ConstVal = dyn_cast<ConstantInt>(Values[0].second);
6956 }
6957
6958 if (!ConstVal) {
6959 // This is an undef. We could deal with it, but undefs in lookup tables
6960 // are very seldom. It's probably not worth the additional complexity.
6961 LinearMappingPossible = false;
6962 break;
6963 }
6964 const APInt &Val = ConstVal->getValue();
6965 if (I != 0) {
6966 APInt Dist = Val - PrevVal;
// The first step fixes the expected distance; any later step that differs
// rules out a linear map.
6967 if (I == 1) {
6968 DistToPrev = Dist;
6969 } else if (Dist != DistToPrev) {
6970 LinearMappingPossible = false;
6971 break;
6972 }
// A step whose signed comparison disagrees with the sign of Dist means the
// sequence wrapped in the signed sense.
6973 NonMonotonic |=
6974 Dist.isStrictlyPositive() ? Val.sle(PrevVal) : Val.sgt(PrevVal);
6975 }
6976 PrevVal = Val;
6977 }
6978 if (LinearMappingPossible) {
6979 LinearOffset = cast<ConstantInt>(TableContents[0]);
6980 LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
// Note: from here to the end of this scope the local APInt M shadows the
// Module &M parameter.
6981 APInt M = LinearMultiplier->getValue();
// MayWrap stays true unless TableSize - 1 passes the isIntN check for the
// multiplier's bit width; in that case smul_ov sets it to whether
// multiplier * (TableSize - 1) overflows as a signed product.
6982 bool MayWrap = true;
6983 if (isIntN(M.getBitWidth(), TableSize - 1))
6984 (void)M.smul_ov(APInt(M.getBitWidth(), TableSize - 1), MayWrap);
6985 LinearMapValWrapped = NonMonotonic || MayWrap;
6986 Kind = LinearMapKind;
6987 return;
6988 }
6989 }
6990
6991 // If the type is integer and the table fits in a register, build a bitmap.
6992 if (wouldFitInRegister(DL, TableSize, ValueType)) {
6994 APInt TableInt(TableSize * IT->getBitWidth(), 0);
// Walk from the last entry to the first, shifting before each insert, so
// that entry 0 ends up in the least significant bits.
6995 for (uint64_t I = TableSize; I > 0; --I) {
6996 TableInt <<= IT->getBitWidth();
6997 // Insert values into the bitmap. Undef values are set to zero.
6998 if (!isa<UndefValue>(TableContents[I - 1])) {
6999 ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
7000 TableInt |= Val->getValue().zext(TableInt.getBitWidth());
7001 }
7002 }
7003 BitMap = ConstantInt::get(M.getContext(), TableInt);
7004 BitMapElementTy = IT;
7005 Kind = BitMapKind;
7006 return;
7007 }
7008
// For integer tables, try to store narrower elements: compute the range
// covered by all non-undef entries and truncate every entry when fewer bits
// than the original width suffice. replaceSwitch zero-extends the loaded
// value back to ValueType when the element type differs.
7009 if (auto *IT = dyn_cast<IntegerType>(ValueType)) {
7010 ConstantRange Range(IT->getBitWidth(), false);
7011 for (Constant *Value : TableContents)
7012 if (!isa<UndefValue>(Value))
7013 Range = Range.unionWith(cast<ConstantInt>(Value)->getValue());
7014 // TODO: handle sign extension as well?
7015 unsigned NeededBitWidth =
7016 std::max(TTI.getMinimumLookupTableEntryBitWidth(),
7017 unsigned(PowerOf2Ceil(Range.getActiveBits())));
7018 if (NeededBitWidth < IT->getBitWidth()) {
7019 IntegerType *DstTy = IntegerType::get(IT->getContext(), NeededBitWidth);
7020 for (Constant *&Value : TableContents)
7021 Value = ConstantFoldCastInstruction(Instruction::Trunc, Value, DstTy);
7022 }
7023 }
7024
7025 // Store the table in an array.
7026 auto *TableTy = ArrayType::get(TableContents[0]->getType(), TableSize);
7027 Initializer = ConstantArray::get(TableTy, TableContents);
7028
7029 Kind = LookupTableKind;
7030}
7031
7032Value *SwitchReplacement::replaceSwitch(Value *Index, IRBuilder<> &Builder,
7033 const DataLayout &DL, Function *Func) {
7034 switch (Kind) {
7035 case SingleValueKind:
7036 return SingleValue;
7037 case LinearMapKind: {
7038 ++NumLinearMaps;
7039 // Derive the result value from the input value.
7040 Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
7041 false, "switch.idx.cast");
7042 if (!LinearMultiplier->isOne())
7043 Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult",
7044 /*HasNUW = */ false,
7045 /*HasNSW = */ !LinearMapValWrapped);
7046
7047 if (!LinearOffset->isZero())
7048 Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset",
7049 /*HasNUW = */ false,
7050 /*HasNSW = */ !LinearMapValWrapped);
7051 return Result;
7052 }
7053 case BitMapKind: {
7054 ++NumBitMaps;
7055 // Type of the bitmap (e.g. i59).
7056 IntegerType *MapTy = BitMap->getIntegerType();
7057
7058 // Cast Index to the same type as the bitmap.
7059 // Note: The Index is <= the number of elements in the table, so
7060 // truncating it to the width of the bitmask is safe.
7061 Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");
7062
7063 // Multiply the shift amount by the element width. NUW/NSW can always be
7064 // set, because wouldFitInRegister guarantees Index * ShiftAmt is in
7065 // BitMap's bit width.
7066 ShiftAmt = Builder.CreateMul(
7067 ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
7068 "switch.shiftamt",/*HasNUW =*/true,/*HasNSW =*/true);
7069
7070 // Shift down.
7071 Value *DownShifted =
7072 Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
7073 // Mask off.
7074 return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
7075 }
7076 case LookupTableKind: {
7077 ++NumLookupTables;
7078 auto *Table =
7079 new GlobalVariable(*Func->getParent(), Initializer->getType(),
7080 /*isConstant=*/true, GlobalVariable::PrivateLinkage,
7081 Initializer, "switch.table." + Func->getName());
7082 Table->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
7083 // Set the alignment to that of an array items. We will be only loading one
7084 // value out of it.
7085 Table->setAlignment(DL.getPrefTypeAlign(ValueType));
7086 Type *IndexTy = DL.getIndexType(Table->getType());
7087 auto *ArrayTy = cast<ArrayType>(Table->getValueType());
7088
7089 if (Index->getType() != IndexTy) {
7090 unsigned OldBitWidth = Index->getType()->getIntegerBitWidth();
7091 Index = Builder.CreateZExtOrTrunc(Index, IndexTy);
7092 if (auto *Zext = dyn_cast<ZExtInst>(Index))
7093 Zext->setNonNeg(
7094 isUIntN(OldBitWidth - 1, ArrayTy->getNumElements() - 1));
7095 }
7096
7097 Value *GEPIndices[] = {ConstantInt::get(IndexTy, 0), Index};
7098 Value *GEP =
7099 Builder.CreateInBoundsGEP(ArrayTy, Table, GEPIndices, "switch.gep");
7100 Value *Load =
7101 Builder.CreateLoad(ArrayTy->getElementType(), GEP, "switch.load");
7102 if (Load->getType() == ValueType)
7103 return Load;
7104 return Builder.CreateZExt(Load, ValueType, "switch.ext");
7105 }
7106 }
7107 llvm_unreachable("Unknown helper kind!");
7108}
7109
7110bool SwitchReplacement::wouldFitInRegister(const DataLayout &DL,
7111 uint64_t TableSize,
7112 Type *ElementType) {
7113 auto *IT = dyn_cast<IntegerType>(ElementType);
7114 if (!IT)
7115 return false;
7116 // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
7117 // are <= 15, we could try to narrow the type.
7118
7119 // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
7120 if (TableSize >= UINT_MAX / IT->getBitWidth())
7121 return false;
7122 return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
7123}
7124
// Decide whether Ty is acceptable as a lookup table element type.
// Returns true for any type TTI reports as legal. Otherwise returns true only
// for integer types whose width is a power of two, at least 8 bits, and fits
// in a legal integer according to DL.
//
// NOTE(review): the line carrying this function's name and leading parameters
// is not present in this listing. The call site in this file passes
// (Ty, TTI, DL) -- confirm the exact signature against the upstream file.
7126 const DataLayout &DL) {
7127 // Allow any legal type.
7128 if (TTI.isTypeLegal(Ty))
7129 return true;
7130
// Everything below applies to integer types only.
7131 auto *IT = dyn_cast<IntegerType>(Ty);
7132 if (!IT)
7133 return false;
7134
7135 // Also allow power of 2 integer types that have at least 8 bits and fit in
7136 // a register. These types are common in frontend languages and targets
7137 // usually support loads of these types.
7138 // TODO: We could relax this to any integer that fits in a register and rely
7139 // on ABI alignment and padding in the table to allow the load to be widened.
7140 // Or we could widen the constants and truncate the load.
7141 unsigned BitWidth = IT->getBitWidth();
7142 return BitWidth >= 8 && isPowerOf2_32(BitWidth) &&
7143 DL.fitsInLegalInteger(IT->getBitWidth());
7144}
7145
7146Constant *SwitchReplacement::getDefaultValue() { return DefaultValue; }
7147
7148bool SwitchReplacement::isLookupTable() { return Kind == LookupTableKind; }
7149
7150bool SwitchReplacement::isBitMap() { return Kind == BitMapKind; }
7151
/// Return true if \p NumCases cases spread over a range of \p CaseRange
/// values meet the minimum density threshold.
///
/// \param NumCases  Number of cases in the switch.
/// \param CaseRange Size of the value range the cases span.
/// \param OptSize   Selects the stricter 40% threshold instead of 10%.
/// \returns true if NumCases / CaseRange is at least the threshold; false if
///          it is below it or if \p CaseRange is too large to scale safely.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange, bool OptSize) {
  // 40% is the default density for building a jump table in optsize/minsize
  // mode, 10% is the default density for jump tables. See also
  // TargetLoweringBase::isSuitableForJumpTable(), which this function was based
  // on.
  const uint64_t MinDensity = OptSize ? 40 : 10;

  // Largest operand that can be multiplied by 100 without wrapping.
  const uint64_t MaxScalable = UINT64_MAX / 100;

  if (CaseRange >= MaxScalable)
    return false; // Avoid multiplication overflows below.

  // CaseRange * MinDensity is now known to be below UINT64_MAX. If
  // NumCases * 100 would wrap, its true value exceeds UINT64_MAX and therefore
  // exceeds the right-hand side, so answer directly instead of comparing a
  // wrapped product.
  if (NumCases > MaxScalable)
    return true;

  return NumCases * 100 >= CaseRange * MinDensity;
}
7164
7165static bool isSwitchDense(ArrayRef<int64_t> Values, bool OptSize) {
7166 uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
7167 uint64_t Range = Diff + 1;
7168 if (Range < Diff)
7169 return false; // Overflow.
7170
7171 return isSwitchDense(Values.size(), Range, OptSize);
7172}
7173
7174/// Determine whether a lookup table should be built for this switch, based on
7175/// the number of cases, size of the table, and the types of the results.
/// Returns true when a table for every result type would fit in a register.
/// Returns false when that is not the case and some result type is not legal
/// for a lookup table. Otherwise the decision is left to isSwitchDense.
7176// TODO: We could support larger than legal types by limiting based on the
7177// number of loads required and/or table size. If the constants are small we
7178// could use smaller table entries and extend after the load.
// NOTE(review): the line carrying this function's name and leading parameters
// is not present in this listing. The call site in this file passes
// (SI, TableSize, TTI, DL, ResultTypes) -- confirm the signature upstream.
7180 const TargetTransformInfo &TTI,
7181 const DataLayout &DL,
7182 const SmallVector<Type *> &ResultTypes) {
7183 if (SI->getNumCases() > TableSize)
7184 return false; // TableSize overflowed.
7185
7186 bool AllTablesFitInRegister = true;
7187 bool HasIllegalType = false;
7188 for (const auto &Ty : ResultTypes) {
7189 // Saturate this flag to true.
7190 HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);
7191
7192 // Saturate this flag to false.
7193 AllTablesFitInRegister =
7194 AllTablesFitInRegister &&
7195 SwitchReplacement::wouldFitInRegister(DL, TableSize, Ty);
7196
7197 // If both flags saturate, we're done. NOTE: This *only* works with
7198 // saturating flags, and all flags have to saturate first due to the
7199 // non-deterministic behavior of iterating over a dense map.
7200 if (HasIllegalType && !AllTablesFitInRegister)
7201 break;
7202 }
7203
7204 // If each table would fit in a register, we should build it anyway.
7205 if (AllTablesFitInRegister)
7206 return true;
7207
7208 // Don't build a table that doesn't fit in-register if it has illegal types.
7209 if (HasIllegalType)
7210 return false;
7211
// All types are legal but at least one table needs memory: require the
// switch to be dense enough, using the stricter threshold under optsize.
7212 return isSwitchDense(SI->getNumCases(), TableSize,
7213 SI->getFunction()->hasOptSize());
7214}
7215
// Decide whether the switch condition can be used directly as the table
// index, i.e. without subtracting the minimum case value first.
// Always true when MinCaseVal is zero. Otherwise this requires a non-negative
// MinCaseVal, a MaxCaseVal whose limited value is not UINT64_MAX (so adding 1
// for the table size cannot wrap), available default results, and that a
// table of MaxCaseVal + 1 entries fits in a register for every result type.
// TTI is not referenced in the visible body.
//
// NOTE(review): the line carrying this function's name is not present in this
// listing. The call site in this file passes (*MinCaseVal, *MaxCaseVal,
// HasDefaultResults, ResultTypes, DL, TTI) -- confirm the signature upstream.
7217 ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
7218 bool HasDefaultResults, const SmallVector<Type *> &ResultTypes,
7219 const DataLayout &DL, const TargetTransformInfo &TTI) {
7220 if (MinCaseVal.isNullValue())
7221 return true;
// Presumably default results are needed because the slots below MinCaseVal
// become holes that must be filled -- verify against the caller.
7222 if (MinCaseVal.isNegative() ||
7223 MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
7224 !HasDefaultResults)
7225 return false;
7226 return all_of(ResultTypes, [&](const auto &ResultType) {
7227 return SwitchReplacement::wouldFitInRegister(
7228 DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */, ResultType);
7229 });
7230}
7231
7232/// Try to reuse the switch table index compare. Following pattern:
7233/// \code
7234/// if (idx < tablesize)
7235/// r = table[idx]; // table does not contain default_value
7236/// else
7237/// r = default_value;
7238/// if (r != default_value)
7239/// ...
7240/// \endcode
7241/// Is optimized to:
7242/// \code
7243/// cond = idx < tablesize;
7244/// if (cond)
7245/// r = table[idx];
7246/// else
7247/// r = default_value;
7248/// if (cond)
7249/// ...
7250/// \endcode
7251/// Jump threading will then eliminate the second if(cond).
//
// NOTE(review): this listing is missing the line with the function name and
// the lines that declare CmpInst, CmpOp1, DefaultConst and CaseConst; only
// their uses are visible. Confirm the declarations against the upstream file.
7253 User *PhiUser, BasicBlock *PhiBlock, CondBrInst *RangeCheckBranch,
7254 Constant *DefaultValue,
7255 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
7257 if (!CmpInst)
7258 return;
7259
7260 // We require that the compare is in the same block as the phi so that jump
7261 // threading can do its work afterwards.
7262 if (CmpInst->getParent() != PhiBlock)
7263 return;
7264
7266 if (!CmpOp1)
7267 return;
7268
7269 Value *RangeCmp = RangeCheckBranch->getCondition();
7270 Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
7271 Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());
7272
7273 // Check if the compare with the default value is constant true or false.
7274 const DataLayout &DL = PhiBlock->getDataLayout();
7276 CmpInst->getPredicate(), DefaultValue, CmpOp1, DL);
7277 if (DefaultConst != TrueConst && DefaultConst != FalseConst)
7278 return;
7279
7280 // Check if the compare with the case values is distinct from the default
7281 // compare result.
// Every case result must yield a true/false constant different from
// DefaultConst; a single exception aborts the transform.
7282 for (auto ValuePair : Values) {
7284 CmpInst->getPredicate(), ValuePair.second, CmpOp1, DL);
7285 if (!CaseConst || CaseConst == DefaultConst ||
7286 (CaseConst != TrueConst && CaseConst != FalseConst))
7287 return;
7288 }
7289
7290 // Check if the branch instruction dominates the phi node. It's a simple
7291 // dominance check, but sufficient for our needs.
7292 // Although this check is invariant in the calling loops, it's better to do it
7293 // at this late stage. Practically we do it at most once for a switch.
7294 BasicBlock *BranchBlock = RangeCheckBranch->getParent();
7295 for (BasicBlock *Pred : predecessors(PhiBlock)) {
7296 if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
7297 return;
7298 }
7299
// The compare is false for the default value and true for every table value,
// which matches the range check itself.
7300 if (DefaultConst == FalseConst) {
7301 // The compare yields the same result. We can replace it.
7302 CmpInst->replaceAllUsesWith(RangeCmp);
7303 ++NumTableCmpReuses;
7304 } else {
7305 // The compare yields the same result, just inverted. We can replace it.
// The xor with 1 is inserted immediately before the range check branch.
7306 Value *InvertedTableCmp = BinaryOperator::CreateXor(
7307 RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
7308 RangeCheckBranch->getIterator());
7309 CmpInst->replaceAllUsesWith(InvertedTableCmp);
7310 ++NumTableCmpReuses;
7311 }
7312}
7313
7314/// If the switch is only used to initialize one or more phi nodes in a common
7315/// successor block with different constant values, replace the switch with
7316/// lookup tables.
/// Returns true if the switch was erased and replaced; returns false as soon
/// as any precondition check fails.
//
// NOTE(review): this listing is missing the line with the function name and
// leading parameters, and the lines that declare ResultLists, DefaultResults,
// PHIs, ResultsTy and PhiToReplacementMap; only their uses are visible.
// Confirm the declarations against the upstream file.
7318 DomTreeUpdater *DTU, const DataLayout &DL,
7319 const TargetTransformInfo &TTI,
7320 bool ConvertSwitchToLookupTable) {
7321 assert(SI->getNumCases() > 1 && "Degenerate switch?");
7322
7323 BasicBlock *BB = SI->getParent();
7324 Function *Fn = BB->getParent();
7325
7326 // FIXME: If the switch is too sparse for a lookup table, perhaps we could
7327 // split off a dense part and build a lookup table for that.
7328
7329 // FIXME: This creates arrays of GEPs to constant strings, which means each
7330 // GEP needs a runtime relocation in PIC code. We should just build one big
7331 // string and lookup indices into that.
7332
7333 // Ignore switches with less than three cases. Lookup tables will not make
7334 // them faster, so we don't analyze them.
7335 if (SI->getNumCases() < 3)
7336 return false;
7337
7338 // Figure out the corresponding result for each case value and phi node in the
7339 // common destination, as well as the min and max case values.
7340 assert(!SI->cases().empty());
7341 SwitchInst::CaseIt CI = SI->case_begin();
7342 ConstantInt *MinCaseVal = CI->getCaseValue();
7343 ConstantInt *MaxCaseVal = CI->getCaseValue();
7344
7345 BasicBlock *CommonDest = nullptr;
7346
7347 using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
7349
7351 SmallVector<Type *> ResultTypes;
7353
7354 for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
7355 ConstantInt *CaseVal = CI->getCaseValue();
// Min and max are tracked with signed comparisons.
7356 if (CaseVal->getValue().slt(MinCaseVal->getValue()))
7357 MinCaseVal = CaseVal;
7358 if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
7359 MaxCaseVal = CaseVal;
7360
7361 // Resulting value at phi nodes for this case value.
7363 ResultsTy Results;
7364 if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
7365 Results, DL, TTI))
7366 return false;
7367
7368 // Append the result and result types from this case to the list for each
7369 // phi.
7370 for (const auto &I : Results) {
7371 PHINode *PHI = I.first;
7372 Constant *Value = I.second;
7373 auto [It, Inserted] = ResultLists.try_emplace(PHI);
// PHIs records each phi once, in first-seen order.
7374 if (Inserted)
7375 PHIs.push_back(PHI);
7376 It->second.push_back(std::make_pair(CaseVal, Value));
7377 ResultTypes.push_back(PHI->getType());
7378 }
7379 }
7380
7381 // If the table has holes, we need a constant result for the default case
7382 // or a bitmask that fits in a register.
7383 SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
7384 bool HasDefaultResults =
7385 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
7386 DefaultResultsList, DL, TTI);
7387 for (const auto &I : DefaultResultsList) {
7388 PHINode *PHI = I.first;
7389 Constant *Result = I.second;
7390 DefaultResults[PHI] = Result;
7391 }
7392
// Pick the table size and index offset: index directly with the condition
// (offset 0, size MaxCaseVal + 1) or with condition - MinCaseVal.
7393 bool UseSwitchConditionAsTableIndex = shouldUseSwitchConditionAsTableIndex(
7394 *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
7395 uint64_t TableSize;
7396 ConstantInt *TableIndexOffset;
7397 if (UseSwitchConditionAsTableIndex) {
7398 TableSize = MaxCaseVal->getLimitedValue() + 1;
7399 TableIndexOffset = ConstantInt::get(MaxCaseVal->getIntegerType(), 0);
7400 } else {
7401 TableSize =
7402 (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;
7403
7404 TableIndexOffset = MinCaseVal;
7405 }
7406
7407 // If the default destination is unreachable, or if the lookup table covers
7408 // all values of the conditional variable, branch directly to the lookup table
7409 // BB. Otherwise, check that the condition is within the case range.
// The number of results recorded for the first phi is compared with
// TableSize below to detect holes.
7410 uint64_t NumResults = ResultLists[PHIs[0]].size();
7411 bool DefaultIsReachable = !SI->defaultDestUnreachable();
7412
7413 bool TableHasHoles = (NumResults < TableSize);
7414
7415 // If the table has holes but the default destination doesn't produce any
7416 // constant results, the lookup table entries corresponding to the holes will
7417 // contain poison.
7418 bool AllHolesArePoison = TableHasHoles && !HasDefaultResults;
7419
7420 // If the default destination doesn't produce a constant result but is still
7421 // reachable, and the lookup table has holes, we need to use a mask to
7422 // determine if the current index should load from the lookup table or jump
7423 // to the default case.
7424 // The mask is unnecessary if the table has holes but the default destination
7425 // is unreachable, as in that case the holes must also be unreachable.
7426 bool NeedMask = AllHolesArePoison && DefaultIsReachable;
7427 if (NeedMask) {
7428 // As an extra penalty for the validity test we require more cases.
7429 if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
7430 return false;
// The mask holds one bit per table slot, so it must fit in a legal integer.
7431 if (!DL.fitsInLegalInteger(TableSize))
7432 return false;
7433 }
7434
7435 if (!shouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
7436 return false;
7437
7438 // Compute the table index value.
7439 Value *TableIndex;
7440 if (UseSwitchConditionAsTableIndex) {
7441 TableIndex = SI->getCondition();
7442 if (HasDefaultResults) {
7443 // Grow the table to cover all possible index values to avoid the range
7444 // check. It will use the default result to fill in the table hole later,
7445 // so make sure it exists.
7446 ConstantRange CR = computeConstantRange(TableIndex, /*ForSigned=*/false,
7447 SimplifyQuery(DL));
7448 // Growing the table shouldn't have any size impact; this is ensured by
7449 // checking wouldFitInRegister.
7450 // TODO: Consider growing the table also when it doesn't fit in a register
7451 // if no optsize is specified.
7452 const uint64_t UpperBound = CR.getUpper().getLimitedValue();
7453 if (!CR.isUpperWrapped() &&
7454 all_of(ResultTypes, [&](const auto &ResultType) {
7455 return SwitchReplacement::wouldFitInRegister(DL, UpperBound,
7456 ResultType);
7457 })) {
7458 // There may be some case index larger than the UpperBound (unreachable
7459 // case), so make sure the table size does not get smaller.
7460 TableSize = std::max(UpperBound, TableSize);
7461 // The default branch is unreachable after we enlarge the lookup table.
7462 // Adjust DefaultIsReachable to reuse code path.
7463 DefaultIsReachable = false;
7464 }
7465 }
7466 }
7467
7468 // Keep track of the switch replacement for each phi
7470 for (PHINode *PHI : PHIs) {
7471 const auto &ResultList = ResultLists[PHI];
7472
7473 Type *ResultType = ResultList.begin()->second->getType();
7474 // Use any value to fill the lookup table holes.
7475 Constant *DefaultVal =
7476 AllHolesArePoison ? PoisonValue::get(ResultType) : DefaultResults[PHI];
7477 StringRef FuncName = Fn->getName();
7478 SwitchReplacement Replacement(*Fn->getParent(), TableSize, TableIndexOffset,
7479 ResultList, DefaultVal, DL, TTI, FuncName);
7480 PhiToReplacementMap.insert({PHI, Replacement});
7481 }
7482
7483 bool AnyLookupTables = any_of(
7484 PhiToReplacementMap, [](auto &KV) { return KV.second.isLookupTable(); });
7485 bool AnyBitMaps = any_of(PhiToReplacementMap,
7486 [](auto &KV) { return KV.second.isBitMap(); });
7487
7488 // A few conditions prevent the generation of lookup tables:
7489 // 1. The target does not support lookup tables.
7490 // 2. The "no-jump-tables" function attribute is set.
7491 // However, these objections do not apply to other switch replacements, like
7492 // the bitmap, so we only stop here if any of these conditions are met and we
7493 // want to create a LUT. Otherwise, continue with the switch replacement.
7494 if (AnyLookupTables &&
7495 (!TTI.shouldBuildLookupTables() ||
7496 Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
7497 return false;
7498
7499 // In the early optimization pipeline, disable formation of lookup tables,
7500 // bit maps and mask checks, as they may inhibit further optimization.
7501 if (!ConvertSwitchToLookupTable &&
7502 (AnyLookupTables || AnyBitMaps || NeedMask))
7503 return false;
7504
// No early return follows this point; everything below emits IR and rewires
// the CFG.
7505 Builder.SetInsertPoint(SI);
7506 // TableIndex is the switch condition - TableIndexOffset if we don't
7507 // use the condition directly
7508 if (!UseSwitchConditionAsTableIndex) {
7509 // If the default is unreachable, all case values are s>= MinCaseVal. Then
7510 // we can try to attach nsw.
7511 bool MayWrap = true;
7512 if (!DefaultIsReachable) {
7513 APInt Res =
7514 MaxCaseVal->getValue().ssub_ov(MinCaseVal->getValue(), MayWrap);
7515 (void)Res;
7516 }
7517 TableIndex = Builder.CreateSub(SI->getCondition(), TableIndexOffset,
7518 "switch.tableidx", /*HasNUW =*/false,
7519 /*HasNSW =*/!MayWrap);
7520 }
7521
7522 std::vector<DominatorTree::UpdateType> Updates;
7523
7524 // Compute the maximum table size representable by the integer type we are
7525 // switching upon.
7526 unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
7527 uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
7528 assert(MaxTableSize >= TableSize &&
7529 "It is impossible for a switch to have more entries than the max "
7530 "representable value of its input integer type's size.");
7531
7532 // Create the BB that does the lookups.
7533 Module &Mod = *CommonDest->getParent()->getParent();
7534 BasicBlock *LookupBB = BasicBlock::Create(
7535 Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);
7536
// RangeCheckBranch is only set when a range check is emitted. CondBranch is
// the conditional branch that later receives the fitted branch weights: the
// range check, or the hole-check branch when a mask is used.
7537 CondBrInst *RangeCheckBranch = nullptr;
7538 CondBrInst *CondBranch = nullptr;
7539
7540 Builder.SetInsertPoint(SI);
7541 const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
7542 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
7543 Builder.CreateBr(LookupBB);
7544 if (DTU)
7545 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
7546 // Note: We call removePredecessor later since we need to be able to get the
7547 // PHI value for the default case in case we're using a bit mask.
7548 } else {
7549 Value *Cmp = Builder.CreateICmpULT(
7550 TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
7551 RangeCheckBranch =
7552 Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
7553 CondBranch = RangeCheckBranch;
7554 if (DTU)
7555 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
7556 }
7557
7558 // Populate the BB that does the lookups.
7559 Builder.SetInsertPoint(LookupBB);
7560
7561 if (NeedMask) {
7562 // Before doing the lookup, we do the hole check. The LookupBB is therefore
7563 // re-purposed to do the hole check, and we create a new LookupBB.
7564 BasicBlock *MaskBB = LookupBB;
7565 MaskBB->setName("switch.hole_check");
7566 LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
7567 CommonDest->getParent(), CommonDest);
7568
7569 // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
7570 // unnecessary illegal types.
7571 uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
7572 APInt MaskInt(TableSizePowOf2, 0);
7573 APInt One(TableSizePowOf2, 1);
7574 // Build bitmask; fill in a 1 bit for every case.
7575 const ResultListTy &ResultList = ResultLists[PHIs[0]];
7576 for (const auto &Result : ResultList) {
7577 uint64_t Idx = (Result.first->getValue() - TableIndexOffset->getValue())
7578 .getLimitedValue();
7579 MaskInt |= One << Idx;
7580 }
7581 ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);
7582
7583 // Get the TableIndex'th bit of the bitmask.
7584 // If this bit is 0 (meaning hole) jump to the default destination,
7585 // else continue with table lookup.
7586 IntegerType *MapTy = TableMask->getIntegerType();
7587 Value *MaskIndex =
7588 Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
7589 Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
7590 Value *LoBit = Builder.CreateTrunc(
7591 Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
7592 CondBranch = Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
7593 if (DTU) {
7594 Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
7595 Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
7596 }
7597 Builder.SetInsertPoint(LookupBB);
7598 addPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB);
7599 }
7600
7601 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
7602 // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
7603 // do not delete PHINodes here.
7604 SI->getDefaultDest()->removePredecessor(BB,
7605 /*KeepOneInputPHIs=*/true);
7606 if (DTU)
7607 Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
7608 }
7609
// Emit the replacement value for each phi in LookupBB and add it as the
// phi's incoming value from that block.
7610 for (PHINode *PHI : PHIs) {
7611 const ResultListTy &ResultList = ResultLists[PHI];
7612 auto Replacement = PhiToReplacementMap.at(PHI);
7613 auto *Result = Replacement.replaceSwitch(TableIndex, Builder, DL, Fn);
7614 // Do a small peephole optimization: re-use the switch table compare if
7615 // possible.
7616 if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
7617 BasicBlock *PhiBlock = PHI->getParent();
7618 // Search for compare instructions which use the phi.
7619 for (auto *User : PHI->users()) {
7620 reuseTableCompare(User, PhiBlock, RangeCheckBranch,
7621 Replacement.getDefaultValue(), ResultList);
7622 }
7623 }
7624
7625 PHI->addIncoming(Result, LookupBB);
7626 }
7627
7628 Builder.CreateBr(CommonDest);
7629 if (DTU)
7630 Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});
7631
// Branch weights are only carried over when a conditional branch was
// emitted. The default successor's weight goes to ToDefaultWeight and the
// weights of all other successors are summed into ToLookupWeight.
7632 SmallVector<uint32_t> BranchWeights;
7633 const bool HasBranchWeights = CondBranch && !ProfcheckDisableMetadataFixes &&
7634 extractBranchWeights(*SI, BranchWeights);
7635 uint64_t ToLookupWeight = 0;
7636 uint64_t ToDefaultWeight = 0;
7637
7638 // Remove the switch.
7639 SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
7640 for (unsigned I = 0, E = SI->getNumSuccessors(); I < E; ++I) {
7641 BasicBlock *Succ = SI->getSuccessor(I);
7642
7643 if (Succ == SI->getDefaultDest()) {
7644 if (HasBranchWeights)
7645 ToDefaultWeight += BranchWeights[I];
7646 continue;
7647 }
7648 Succ->removePredecessor(BB);
// A successor can appear for several cases; record its edge deletion once.
7649 if (DTU && RemovedSuccessors.insert(Succ).second)
7650 Updates.push_back({DominatorTree::Delete, BB, Succ});
7651 if (HasBranchWeights)
7652 ToLookupWeight += BranchWeights[I];
7653 }
7654 SI->eraseFromParent();
7655 if (HasBranchWeights)
7656 setFittedBranchWeights(*CondBranch, {ToLookupWeight, ToDefaultWeight},
7657 /*IsExpected=*/false);
7658 if (DTU)
7659 DTU->applyUpdates(Updates);
7660
7661 if (NeedMask)
7662 ++NumLookupTablesHoles;
7663 return true;
7664}
7665
7666/// Try to transform a switch that has "holes" in it to a contiguous sequence
7667/// of cases.
7668///
7669/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
7670/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
7671///
7672/// This converts a sparse switch into a dense switch which allows better
7673/// lowering and could also allow transforming into a lookup table.
///
/// The rewrite subtracts a base case value (Values[0]) from the condition,
/// rotates the difference right by the number of trailing zero bits shared by
/// all rebased case values, and rewrites each case value the same way. Nothing
/// is changed unless the rebased and shifted values pass isSwitchDense().
///
/// \returns true if the switch was rewritten, false if it was left untouched.
// NOTE(review): the first line of the signature (return type, name, leading
// parameters) is not visible in this listing. The call site in
// simplifySwitch() passes (SI, Builder, DL, TTI).
7675 const DataLayout &DL,
7676 const TargetTransformInfo &TTI) {
7677 auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
 // Case values are manipulated as 64-bit integers below, so wider conditions
 // are rejected, as are widths DataLayout does not report as fitting in a
 // legal integer.
7678 if (CondTy->getIntegerBitWidth() > 64 ||
7679 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7680 return false;
7681 // Only bother with this optimization if there are more than 3 switch cases;
7682 // SDAG will only bother creating jump tables for 4 or more cases.
7683 if (SI->getNumCases() < 4)
7684 return false;
7685
7686 // This transform is agnostic to the signedness of the input or case values. We
7687 // can treat the case values as signed or unsigned. We can optimize more common
7688 // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
7689 // as signed.
 // NOTE(review): `Values` is used from here on, but its declaration and any
 // sorting step are not visible in this listing; the code below relies on
 // Values[0] being the base of an ascending sequence. Confirm against the
 // original file.
7691 for (const auto &C : SI->cases())
7692 Values.push_back(C.getCaseValue()->getValue().getSExtValue());
7694
7695 // If the switch is already dense, there's nothing useful to do here.
7696 if (isSwitchDense(Values, SI->getFunction()->hasOptSize()))
7697 return false;
7698
7699 // First, transform the values such that they start at zero and ascend.
7700 int64_t Base = Values[0];
 // The subtrahend is cast to unsigned, presumably so that wrap-around in the
 // subtraction is well defined.
7701 for (auto &V : Values)
7702 V -= (uint64_t)(Base);
7703
7704 // Now we have signed numbers that have been shifted so that, given enough
7705 // precision, there are no negative values. Since the rest of the transform
7706 // is bitwise only, we switch now to an unsigned representation.
7707
7708 // This transform can be done speculatively because it is so cheap - it
7709 // results in a single rotate operation being inserted.
7710
7711 // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
7712 // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
7713 // less than 64.
 // Shift ends up as the number of trailing zero bits common to every rebased
 // value, i.e. the largest power of two dividing all of them.
7714 unsigned Shift = 64;
7715 for (auto &V : Values)
7716 Shift = std::min(Shift, (unsigned)llvm::countr_zero((uint64_t)V));
7717 assert(Shift < 64);
7718 if (Shift > 0)
7719 for (auto &V : Values)
7720 V = (int64_t)((uint64_t)V >> Shift);
7721
7722 if (!isSwitchDense(Values, SI->getFunction()->hasOptSize()))
7723 // Transform didn't create a dense switch.
7724 return false;
7725
7726 // The obvious transform is to shift the switch condition right and emit a
7727 // check that the condition actually cleanly divided by GCD, i.e.
7728 // (C & ((1 << Shift) - 1)) == 0
7729 // inserting a new CFG edge to handle the case where it didn't divide cleanly.
7730 //
7731 // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
7732 // shift and puts the shifted-off bits in the uppermost bits. If any of these
7733 // are nonzero then the switch condition will be very large and will hit the
7734 // default case.
7735
7736 auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
7737 Builder.SetInsertPoint(SI);
7738 Value *Sub =
7739 Builder.CreateSub(SI->getCondition(), ConstantInt::getSigned(Ty, Base));
 // fshl(Sub, Sub, BitWidth - Shift) is a rotate-left by BitWidth - Shift,
 // which is the rotate-right by Shift described above.
7740 Value *Rot = Builder.CreateIntrinsic(
7741 Ty, Intrinsic::fshl,
7742 {Sub, Sub, ConstantInt::get(Ty, Ty->getBitWidth() - Shift)});
7743 SI->replaceUsesOfWith(SI->getCondition(), Rot);
7744
 // Apply the same rebase-and-shift to every case value. A logical shift is
 // enough here: the low Shift bits of each rebased value are zero by
 // construction of Shift.
7745 for (auto Case : SI->cases()) {
7746 auto *Orig = Case.getCaseValue();
7747 auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base, true);
7748 Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
7749 }
7750 return true;
7751}
7752
7753/// Tries to transform the switch when the condition is umin with a constant.
7754/// In that case, the default branch can be replaced by the constant's branch.
7755/// This method also removes dead cases when the simplification cannot replace
7756/// the default branch.
7757///
7758/// For example:
7759/// switch(umin(a, 3)) {
7760/// case 0:
7761/// case 1:
7762/// case 2:
7763/// case 3:
7764/// case 4:
7765/// // ...
7766/// default:
7767/// unreachable
7768/// }
7769///
7770/// Transforms into:
7771///
7772/// switch(a) {
7773/// case 0:
7774/// case 1:
7775/// case 2:
7776/// default:
7777/// // This is case 3
7778/// }
///
/// \returns true if at least one case was removed or the default destination
/// was replaced; false if the condition is not a umin with a constant or
/// nothing was changed.
// NOTE(review): the signature line and the declarations of `Constant`,
// `Updates` and `SIW` are not visible in this listing. The call site in
// simplifySwitch() passes (SI, DTU).
7780 Value *A;
7782
7783 if (!match(SI->getCondition(), m_UMin(m_Value(A), m_ConstantInt(Constant))))
7784 return false;
7785
7788 BasicBlock *BB = SIW->getParent();
7789
7790 // Dead cases are removed even when the simplification fails.
7791 // A case is dead when its value is higher than the Constant.
 // Updates is appended to regardless of whether DTU is set, so it also serves
 // as the "something changed" flag for the early return below.
7792 for (auto I = SI->case_begin(), E = SI->case_end(); I != E;) {
7793 if (!I->getCaseValue()->getValue().ugt(Constant->getValue())) {
7794 ++I;
7795 continue;
7796 }
7797 BasicBlock *DeadCaseBB = I->getCaseSuccessor();
7798 DeadCaseBB->removePredecessor(BB);
7799 Updates.push_back({DominatorTree::Delete, BB, DeadCaseBB});
 // Removing a case changes the case list, so both the current iterator and
 // the end iterator are refreshed.
7800 I = SIW.removeCase(I);
7801 E = SIW->case_end();
7802 }
7803
7804 auto Case = SI->findCaseValue(Constant);
7805 // If the case value is not found, `findCaseValue` returns the default case.
7806 // In this scenario, since there is no explicit `case 3:`, the simplification
7807 // fails. The simplification also fails when the switch's default destination
7808 // is reachable.
7809 if (!SI->defaultDestUnreachable() || Case == SI->case_default()) {
7810 if (DTU)
7811 DTU->applyUpdates(Updates);
7812 return !Updates.empty();
7813 }
7814
 // The destination of the case equal to Constant becomes the new default, the
 // now-redundant explicit case is dropped, and the switch reads A directly
 // instead of the umin.
7815 BasicBlock *Unreachable = SI->getDefaultDest();
7816 SIW.replaceDefaultDest(Case);
7817 SIW.removeCase(Case);
7818 SIW->setCondition(A);
7819
 // The edge to the old (unreachable) default destination no longer exists.
7820 Updates.push_back({DominatorTree::Delete, BB, Unreachable});
7821
7822 if (DTU)
7823 DTU->applyUpdates(Updates);
7824
7825 return true;
7826}
7827
7828/// Tries to transform switch of powers of two to reduce switch range.
7829/// For example, switch like:
7830/// switch (C) { case 1: case 2: case 64: case 128: }
7831/// will be transformed to:
7832/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
7833///
7834/// This transformation allows better lowering and may transform the switch
7835/// instruction into a sequence of bit manipulation and a smaller
7836/// log2(C)-indexed value table (instead of traditionally emitting a load of the
7837/// address of the jump target, and indirectly jump to it).
///
/// When the default destination is reachable, a ctpop-based guard branch is
/// inserted in front of the switch so that inputs that are not a power of two
/// still reach the default destination.
///
/// \returns true if the switch was rewritten, false if it was left untouched.
// NOTE(review): the first line of the signature is not visible in this
// listing. The call site in simplifySwitch() passes
// (SI, Builder, DTU, DL, TTI).
7839 DomTreeUpdater *DTU,
7840 const DataLayout &DL,
7841 const TargetTransformInfo &TTI) {
7842 Value *Condition = SI->getCondition();
7843 LLVMContext &Context = SI->getContext();
7844 auto *CondTy = cast<IntegerType>(Condition->getType());
7845
 // Case values are read as 64-bit integers below, so wider conditions are
 // rejected, as are widths DataLayout does not report as fitting in a legal
 // integer.
7846 if (CondTy->getIntegerBitWidth() > 64 ||
7847 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7848 return false;
7849
7850 // Ensure trailing zeroes count intrinsic emission is not too expensive.
7851 IntrinsicCostAttributes Attrs(Intrinsic::cttz, CondTy,
7852 {Condition, ConstantInt::getTrue(Context)});
7853 if (TTI.getIntrinsicInstrCost(Attrs, TTI::TCK_SizeAndLatency) >
7854 TTI::TCC_Basic * 2)
7855 return false;
7856
7857 // Only bother with this optimization if there are more than 3 switch cases.
7858 // SDAG will start emitting jump tables for 4 or more cases.
7859 if (SI->getNumCases() < 4)
7860 return false;
7861
7862 // Check that switch cases are powers of two.
 // NOTE(review): the declaration of `Values` is not visible in this listing.
7864 for (const auto &Case : SI->cases()) {
7865 uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
7866 if (llvm::has_single_bit(CaseValue))
7867 Values.push_back(CaseValue);
7868 else
7869 return false;
7870 }
7871
7872 // isSwitchDense requires case values to be sorted.
 // NOTE(review): the sorting statement itself is not visible in this listing;
 // the range computed below from Values.back() and Values.front() depends on
 // it.
7874 if (!isSwitchDense(Values.size(),
7875 llvm::countr_zero(Values.back()) -
7876 llvm::countr_zero(Values.front()) + 1,
7877 SI->getFunction()->hasOptSize()))
7878 // Transform is unable to generate dense switch.
7879 return false;
7880
7881 Builder.SetInsertPoint(SI);
7882
7883 if (!SI->defaultDestUnreachable()) {
7884 // Let non-power-of-two inputs jump to the default case, when the latter is
7885 // reachable.
7886 auto *PopC = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, Condition);
7887 auto *IsPow2 = Builder.CreateICmpEQ(PopC, ConstantInt::get(CondTy, 1));
7888
 // Split the block at the switch: OrigBB keeps everything before it and
 // gets the new conditional branch, SplitBB holds the switch.
7889 auto *OrigBB = SI->getParent();
7890 auto *DefaultCaseBB = SI->getDefaultDest();
7891 BasicBlock *SplitBB = SplitBlock(OrigBB, SI, DTU);
7892 auto It = OrigBB->getTerminator()->getIterator();
7893 SmallVector<uint32_t> Weights;
7894 auto HasWeights =
 // NOTE(review): the initializer of HasWeights (one source line) is not
 // visible in this listing.
7896 auto *BI = CondBrInst::Create(IsPow2, SplitBB, DefaultCaseBB, It);
7897 if (HasWeights && any_of(Weights, not_equal_to(0))) {
7898 // IsPow2 covers a subset of the cases in which we'd go to the default
7899 // label. The other is those powers of 2 that don't appear in the case
7900 // statement. We don't know the distribution of the values coming in, so
7901 // the safest is to split 50-50 the original probability to `default`.
7902 uint64_t OrigDenominator =
 // NOTE(review): the initializer of OrigDenominator (one source line) is
 // not visible in this listing.
7904 SmallVector<uint64_t> NewWeights(2);
7905 NewWeights[1] = Weights[0] / 2;
7906 NewWeights[0] = OrigDenominator - NewWeights[1];
7907 setFittedBranchWeights(*BI, NewWeights, /*IsExpected=*/false);
7908 // The probability of executing the default block stays constant. It was
7909 // p_d = Weights[0] / OrigDenominator
7910 // we rewrite as W/D
7911 // We want to find the probability of the default branch of the switch
7912 // statement. Let's call it X. We have W/D = W/2D + X * (1-W/2D)
7913 // i.e. the original probability is the probability we go to the default
7914 // branch from the BI branch, or we take the default branch on the SI.
7915 // Meaning X = W / (2D - W), or (W/2) / (D - W/2)
7916 // This matches using W/2 for the default branch probability numerator and
7917 // D-W/2 as the denominator.
7918 Weights[0] = NewWeights[1];
7919 uint64_t CasesDenominator = OrigDenominator - Weights[0];
7920 for (auto &W : drop_begin(Weights))
7921 W = NewWeights[0] * static_cast<double>(W) / CasesDenominator;
7922
7923 setBranchWeights(*SI, Weights, /*IsExpected=*/false);
7924 }
7925 // BI is handling the default case for SI, and so should share its DebugLoc.
7926 BI->setDebugLoc(SI->getDebugLoc());
 // `It` still refers to the terminator SplitBlock left in OrigBB; BI was
 // created in front of it and now replaces it.
7927 It->eraseFromParent();
7928
7929 addPredecessorToBlock(DefaultCaseBB, OrigBB, SplitBB);
7930 if (DTU)
7931 DTU->applyUpdates({{DominatorTree::Insert, OrigBB, DefaultCaseBB}});
7932 }
7933
7934 // Replace each case with its trailing zeros number.
7935 for (auto &Case : SI->cases()) {
7936 auto *OrigValue = Case.getCaseValue();
7937 Case.setValue(ConstantInt::get(OrigValue->getIntegerType(),
7938 OrigValue->getValue().countr_zero()));
7939 }
7940
7941 // Replace condition with its trailing zeros number.
7942 auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
7943 Intrinsic::cttz, {CondTy}, {Condition, ConstantInt::getTrue(Context)});
7944
7945 SI->setCondition(ConditionTrailingZeros);
7946
7947 return true;
7948}
7949
7950/// Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have
7951/// the same destination.
///
/// Two shapes are handled: two explicit cases whose missing third result is
/// served by the default destination, and three explicit cases with an
/// unreachable default. In both, the switch and the compare intrinsic are
/// replaced by an icmp on the intrinsic's operands plus a conditional branch.
///
/// \returns true if the switch was replaced, false if it was left untouched.
// NOTE(review): the first line of the signature is not visible in this
// listing. The call site in simplifySwitch() passes (SI, Builder, DTU).
7953 DomTreeUpdater *DTU) {
7954 auto *Cmp = dyn_cast<CmpIntrinsic>(SI->getCondition());
 // Cmp is erased at the end, so it must have no users besides this switch.
7955 if (!Cmp || !Cmp->hasOneUse())
7956 return false;
7957
 // NOTE(review): the declaration of `Weights` is not visible in this listing.
 // Index 0 is used below as the weight of the default destination; the other
 // entries are read via Case.getSuccessorIndex().
7959 bool HasWeights = extractBranchWeights(getBranchWeightMDNode(*SI), Weights);
7960 if (!HasWeights)
7961 Weights.resize(4); // Avoid checking HasWeights everywhere.
7962
7963 // Normalize to [us]cmp == Res ? Succ : OtherSucc.
7964 int64_t Res;
7965 BasicBlock *Succ, *OtherSucc;
7966 uint32_t SuccWeight = 0, OtherSuccWeight = 0;
7967 BasicBlock *Unreachable = nullptr;
7968
7969 if (SI->getNumCases() == 2) {
7970 // Find which of 1, 0 or -1 is missing (handled by default dest).
7971 SmallSet<int64_t, 3> Missing;
7972 Missing.insert(1);
7973 Missing.insert(0);
7974 Missing.insert(-1);
7975
7976 Succ = SI->getDefaultDest();
7977 SuccWeight = Weights[0];
7978 OtherSucc = nullptr;
7979 for (auto &Case : SI->cases()) {
7980 std::optional<int64_t> Val =
7981 Case.getCaseValue()->getValue().trySExtValue();
7982 if (!Val)
7983 return false;
 // Bail out on case values other than -1, 0 or 1.
7984 if (!Missing.erase(*Val))
7985 return false;
 // Both explicit cases must share a single destination.
7986 if (OtherSucc && OtherSucc != Case.getCaseSuccessor())
7987 return false;
7988 OtherSucc = Case.getCaseSuccessor();
7989 OtherSuccWeight += Weights[Case.getSuccessorIndex()];
7990 }
7991
7992 assert(Missing.size() == 1 && "Should have one case left");
7993 Res = *Missing.begin();
7994 } else if (SI->getNumCases() == 3 && SI->defaultDestUnreachable()) {
7995 // Normalize so that Succ is taken once and OtherSucc twice.
7996 Unreachable = SI->getDefaultDest();
7997 Succ = OtherSucc = nullptr;
7998 for (auto &Case : SI->cases()) {
7999 BasicBlock *NewSucc = Case.getCaseSuccessor();
8000 uint32_t Weight = Weights[Case.getSuccessorIndex()];
8001 if (!OtherSucc || OtherSucc == NewSucc) {
8002 OtherSucc = NewSucc;
8003 OtherSuccWeight += Weight;
8004 } else if (!Succ) {
8005 Succ = NewSucc;
8006 SuccWeight = Weight;
8007 } else if (Succ == NewSucc) {
 // The block seen twice turned out to be the one stored in Succ, so the
 // roles are exchanged.
 // NOTE(review): this case's own Weight is not added to either total on
 // this path - confirm whether that is intended.
8008 std::swap(Succ, OtherSucc);
8009 std::swap(SuccWeight, OtherSuccWeight);
8010 } else
8011 return false;
8012 }
 // NOTE(review): if all three cases share one successor, Succ stays null
 // and the loop below never assigns Res. Presumably such switches are
 // simplified before reaching this point - confirm.
8013 for (auto &Case : SI->cases()) {
8014 std::optional<int64_t> Val =
8015 Case.getCaseValue()->getValue().trySExtValue();
8016 if (!Val || (Val != 1 && Val != 0 && Val != -1))
8017 return false;
8018 if (Case.getCaseSuccessor() == Succ) {
8019 Res = *Val;
8020 break;
8021 }
8022 }
8023 } else {
8024 return false;
8025 }
8026
8027 // Determine predicate for the missing case.
 // NOTE(review): the declaration of `Pred` is not visible in this listing.
 // The unsigned predicate is chosen first and converted to its signed
 // counterpart below when the intrinsic is signed.
8029 switch (Res) {
8030 case 1:
8031 Pred = ICmpInst::ICMP_UGT;
8032 break;
8033 case 0:
8034 Pred = ICmpInst::ICMP_EQ;
8035 break;
8036 case -1:
8037 Pred = ICmpInst::ICMP_ULT;
8038 break;
8039 }
8040 if (Cmp->isSigned())
8041 Pred = ICmpInst::getSignedPredicate(Pred);
8042
8043 MDNode *NewWeights = nullptr;
8044 if (HasWeights)
8045 NewWeights = MDBuilder(SI->getContext())
8046 .createBranchWeights(SuccWeight, OtherSuccWeight);
8047
8048 BasicBlock *BB = SI->getParent();
8049 Builder.SetInsertPoint(SI->getIterator());
8050 Value *ICmp = Builder.CreateICmp(Pred, Cmp->getLHS(), Cmp->getRHS());
8051 Builder.CreateCondBr(ICmp, Succ, OtherSucc, NewWeights,
8052 SI->getMetadata(LLVMContext::MD_unpredictable));
 // OtherSucc was reached through two switch edges and is now reached through
 // one, so one predecessor entry is dropped.
8053 OtherSucc->removePredecessor(BB);
8054 if (Unreachable)
8055 Unreachable->removePredecessor(BB);
8056 SI->eraseFromParent();
8057 Cmp->eraseFromParent();
8058 if (DTU && Unreachable)
8059 DTU->applyUpdates({{DominatorTree::Delete, BB, Unreachable}});
8060 return true;
8061}
8062
8063/// Checking whether two BBs are equal depends on the contents of the
8064/// BasicBlock and the incoming values of their successor PHINodes.
8065/// PHINode::getIncomingValueForBlock is O(|Preds|), so we'd like to avoid
8066/// calling this function on each BasicBlock every time isEqual is called,
8067/// especially since the same BasicBlock may be passed as an argument multiple
8068/// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock ->
8069/// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking
8070/// of the incoming values.
// NOTE(review): the struct header and its data members are not visible in this
// listing. Code elsewhere in the file reads the members `BB` and `PhiPredIVs`
// and names the type EqualBBWrapper::Phi2IVsMap.
8073
8074 // One Phi usually has < 8 incoming values.
8078
8079 // We only merge the identical non-entry BBs with
8080 // - terminator unconditional br to Succ (pending relaxation),
8081 // - does not have address taken / weird control.
 //
 // Returns true only if BB passes every check below: it is not the entry
 // block, ends in an UncondBrInst, is neither address-taken nor an EH pad,
 // holds exactly one instruction, and has at least one predecessor.
8082 static bool canBeMerged(const BasicBlock *BB) {
8083 assert(BB && "Expected non-null BB");
8084 // Entry block cannot be eliminated or have predecessors.
8085 if (BB->isEntryBlock())
8086 return false;
8087
8088 // Single successor and must be Succ.
8089 // FIXME: Relax that the terminator is a BranchInst by checking for equality
8090 // on other kinds of terminators. We decide to only support unconditional
8091 // branches for now for compile time reasons.
8092 auto *BI = dyn_cast<UncondBrInst>(BB->getTerminator());
8093 if (!BI)
8094 return false;
8095
8096 // Avoid blocks that are "address-taken" (blockaddress) or have unusual
8097 // uses.
8098 if (BB->hasAddressTaken() || BB->isEHPad())
8099 return false;
8100
8101 // TODO: relax this condition to merge equal blocks with >1 instructions?
8102 // Here, we use a O(1) form of the O(n) comparison of `size() != 1`.
8103 if (&BB->front() != &BB->back())
8104 return false;
8105
8106 // The BB must have at least one predecessor.
8107 if (pred_empty(BB))
8108 return false;
8109
8110 return true;
8111 }
8112};
8113
// Hash for an EqualBBWrapper: combines the wrapped block's successor with the
// values that the successor's PHI nodes receive from the wrapped block, looked
// up in the precomputed PhiPredIVs map.
// NOTE(review): the header of the enclosing struct and the declaration of `BI`
// are not visible in this listing; `BI` is presumably the UncondBrInst
// terminating BB - confirm against the original file.
8115 static unsigned getHashValue(const EqualBBWrapper *EBW) {
8116 BasicBlock *BB = EBW->BB;
8118 assert(BB->size() == 1 && "Expected just a single branch in the BB");
8119
8120 // Since we assume the BB is just a single UncondBrInst with a single
8121 // successor, we hash as the BB and the incoming Values of its successor
8122 // PHIs. Initially, we tried to just use the successor BB as the hash, but
8123 // including the incoming PHI values leads to better performance.
8124 // We also tried to build a map from BB -> Succs.IncomingValues ahead of
8125 // time and passing it in EqualBBWrapper, but this slowed down the average
8126 // compile time without having any impact on the worst case compile time.
8127 BasicBlock *Succ = BI->getSuccessor();
8128 auto PhiValsForBB = map_range(Succ->phis(), [&](PHINode &Phi) {
8129 return (*EBW->PhiPredIVs)[&Phi][BB];
8130 });
8131 return hash_combine(Succ, hash_combine_range(PhiValsForBB));
8132 }
// Two wrapped blocks compare equal when their unconditional branches target
// the same successor and every PHI node in that successor receives the same
// incoming value from both blocks.
8133 static bool isEqual(const EqualBBWrapper *LHS, const EqualBBWrapper *RHS) {
8134 BasicBlock *A = LHS->BB;
8135 BasicBlock *B = RHS->BB;
8136
8137 // FIXME: we checked that the size of A and B are both 1 in
8138 // mergeIdenticalUncondBBs to make the Case list smaller to
8139 // improve performance. If we decide to support BasicBlocks with more
8140 // than just a single instruction, we need to check that A.size() ==
8141 // B.size() here, and we need to check more than just the BranchInsts
8142 // for equality.
8143
8144 UncondBrInst *ABI = cast<UncondBrInst>(A->getTerminator());
8145 UncondBrInst *BBI = cast<UncondBrInst>(B->getTerminator());
8146 if (ABI->getSuccessor() != BBI->getSuccessor())
8147 return false;
8148
8149 // Need to check that PHIs in successor have matching values.
8150 BasicBlock *Succ = ABI->getSuccessor();
8151 auto IfPhiIVMatch = [&](PHINode &Phi) {
8152 // Replace O(|Pred|) Phi.getIncomingValueForBlock with this O(1) hashmap
8153 // query.
 // Only LHS's map is consulted; mergeIdenticalBBs() gives every wrapper
 // a pointer to the same map.
8154 auto &PredIVs = (*LHS->PhiPredIVs)[&Phi];
8155 return PredIVs[A] == PredIVs[B];
8156 };
8157 return all_of(Succ->phis(), IfPhiIVMatch);
8158 }
8159};
8160
8161// Merge identical BBs into one of them.
//
// For every group of candidates that compare equal (same successor and same
// incoming PHI values), the first one encountered is kept and all edges into
// the others are redirected to it. The duplicates are left in place without
// predecessors. Returns true if any edge was redirected.
// NOTE(review): the first line of the signature is not visible in this
// listing. Callers pass an array of candidate blocks (`Candidates`) and DTU.
8163 DomTreeUpdater *DTU) {
8164 if (Candidates.size() < 2)
8165 return false;
8166
8167 // Build Cases. Skip BBs that are not candidates for simplification. Mark
8168 // PHINodes which need to be processed into PhiPredIVs. We decide to process
8169 // an entire PHI at once after the loop, opposed to calling
8170 // getIncomingValueForBlock inside this loop, since each call to
8171 // getIncomingValueForBlock is O(|Preds|).
8172 EqualBBWrapper::Phi2IVsMap PhiPredIVs;
 // NOTE(review): the declarations of `BBs2Merge` and `Phis` are not visible in
 // this listing.
8174 BBs2Merge.reserve(Candidates.size());
8176
8177 for (BasicBlock *BB : Candidates) {
8178 BasicBlock *Succ = BB->getSingleSuccessor();
8179 assert(Succ && "Expected unconditional BB");
8180 BBs2Merge.emplace_back(EqualBBWrapper{BB, &PhiPredIVs});
8181 Phis.insert_range(make_pointer_range(Succ->phis()));
8182 }
8183
8184 // Precompute a data structure to improve performance of isEqual for
8185 // EqualBBWrapper.
8186 PhiPredIVs.reserve(Phis.size());
8187 for (PHINode *Phi : Phis) {
8188 auto &IVs =
8189 PhiPredIVs.try_emplace(Phi, Phi->getNumIncomingValues()).first->second;
8190 // Pre-fill all incoming for O(1) lookup as Phi.getIncomingValueForBlock is
8191 // O(|Pred|).
8192 for (auto &IV : Phi->incoming_values())
8193 IVs.insert({Phi->getIncomingBlock(IV), IV.get()});
8194 }
8195
8196 // Group duplicates using DenseSet with custom equality/hashing.
8197 // Build a set such that if the EqualBBWrapper exists in the set and another
8198 // EqualBBWrapper isEqual, then the equivalent EqualBBWrapper which is not in
8199 // the set should be replaced with the one in the set. If the EqualBBWrapper
8200 // is not in the set, then it should be added to the set so other
8201 // EqualBBWrapper can check against it in the same manner. We use
8202 // EqualBBWrapper instead of just BasicBlock because we'd like to pass around
8203 // information to isEqual, getHashValue, and when doing the replacement
8204 // with better performance.
 // NOTE(review): the declarations of `Keep` and `Updates` are not visible in
 // this listing.
8206 Keep.reserve(BBs2Merge.size());
8207
8209 Updates.reserve(BBs2Merge.size() * 2);
8210
8211 bool MadeChange = false;
8212
8213 // Helper: redirect all edges X -> Dead to X -> Live.
8214 auto RedirectIncomingEdges = [&](BasicBlock *Dead, BasicBlock *Live) {
 // NOTE(review): the declaration of `DeadPreds` (and, inside the DTU branch,
 // the first line of the `LivePreds` declaration) is not visible in this
 // listing. From their use, they hold the predecessors of Dead and of Live
 // as they were before any edge is rewritten.
8217 if (DTU) {
8218 // All predecessors of Dead (except the common predecessors) will be
8219 // moved to Live.
8220 Updates.reserve(Updates.size() + DeadPreds.size() * 2);
8222 predecessors(Live));
8223 for (BasicBlock *PredOfDead : DeadPreds) {
8224 // Do not insert an edge for common predecessors of Dead and Live;
 // they already have an edge to Live.
8225 if (!LivePreds.contains(PredOfDead))
8226 Updates.push_back({DominatorTree::Insert, PredOfDead, Live});
8227 Updates.push_back({DominatorTree::Delete, PredOfDead, Dead});
8228 }
8229 }
8230 LLVM_DEBUG(dbgs() << "Replacing duplicate pred BB ";
8231 Dead->printAsOperand(dbgs()); dbgs() << " with pred ";
8232 Live->printAsOperand(dbgs()); dbgs() << " for ";
8233 Live->getSingleSuccessor()->printAsOperand(dbgs());
8234 dbgs() << "\n");
8235 // Replace successors in all predecessors of Dead.
8236 for (BasicBlock *PredOfDead : DeadPreds) {
8237 Instruction *T = PredOfDead->getTerminator();
8238 T->replaceSuccessorWith(Dead, Live);
8239 }
8240 };
8241
8242 // Try to eliminate duplicate blocks.
8243 for (const auto &EBW : BBs2Merge) {
8244 // EBW is a candidate for simplification. If we find a duplicate BB,
8245 // replace it.
8246 const auto &[It, Inserted] = Keep.insert(&EBW);
8247 if (Inserted)
8248 continue;
8249
8250 // Found duplicate: merge DeadBB into the canonical block KeepBB already
 // stored in the set.
8251 BasicBlock *KeepBB = (*It)->BB;
8252 BasicBlock *DeadBB = EBW.BB;
8253
8254 // Avoid merging a BB with itself.
8255 if (KeepBB == DeadBB)
8256 continue;
8257
8258 // Redirect all edges into DeadBB to KeepBB.
8259 RedirectIncomingEdges(DeadBB, KeepBB);
8260
8261 // Now DeadBB should become unreachable; leave DCE to later,
8262 // but we can try to simplify it if it only branches to Succ.
8263 // (We won't erase here to keep the routine simple and DT-safe.)
8264 assert(pred_empty(DeadBB) && "DeadBB should be unreachable.");
8265 MadeChange = true;
8266 }
8267
 // All dominator-tree updates are applied in one batch after the CFG edits.
8268 if (DTU && !Updates.empty())
8269 DTU->applyUpdates(Updates);
8270
8271 return MadeChange;
8272}
8273
// Merge switch arms of SI that are identical blocks by handing a filtered set
// of candidate blocks to mergeIdenticalBBs(). Returns whatever
// mergeIdenticalBBs() returns.
8274bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI,
8275 DomTreeUpdater *DTU) {
8276 // Collect candidate switch-arms top-down.
 // NOTE(review): the constructor arguments of FilteredArms (two source lines)
 // are not visible in this listing, so the exact filter cannot be seen here;
 // presumably SI's successors filtered by EqualBBWrapper::canBeMerged -
 // confirm against the original file.
8277 SmallSetVector<BasicBlock *, 16> FilteredArms(
8280 return mergeIdenticalBBs(FilteredArms.getArrayRef(), DTU);
8281}
8282
// Merge predecessors of BB that are identical blocks by handing a filtered set
// of candidate blocks to mergeIdenticalBBs(). Returns false without doing
// anything when BB is null, has fewer than two predecessors, or is a loop
// header that must stay canonical; otherwise returns whatever
// mergeIdenticalBBs() returns.
8283bool SimplifyCFGOpt::simplifyDuplicatePredecessors(BasicBlock *BB,
8284 DomTreeUpdater *DTU) {
8285 // Need at least 2 predecessors to do anything.
8286 if (!BB || !BB->hasNPredecessorsOrMore(2))
8287 return false;
8288
8289 // Compilation time consideration: retain the canonical loop, otherwise, we
8290 // require more time in the later loop canonicalization.
8291 if (Options.NeedCanonicalLoop && is_contained(LoopHeaders, BB))
8292 return false;
8293
8294 // Collect candidate predecessors bottom-up.
 // NOTE(review): the constructor arguments of FilteredPreds (two source lines)
 // are not visible in this listing, so the exact filter cannot be seen here;
 // presumably BB's predecessors filtered by EqualBBWrapper::canBeMerged -
 // confirm against the original file.
8295 SmallSetVector<BasicBlock *, 8> FilteredPreds(
8298 return mergeIdenticalBBs(FilteredPreds.getArrayRef(), DTU);
8299}
8300
8301bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
8302 BasicBlock *BB = SI->getParent();
8303
8304 if (isValueEqualityComparison(SI)) {
8305 // If we only have one predecessor, and if it is a branch on this value,
8306 // see if that predecessor totally determines the outcome of this switch.
8307 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
8308 if (simplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
8309 return requestResimplify();
8310
8311 Value *Cond = SI->getCondition();
8312 if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
8313 if (simplifySwitchOnSelect(SI, Select))
8314 return requestResimplify();
8315
8316 // If the block only contains the switch, see if we can fold the block
8317 // away into any preds.
8318 if (SI == &*BB->begin())
8319 if (foldValueComparisonIntoPredecessors(SI, Builder))
8320 return requestResimplify();
8321 }
8322
8323 // Try to transform the switch into an icmp and a branch.
8324 // The conversion from switch to comparison may lose information on
8325 // impossible switch values, so disable it early in the pipeline.
8326 if (Options.ConvertSwitchRangeToICmp && turnSwitchRangeIntoICmp(SI, Builder))
8327 return requestResimplify();
8328
8329 // Remove unreachable cases.
8330 if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
8331 return requestResimplify();
8332
8333 if (simplifySwitchOfCmpIntrinsic(SI, Builder, DTU))
8334 return requestResimplify();
8335
8336 if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
8337 return requestResimplify();
8338
8339 if (Options.ForwardSwitchCondToPhi && forwardSwitchConditionToPHI(SI))
8340 return requestResimplify();
8341
8342 // The conversion of switches to arithmetic or lookup table is disabled in
8343 // the early optimization pipeline, as it may lose information or make the
8344 // resulting code harder to analyze.
8345 if (Options.ConvertSwitchToArithmetic || Options.ConvertSwitchToLookupTable)
8346 if (simplifySwitchLookup(SI, Builder, DTU, DL, TTI,
8347 Options.ConvertSwitchToLookupTable))
8348 return requestResimplify();
8349
8350 if (simplifySwitchOfPowersOfTwo(SI, Builder, DTU, DL, TTI))
8351 return requestResimplify();
8352
8353 if (reduceSwitchRange(SI, Builder, DL, TTI))
8354 return requestResimplify();
8355
8356 if (HoistCommon &&
8357 hoistCommonCodeFromSuccessors(SI, !Options.HoistCommonInsts))
8358 return requestResimplify();
8359
8360 // We can merge identical switch arms early to enhance more aggressive
8361 // optimization on switch.
8362 if (simplifyDuplicateSwitchArms(SI, DTU))
8363 return requestResimplify();
8364
8365 if (simplifySwitchWhenUMin(SI, DTU))
8366 return requestResimplify();
8367
8368 return false;
8369}
8370
// Simplify an indirectbr: drop destinations that are redundant, then replace
// the instruction when zero or one destination remains, otherwise refresh its
// branch weights and try to simplify an indirectbr on a select. Returns true
// if anything was changed.
8371bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
8372 BasicBlock *BB = IBI->getParent();
8373 bool Changed = false;
8374 SmallVector<uint32_t> BranchWeights;
8375 const bool HasBranchWeights = !ProfcheckDisableMetadataFixes &&
8376 extractBranchWeights(*IBI, BranchWeights);
8377
 // Sum the weights per destination block up front, so that a block listed
 // more than once keeps its combined weight after duplicates are removed.
8378 DenseMap<const BasicBlock *, uint64_t> TargetWeight;
8379 if (HasBranchWeights)
8380 for (size_t I = 0, E = IBI->getNumDestinations(); I < E; ++I)
8381 TargetWeight[IBI->getDestination(I)] += BranchWeights[I];
8382
8383 // Eliminate redundant destinations.
 // A destination is dropped if its address is never taken or if it already
 // appeared earlier in the list. Only the former loses the CFG edge entirely,
 // so only those blocks are recorded in RemovedSuccs.
8384 SmallPtrSet<Value *, 8> Succs;
8385 SmallSetVector<BasicBlock *, 8> RemovedSuccs;
8386 for (unsigned I = 0, E = IBI->getNumDestinations(); I != E; ++I) {
8387 BasicBlock *Dest = IBI->getDestination(I);
8388 if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
8389 if (!Dest->hasAddressTaken())
8390 RemovedSuccs.insert(Dest);
8391 Dest->removePredecessor(BB);
8392 IBI->removeDestination(I);
 // The destination list shrank by one: re-examine index I and shorten the
 // loop bound.
8393 --I;
8394 --E;
8395 Changed = true;
8396 }
8397 }
8398
8399 if (DTU) {
8400 std::vector<DominatorTree::UpdateType> Updates;
8401 Updates.reserve(RemovedSuccs.size());
8402 for (auto *RemovedSucc : RemovedSuccs)
8403 Updates.push_back({DominatorTree::Delete, BB, RemovedSucc});
8404 DTU->applyUpdates(Updates);
8405 }
8406
8407 if (IBI->getNumDestinations() == 0) {
8408 // If the indirectbr has no successors, change it to unreachable.
8409 new UnreachableInst(IBI->getContext(), IBI->getIterator());
 // NOTE(review): one source line is not visible here in this listing
 // (presumably the removal of IBI itself) - confirm against the original
 // file.
8411 return true;
8412 }
8413
8414 if (IBI->getNumDestinations() == 1) {
8415 // If the indirectbr has one successor, change it to a direct branch.
 // NOTE(review): the two source lines performing the replacement are not
 // visible in this listing.
8418 return true;
8419 }
8420 if (HasBranchWeights) {
 // Rebuild the weights for the surviving destinations from the per-block
 // sums computed above.
8421 SmallVector<uint64_t> NewBranchWeights(IBI->getNumDestinations());
8422 for (size_t I = 0, E = IBI->getNumDestinations(); I < E; ++I)
8423 NewBranchWeights[I] += TargetWeight.find(IBI->getDestination(I))->second;
8424 setFittedBranchWeights(*IBI, NewBranchWeights, /*IsExpected=*/false);
8425 }
8426 if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
8427 if (simplifyIndirectBrOnSelect(IBI, SI))
8428 return requestResimplify();
8429 }
8430 return Changed;
8431}
8432
8433/// Given a block with only a single landing pad and an unconditional branch,
8434/// try to find another basic block which this one can be merged with. This
8435/// handles cases where we have multiple invokes with unique landing pads, but
8436/// a shared handler.
8437///
8438/// We specifically choose to not worry about merging non-empty blocks
8439/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
8440/// practice, the optimizer produces empty landing pad blocks quite frequently
8441/// when dealing with exception dense code. (see: instcombine, gvn, if-else
8442/// sinking in this file)
8443///
8444/// This is primarily a code size optimization. We need to avoid performing
8445/// any transform which might inhibit optimization (such as our ability to
8446/// specialize a particular handler via tail commoning). We do this by not
8447/// merging any blocks which require us to introduce a phi. Since the same
8448/// values are flowing through both blocks, we don't lose any ability to
8449/// specialize. If anything, we make such specialization more likely.
8450///
8451/// TODO - This transformation could remove entries from a phi in the target
8452/// block when the inputs in the phi are the same for the two blocks being
8453/// merged. In some cases, this could result in removal of the PHI entirely.
///
/// \returns true if BB's predecessors were redirected to an identical block
/// and BB's branch was replaced by unreachable; false if no identical block
/// was found.
// NOTE(review): the first line of the signature is not visible in this
// listing. The call site in simplifyUncondBranch() passes (LPad, BI, BB, DTU).
8455 BasicBlock *BB, DomTreeUpdater *DTU) {
8456 auto Succ = BB->getUniqueSuccessor();
8457 assert(Succ);
8458 // If there's a phi in the successor block, we'd likely have to introduce
8459 // a phi into the merged landing pad block.
8460 if (isa<PHINode>(*Succ->begin()))
8461 return false;
8462
 // Look among the other predecessors of Succ for a block consisting of an
 // identical landing pad followed by an identical branch.
8463 for (BasicBlock *OtherPred : predecessors(Succ)) {
8464 if (BB == OtherPred)
8465 continue;
8466 BasicBlock::iterator I = OtherPred->begin();
 // NOTE(review): the declaration of `LPad2` is not visible in this listing;
 // presumably the instruction at I viewed as a LandingPadInst.
8468 if (!LPad2 || !LPad2->isIdenticalTo(LPad))
8469 continue;
8470 ++I;
 // NOTE(review): the declaration of `BI2` is not visible in this listing;
 // presumably the instruction at I viewed as a branch.
8472 if (!BI2 || !BI2->isIdenticalTo(BI))
8473 continue;
8474
8475 std::vector<DominatorTree::UpdateType> Updates;
8476
8477 // We've found an identical block. Update our predecessors to take that
8478 // path instead and make ourselves dead.
 // NOTE(review): the declaration of `UniquePreds` is not visible in this
 // listing; from its name and use, the deduplicated predecessors of BB.
8480 for (BasicBlock *Pred : UniquePreds) {
8481 InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
8482 assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
8483 "unexpected successor");
8484 II->setUnwindDest(OtherPred);
8485 if (DTU) {
8486 Updates.push_back({DominatorTree::Insert, Pred, OtherPred});
8487 Updates.push_back({DominatorTree::Delete, Pred, BB});
8488 }
8489 }
8490
 // NOTE(review): the declaration of `UniqueSuccs` is not visible in this
 // listing; from its name and use, the deduplicated successors of BB.
8492 for (BasicBlock *Succ : UniqueSuccs) {
8493 Succ->removePredecessor(BB);
8494 if (DTU)
8495 Updates.push_back({DominatorTree::Delete, BB, Succ});
8496 }
8497
 // BB no longer has predecessors; terminate it with unreachable in place of
 // the branch.
8498 IRBuilder<> Builder(BI);
8499 Builder.CreateUnreachable();
8500 BI->eraseFromParent();
8501 if (DTU)
8502 DTU->applyUpdates(Updates);
8503 return true;
8504 }
8505 return false;
8506}
8507
// Simplify a block that ends in the unconditional branch BI. Tried in order:
// removing the block when the instruction at `I` is already its terminator,
// folding a leading equality icmp against a constant (optionally followed by a
// select), and merging an empty landing-pad block with an equivalent one.
// Returns true if any of these changed the IR.
8508bool SimplifyCFGOpt::simplifyUncondBranch(UncondBrInst *BI,
8509 IRBuilder<> &Builder) {
8510 BasicBlock *BB = BI->getParent();
8511 BasicBlock *Succ = BI->getSuccessor(0);
8512
8513 // If the Terminator is the only non-phi instruction, simplify the block.
8514 // If LoopHeader is provided, check if the block or its successor is a loop
8515 // header. (This is for early invocations before loop simplify and
8516 // vectorization to keep canonical loop forms for nested loops. These blocks
8517 // can be eliminated when the pass is invoked later in the back-end.)
8518 // Note that if BB has only one predecessor then we do not introduce new
8519 // backedge, so we can eliminate BB.
8520 bool NeedCanonicalLoop =
8521 Options.NeedCanonicalLoop &&
8522 (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) &&
8523 (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ)));
 // NOTE(review): the declaration of `I` is not visible in this listing;
 // presumably an iterator to the first non-phi instruction of BB, per the
 // comment above - confirm against the original file.
8525 if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
8526 !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
8527 return true;
8528
8529 // If the only instruction in the block is a seteq/setne comparison against a
8530 // constant, try to simplify the block.
8531 if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
8532 if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
 // Step past the icmp: either the terminator follows directly, or a
 // select followed by the terminator.
8533 ++I;
8534 if (I->isTerminator() &&
8535 tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
8536 return true;
8537 if (isa<SelectInst>(I) && I->getNextNode()->isTerminator() &&
8538 tryToSimplifyUncondBranchWithICmpSelectInIt(ICI, cast<SelectInst>(I),
8539 Builder))
8540 return true;
8541 }
8542 }
8543
8544 // See if we can merge an empty landing pad block with another which is
8545 // equivalent.
8546 if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
8547 ++I;
8548 if (I->isTerminator() && tryToMergeLandingPad(LPad, BI, BB, DTU))
8549 return true;
8550 }
8551
8552 return false;
8553}
8554
// Finds the one block that feeds every predecessor of BB, if there is one.
// Each predecessor P of BB must itself have a single predecessor
// (P->getSinglePredecessor() non-null), and that block must be the same for
// every P. Returns that common block; returns nullptr as soon as some P has
// no single predecessor or two predecessors disagree, and also when BB has no
// predecessors at all (PredPred is then never assigned).
// NOTE(review): the signature line of this function is not visible in this
// listing (embedded line 8555 is absent).
8556 BasicBlock *PredPred = nullptr;
8557 for (auto *P : predecessors(BB)) {
8558 BasicBlock *PPred = P->getSinglePredecessor();
8559 if (!PPred || (PredPred && PredPred != PPred))
8560 return nullptr;
8561 PredPred = PPred;
8562 }
8563 return PredPred;
8564}
8565
8566/// Fold the following pattern:
8567/// bb0:
8568/// br i1 %cond1, label %bb1, label %bb2
8569/// bb1:
8570/// br i1 %cond2, label %bb3, label %bb4
8571/// bb2:
8572/// br i1 %cond2, label %bb4, label %bb3
8573/// bb3:
8574/// ...
8575/// bb4:
8576/// ...
8577/// into
8578/// bb0:
8579/// %cond = xor i1 %cond1, %cond2
8580/// br i1 %cond, label %bb4, label %bb3
8581/// bb3:
8582/// ...
8583/// bb4:
8584/// ...
8585/// NOTE: %cond2 always dominates the terminator of bb0.
///
/// The fold only fires when bb1 and bb2 each contain nothing but their
/// conditional branch, neither branches back to itself or to bb0, and neither
/// bb3 nor bb4 starts with a PHI node. If DTU is non-null it receives the four
/// edge updates. Branch weights are recombined when at least one of the three
/// branches carries them.
///
/// Returns true if the pattern matched and bb0's branch was rewritten, false
/// otherwise (in which case nothing is modified).
/// NOTE(review): the signature line of this function is not visible in this
/// listing (embedded line 8586 is absent).
8587 BasicBlock *BB = BI->getParent();
8588 BasicBlock *BB1 = BI->getSuccessor(0);
8589 BasicBlock *BB2 = BI->getSuccessor(1);
     // Accepts Succ only if it is not BB, its first instruction is its
     // terminator, that terminator is a CondBrInst (stored to SuccBI), neither
     // target is Succ or BB, and neither target begins with a PHI node.
8590 auto IsSimpleSuccessor = [BB](BasicBlock *Succ, CondBrInst *&SuccBI) {
8591 if (Succ == BB)
8592 return false;
8593 if (&Succ->front() != Succ->getTerminator())
8594 return false;
8595 SuccBI = dyn_cast<CondBrInst>(Succ->getTerminator());
8596 if (!SuccBI)
8597 return false;
8598 BasicBlock *Succ1 = SuccBI->getSuccessor(0);
8599 BasicBlock *Succ2 = SuccBI->getSuccessor(1);
8600 return Succ1 != Succ && Succ2 != Succ && Succ1 != BB && Succ2 != BB &&
8601 !isa<PHINode>(Succ1->front()) && !isa<PHINode>(Succ2->front());
8602 };
8603 CondBrInst *BB1BI, *BB2BI;
8604 if (!IsSimpleSuccessor(BB1, BB1BI) || !IsSimpleSuccessor(BB2, BB2BI))
8605 return false;
8606
     // Both inner branches must test the same condition and have their two
     // targets swapped relative to each other.
8607 if (BB1BI->getCondition() != BB2BI->getCondition() ||
8608 BB1BI->getSuccessor(0) != BB2BI->getSuccessor(1) ||
8609 BB1BI->getSuccessor(1) != BB2BI->getSuccessor(0))
8610 return false;
8611
8612 BasicBlock *BB3 = BB1BI->getSuccessor(0);
8613 BasicBlock *BB4 = BB1BI->getSuccessor(1);
     // Rewrite bb0's branch in place: new condition is the xor of both
     // conditions, successor 0 becomes BB4 and successor 1 becomes BB3.
8614 IRBuilder<> Builder(BI);
8615 BI->setCondition(
8616 Builder.CreateXor(BI->getCondition(), BB1BI->getCondition()));
8617 BB1->removePredecessor(BB);
8618 BI->setSuccessor(0, BB4);
8619 BB2->removePredecessor(BB);
8620 BI->setSuccessor(1, BB3);
8621 if (DTU) {
     // NOTE(review): the declaration of `Updates` is not visible in this
     // listing (embedded line 8622 is absent).
8623 Updates.push_back({DominatorTree::Delete, BB, BB1});
8624 Updates.push_back({DominatorTree::Insert, BB, BB4});
8625 Updates.push_back({DominatorTree::Delete, BB, BB2});
8626 Updates.push_back({DominatorTree::Insert, BB, BB3});
8627
8628 DTU->applyUpdates(Updates);
8629 }
     // Collect the weights of all three branches. A branch without weights
     // contributes 1:1; the result is only written back if at least one branch
     // actually had weights.
8630 bool HasWeight = false;
8631 uint64_t BBTWeight, BBFWeight;
8632 if (extractBranchWeights(*BI, BBTWeight, BBFWeight))
8633 HasWeight = true;
8634 else
8635 BBTWeight = BBFWeight = 1;
8636 uint64_t BB1TWeight, BB1FWeight;
8637 if (extractBranchWeights(*BB1BI, BB1TWeight, BB1FWeight))
8638 HasWeight = true;
8639 else
8640 BB1TWeight = BB1FWeight = 1;
8641 uint64_t BB2TWeight, BB2FWeight;
8642 if (extractBranchWeights(*BB2BI, BB2TWeight, BB2FWeight))
8643 HasWeight = true;
8644 else
8645 BB2TWeight = BB2FWeight = 1;
8646 if (HasWeight) {
     // Weights[0] is for the new successor 0 (BB4), which was reached through
     // BB1's second target or BB2's first target; Weights[1] is for BB3.
8647 uint64_t Weights[2] = {BBTWeight * BB1FWeight + BBFWeight * BB2TWeight,
8648 BBTWeight * BB1TWeight + BBFWeight * BB2FWeight};
8649 setFittedBranchWeights(*BI, Weights, /*IsExpected=*/false,
8650 /*ElideAllZero=*/true);
8651 }
8652 return true;
8653}
8654
/// Try to simplify the block terminated by the conditional branch \p BI.
///
/// Returns false immediately when Options.SimplifyCondBranch is off or the
/// enclosing function has the OptForFuzzing attribute. Otherwise the
/// transforms below are attempted one after another and the first one that
/// succeeds ends the call; all but the icmp-chain fold leave through
/// requestResimplify().
///
/// \param BI The conditional branch terminating the block; its two successors
///   are asserted to be different blocks.
/// \param Builder Builder forwarded to the comparison-folding helpers.
/// \returns false if nothing applied; otherwise true or the result of
///   requestResimplify(), depending on which step succeeded.
///
/// NOTE(review): this listing skips embedded lines 8657, 8701 and 8735, so
/// the start of the assert condition, the statement following setCondition()
/// and the right-hand side of the size comparison are not visible here.
8655bool SimplifyCFGOpt::simplifyCondBranch(CondBrInst *BI, IRBuilder<> &Builder) {
8656 assert(
8658 BI->getSuccessor(0) != BI->getSuccessor(1) &&
8659 "Tautological conditional branch should have been eliminated already.");
8660
8661 BasicBlock *BB = BI->getParent();
8662 if (!Options.SimplifyCondBranch ||
8663 BI->getFunction()->hasFnAttribute(Attribute::OptForFuzzing))
8664 return false;
8665
8666 // Conditional branch
8667 if (isValueEqualityComparison(BI)) {
8668 // If we only have one predecessor, and if it is a branch on this value,
8669 // see if that predecessor totally determines the outcome of this
8670 // switch.
8671 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
8672 if (simplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
8673 return requestResimplify();
8674
8675 // This block must be empty, except for the setcond inst, if it exists.
8676 // Ignore pseudo intrinsics.
     // The scan skips pseudo probes and the branch condition; the first other
     // instruction ends it. Only if that instruction is BI itself is the fold
     // into the predecessors attempted.
8677 for (auto &I : *BB) {
8678 if (isa<PseudoProbeInst>(I) ||
8679 &I == cast<Instruction>(BI->getCondition()))
8680 continue;
8681 if (&I == BI)
8682 if (foldValueComparisonIntoPredecessors(BI, Builder))
8683 return requestResimplify();
8684 break;
8685 }
8686 }
8687
8688 // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
8689 if (simplifyBranchOnICmpChain(BI, Builder, DL))
8690 return true;
8691
8692 // If this basic block has dominating predecessor blocks and the dominating
8693 // blocks' conditions imply BI's condition, we know the direction of BI.
8694 std::optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL);
8695 if (Imp) {
8696 // Turn this into a branch on constant.
8697 auto *OldCond = BI->getCondition();
8698 ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
8699 : ConstantInt::getFalse(BB->getContext());
8700 BI->setCondition(TorF);
8702 return requestResimplify();
8703 }
8704
8705 // If this basic block is ONLY a compare and a branch, and if a predecessor
8706 // branches to us and one of our successors, fold the comparison into the
8707 // predecessor and use logical operations to pick the right destination.
8708 if (Options.SpeculateBlocks &&
8709 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI, Options.AC,
8710 Options.BonusInstThreshold))
8711 return requestResimplify();
8712
8713 // We have a conditional branch to two blocks that are only reachable
8714 // from BI. We know that the condbr dominates the two blocks, so see if
8715 // there is any identical code in the "then" and "else" blocks. If so, we
8716 // can hoist it up to the branching block.
8717 if (BI->getSuccessor(0)->getSinglePredecessor()) {
8718 if (BI->getSuccessor(1)->getSinglePredecessor()) {
8719 if (HoistCommon &&
8720 hoistCommonCodeFromSuccessors(BI, !Options.HoistCommonInsts))
8721 return requestResimplify();
8722
8723 if (BI && Options.HoistLoadsStoresWithCondFaulting &&
8724 isProfitableToSpeculate(BI, std::nullopt, TTI)) {
8725 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
     // Collects every non-terminator instruction of both successors into
     // SpeculatedConditionalLoadsStores. Gives up if a successor's
     // terminator has more than one successor, if an instruction fails
     // isSafeCheapLoadStore(), or if the size limit is hit; succeeds only
     // if at least one instruction was collected.
8726 auto CanSpeculateConditionalLoadsStores = [&]() {
8727 for (auto *Succ : successors(BB)) {
8728 for (Instruction &I : *Succ) {
8729 if (I.isTerminator()) {
8730 if (I.getNumSuccessors() > 1)
8731 return false;
8732 continue;
8733 } else if (!isSafeCheapLoadStore(&I, TTI) ||
8734 SpeculatedConditionalLoadsStores.size() ==
8736 return false;
8737 }
8738 SpeculatedConditionalLoadsStores.push_back(&I);
8739 }
8740 }
8741 return !SpeculatedConditionalLoadsStores.empty();
8742 };
8743
8744 if (CanSpeculateConditionalLoadsStores()) {
8745 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
8746 std::nullopt, nullptr);
8747 return requestResimplify();
8748 }
8749 }
8750 } else {
8751 // If Successor #1 has multiple preds, we may be able to conditionally
8752 // execute Successor #0 if it branches to Successor #1.
8753 Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
8754 if (Succ0TI->getNumSuccessors() == 1 &&
8755 Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
8756 if (speculativelyExecuteBB(BI, BI->getSuccessor(0)))
8757 return requestResimplify();
8758 }
8759 } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
8760 // If Successor #0 has multiple preds, we may be able to conditionally
8761 // execute Successor #1 if it branches to Successor #0.
8762 Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
8763 if (Succ1TI->getNumSuccessors() == 1 &&
8764 Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
8765 if (speculativelyExecuteBB(BI, BI->getSuccessor(1)))
8766 return requestResimplify();
8767 }
8768
8769 // If this is a branch on something for which we know the constant value in
8770 // predecessors (e.g. a phi node in the current block), thread control
8771 // through this block.
8772 if (foldCondBranchOnValueKnownInPredecessor(BI))
8773 return requestResimplify();
8774
8775 // Scan predecessor blocks for conditional branches.
8776 for (BasicBlock *Pred : predecessors(BB))
8777 if (CondBrInst *PBI = dyn_cast<CondBrInst>(Pred->getTerminator()))
8778 if (PBI != BI)
8779 if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
8780 return requestResimplify();
8781
8782 // Look for diamond patterns.
8783 if (MergeCondStores)
8784 if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
8785 if (CondBrInst *PBI = dyn_cast<CondBrInst>(PrevBB->getTerminator()))
8786 if (PBI != BI)
8787 if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
8788 return requestResimplify();
8789
8790 // Look for nested conditional branches.
8791 if (mergeNestedCondBranch(BI, DTU))
8792 return requestResimplify();
8793
8794 return false;
8795}
8796
8797/// Check if passing a value to an instruction will cause undefined behavior.
///
/// Only a null or undef constant is analyzed. The function picks the first
/// use of \p I that is in the same block, comes after \p I and has one of
/// the listed opcodes, then decides from the kind of that user whether the
/// value makes it immediately undefined.
///
/// \param V Value that would flow into \p I; asserted to have I's type.
/// \param I Instruction whose uses are inspected.
/// \param PtrValueMayBeModified Set once a GEP has been looked through that
///   may move the pointer away from null; the null-passed-to-nonnull checks
///   (return value and call argument) then answer false.
/// \returns true if undefined behavior is certain according to the checks
///   below, false otherwise (including every case that is not handled).
///
/// NOTE(review): this listing skips embedded lines 8800, 8848, 8853 and 8894,
/// so the declaration of `C` (presumably V as a Constant), the body of the
/// any_of predicate, and the dyn_cast lines introducing `GEP` and `SI` are
/// not visible here.
8798static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {
8799 assert(V->getType() == I->getType() && "Mismatched types");
8801 if (!C)
8802 return false;
8803
8804 if (I->use_empty())
8805 return false;
8806
8807 if (C->isNullValue() || isa<UndefValue>(C)) {
8808 // Find the first same-block use with a UB-triggering opcode, skipping
8809 // cross-block or before-I uses.
8810 auto FindUse = llvm::find_if(I->uses(), [I](auto &U) {
8811 auto *Use = cast<Instruction>(U.getUser());
8812 // Only same-block uses after I can witness UB at I's program point.
8813 // Self-uses and before-I uses can occur when I is a PHI node.
8814 if (Use->getParent() != I->getParent() || Use == I || Use->comesBefore(I))
8815 return false;
8816 // Change this list when we want to add new instructions.
8817 switch (Use->getOpcode()) {
8818 default:
8819 return false;
8820 case Instruction::GetElementPtr:
8821 case Instruction::Ret:
8822 case Instruction::BitCast:
8823 case Instruction::Load:
8824 case Instruction::Store:
8825 case Instruction::Call:
8826 case Instruction::CallBr:
8827 case Instruction::Invoke:
8828 case Instruction::UDiv:
8829 case Instruction::URem:
8830 // Note: signed div/rem of INT_MIN / -1 is also immediate UB, not
8831 // implemented to avoid code complexity as it is unclear how useful such
8832 // logic is.
8833 case Instruction::SDiv:
8834 case Instruction::SRem:
8835 return true;
8836 }
8837 });
8838 if (FindUse == I->use_end())
8839 return false;
8840 auto &Use = *FindUse;
8841 auto *User = cast<Instruction>(Use.getUser());
8842
8843 // Now make sure that there are no instructions in between that can alter
8844 // control flow (eg. calls)
     // The range covers the instructions strictly between I and User.
8845 auto InstrRange =
8846 make_range(std::next(I->getIterator()), User->getIterator());
8847 if (any_of(InstrRange, [](Instruction &I) {
8849 }))
8850 return false;
8851
8852 // Look through GEPs. A load from a GEP derived from NULL is still undefined
8854 if (GEP->getPointerOperand() == I) {
8855 // The type of GEP may differ from the type of base pointer.
8856 // Bail out on vector GEPs, as they are not handled by other checks.
8857 if (GEP->getType()->isVectorTy())
8858 return false;
8859 // The current base address is null, there are four cases to consider:
8860 // getelementptr (TY, null, 0) -> null
8861 // getelementptr (TY, null, not zero) -> may be modified
8862 // getelementptr inbounds (TY, null, 0) -> null
8863 // getelementptr inbounds (TY, null, not zero) -> poison iff null is
8864 // undefined?
8865 if (!GEP->hasAllZeroIndices() &&
8866 (!GEP->isInBounds() ||
8867 NullPointerIsDefined(GEP->getFunction(),
8868 GEP->getPointerAddressSpace())))
8869 PtrValueMayBeModified = true;
     // Continue the analysis on the users of the GEP itself.
8870 return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
8871 }
8872
8873 // Look through return.
8874 if (ReturnInst *Ret = dyn_cast<ReturnInst>(User)) {
8875 bool HasNoUndefAttr =
8876 Ret->getFunction()->hasRetAttribute(Attribute::NoUndef);
8877 // Return undefined to a noundef return value is undefined.
8878 if (isa<UndefValue>(C) && HasNoUndefAttr)
8879 return true;
8880 // Return null to a nonnull+noundef return value is undefined.
8881 if (C->isNullValue() && HasNoUndefAttr &&
8882 Ret->getFunction()->hasRetAttribute(Attribute::NonNull)) {
8883 return !PtrValueMayBeModified;
8884 }
8885 }
8886
8887 // Load from null is undefined.
     // Volatile loads are not decided here; non-volatile ones are UB unless
     // null is a defined address in the load's address space.
8888 if (LoadInst *LI = dyn_cast<LoadInst>(User))
8889 if (!LI->isVolatile())
8890 return !NullPointerIsDefined(LI->getFunction(),
8891 LI->getPointerAddressSpace());
8892
8893 // Store to null is undefined.
     // Only counts when I is the address being stored to, not the stored
     // value.
8895 if (!SI->isVolatile())
8896 return (!NullPointerIsDefined(SI->getFunction(),
8897 SI->getPointerAddressSpace())) &&
8898 SI->getPointerOperand() == I;
8899
8900 // llvm.assume(false/undef) always triggers immediate UB.
8901 if (auto *Assume = dyn_cast<AssumeInst>(User)) {
8902 // Ignore assume operand bundles.
8903 if (I == Assume->getArgOperand(0))
8904 return true;
8905 }
8906
8907 if (auto *CB = dyn_cast<CallBase>(User)) {
8908 if (C->isNullValue() && NullPointerIsDefined(CB->getFunction()))
8909 return false;
8910 // A call to null is undefined.
8911 if (CB->getCalledOperand() == I)
8912 return true;
8913
8914 if (CB->isArgOperand(&Use)) {
8915 unsigned ArgIdx = CB->getArgOperandNo(&Use);
8916 // Passing null to a nonnull+noundef argument is undefined.
8917 if (isa<ConstantPointerNull>(C) && C->getType()->isPointerTy() &&
8918 CB->paramHasNonNullAttr(ArgIdx, /*AllowUndefOrPoison=*/false))
8919 return !PtrValueMayBeModified;
8920 // Passing undef to a noundef argument is undefined.
8921 if (isa<UndefValue>(C) && CB->isPassingUndefUB(ArgIdx))
8922 return true;
8923 }
8924 }
8925 // Div/Rem by zero is immediate UB
     // I has to be the second operand (the divisor) of an integer div/rem.
8926 if (match(User, m_BinOp(m_Value(), m_Specific(I))) && User->isIntDivRem())
8927 return true;
8928 }
8929 return false;
8930}
8931
8932/// If BB has an incoming value that will always trigger undefined behavior
8933/// (eg. null pointer dereference), remove the branch leading here.
///
/// Every incoming value of every PHI in BB is tested with
/// passingValueIsAlwaysUndefined(). For the first hit whose predecessor ends
/// in a handled terminator, that terminator is rewritten by kind:
///   - unconditional branch: replaced with unreachable;
///   - conditional branch: replaced with an assume of the condition that
///     steers away from BB, followed by an unconditional branch to the other
///     successor;
///   - switch: each case and the default that target BB are redirected to a
///     newly created block containing only unreachable.
/// A predecessor with any other terminator kind is left alone and the scan
/// continues.
///
/// If DTU is non-null it receives the matching dominator tree updates; if AC
/// is non-null the new assume is registered with it.
///
/// Returns true right after the first rewrite, false if nothing was changed.
/// NOTE(review): the first line of the signature is not visible in this
/// listing (embedded line 8934 is absent).
8935 DomTreeUpdater *DTU,
8936 AssumptionCache *AC) {
8937 for (PHINode &PHI : BB->phis())
8938 for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
8939 if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
8940 BasicBlock *Predecessor = PHI.getIncomingBlock(i);
8941 Instruction *T = Predecessor->getTerminator();
8942 IRBuilder<> Builder(T);
8943 if (isa<UncondBrInst>(T)) {
8944 BB->removePredecessor(Predecessor);
8945 // Turn unconditional branches into unreachables.
8946 Builder.CreateUnreachable();
8947 T->eraseFromParent();
8948 if (DTU)
8949 DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
8950 return true;
8951 } else if (CondBrInst *BI = dyn_cast<CondBrInst>(T)) {
8952 BB->removePredecessor(Predecessor);
8953 // Preserve guarding condition in assume, because it might not be
8954 // inferrable from any dominating condition.
8955 Value *Cond = BI->getCondition();
8956 CallInst *Assumption;
     // If BB is successor 0, the surviving path is the one where the
     // condition does not hold, so the negated condition is assumed.
8957 if (BI->getSuccessor(0) == BB)
8958 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
8959 else
8960 Assumption = Builder.CreateAssumption(Cond);
8961 if (AC)
8962 AC->registerAssumption(cast<AssumeInst>(Assumption));
8963 Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
8964 : BI->getSuccessor(0));
8965 BI->eraseFromParent();
8966 if (DTU)
8967 DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
8968 return true;
8969 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
8970 // Redirect all branches leading to UB into
8971 // a newly created unreachable block.
8972 BasicBlock *Unreachable = BasicBlock::Create(
8973 Predecessor->getContext(), "unreachable", BB->getParent(), BB);
8974 Builder.SetInsertPoint(Unreachable);
8975 // The new block contains only one instruction: Unreachable
8976 Builder.CreateUnreachable();
     // removePredecessor() is called once per switch edge that pointed at
     // BB (each matching case, plus the default if it matches).
8977 for (const auto &Case : SI->cases())
8978 if (Case.getCaseSuccessor() == BB) {
8979 BB->removePredecessor(Predecessor);
8980 Case.setSuccessor(Unreachable);
8981 }
8982 if (SI->getDefaultDest() == BB) {
8983 BB->removePredecessor(Predecessor);
8984 SI->setDefaultDest(Unreachable);
8985 }
8986
8987 if (DTU)
8988 DTU->applyUpdates(
8989 { { DominatorTree::Insert, Predecessor, Unreachable },
8990 { DominatorTree::Delete, Predecessor, BB } });
8991 return true;
8992 }
8993 }
8994
8995 return false;
8996}
8997
/// Run one round of simplification on \p BB.
///
/// A block with no predecessors (other than the entry block), or whose single
/// predecessor is itself, is deleted outright. Otherwise block-level cleanups
/// are attempted first (terminator constant folding, merging into the
/// predecessor, sinking common code, folding a two-entry PHI); several of
/// them return as soon as they succeed. Finally the terminator is dispatched
/// by opcode to the matching simplify* routine.
///
/// \param BB Block to simplify; must be embedded in a function and have a
///   terminator (both asserted).
/// \returns true if a change was made; early exits return true or the result
///   of requestResimplify().
///
/// NOTE(review): this listing skips embedded lines 9019, 9022 and 9058, so
/// the duplicate-PHI elimination call, the condition guarding the
/// requestResimplify() return, and the declaration of `Terminator` are not
/// visible here.
8998bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
8999 bool Changed = false;
9000
9001 assert(BB && BB->getParent() && "Block not embedded in function!");
9002 assert(BB->getTerminator() && "Degenerate basic block encountered!");
9003
9004 // Remove basic blocks that have no predecessors (except the entry block)...
9005 // or that just have themself as a predecessor. These are unreachable.
9006 if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
9007 BB->getSinglePredecessor() == BB) {
9008 LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
9009 DeleteDeadBlock(BB, DTU);
9010 return true;
9011 }
9012
9013 // Check to see if we can constant propagate this terminator instruction
9014 // away...
9015 Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
9016 /*TLI=*/nullptr, DTU);
9017
9018 // Check for and eliminate duplicate PHI nodes in this block.
9020
9021 // Check for and remove branches that will always cause undefined behavior.
9023 return requestResimplify();
9024
9025 // Merge basic blocks into their predecessor if there is only one distinct
9026 // pred, and if there is only one distinct successor of the predecessor, and
9027 // if there are no PHI nodes.
9028 if (MergeBlockIntoPredecessor(BB, DTU))
9029 return true;
9030
9031 if (SinkCommon && Options.SinkCommonInsts) {
9032 if (sinkCommonCodeFromPredecessors(BB, DTU) ||
9033 mergeCompatibleInvokes(BB, DTU)) {
9034 // sinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
9035 // so we may now have duplicate PHI's.
9036 // Let's rerun EliminateDuplicatePHINodes() first,
9037 // before foldTwoEntryPHINode() potentially converts them into select's,
9038 // after which we'd need a whole EarlyCSE pass run to cleanup them.
9039 return true;
9040 }
9041 // Merge identical predecessors of this block.
9042 if (simplifyDuplicatePredecessors(BB, DTU))
9043 return true;
9044 }
9045
9046 if (Options.SpeculateBlocks &&
9047 !BB->getParent()->hasFnAttribute(Attribute::OptForFuzzing)) {
9048 // If there is a trivial two-entry PHI node in this basic block, and we can
9049 // eliminate it, do so now.
9050 if (auto *PN = dyn_cast<PHINode>(BB->begin()))
9051 if (PN->getNumIncomingValues() == 2)
9052 if (foldTwoEntryPHINode(PN, TTI, DTU, Options.AC, DL,
9053 Options.SpeculateUnpredictables))
9054 return true;
9055 }
9056
     // Dispatch on the terminator's opcode; the builder is positioned at the
     // terminator for the routines that take it. Opcodes without a case here
     // are left untouched.
9057 IRBuilder<> Builder(BB);
9059 Builder.SetInsertPoint(Terminator);
9060 switch (Terminator->getOpcode()) {
9061 case Instruction::UncondBr:
9062 Changed |= simplifyUncondBranch(cast<UncondBrInst>(Terminator), Builder);
9063 break;
9064 case Instruction::CondBr:
9065 Changed |= simplifyCondBranch(cast<CondBrInst>(Terminator), Builder);
9066 break;
9067 case Instruction::Resume:
9068 Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
9069 break;
9070 case Instruction::CleanupRet:
9071 Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
9072 break;
9073 case Instruction::Switch:
9074 Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
9075 break;
9076 case Instruction::Unreachable:
9077 Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
9078 break;
9079 case Instruction::IndirectBr:
9080 Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
9081 break;
9082 }
9083
9084 return Changed;
9085}
9086
9087bool SimplifyCFGOpt::run(BasicBlock *BB) {
9088 bool Changed = false;
9089
9090 // Repeated simplify BB as long as resimplification is requested.
9091 do {
9092 Resimplify = false;
9093
9094 // Perform one round of simplifcation. Resimplify flag will be set if
9095 // another iteration is requested.
9096 Changed |= simplifyOnce(BB);
9097 } while (Resimplify);
9098
9099 return Changed;
9100}
9101
9104 ArrayRef<WeakVH> LoopHeaders) {
9105 return SimplifyCFGOpt(TTI, DTU, BB->getDataLayout(), LoopHeaders,
9106 Options)
9107 .run(BB);
9108}
#define Fail
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Function Alias Analysis Results
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
#define X(NUM, ENUM, NAME)
Definition ELF.h:856
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines the DenseMap class.
#define DEBUG_TYPE
Hexagon Common GEP
static bool IsIndirectCall(const MachineInstr *MI)
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This defines the Use class.
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
static constexpr Value * getValue(Ty &ValueOrUse)
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for metadata subclasses.
#define T
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:484
Provides some synthesis utilities to produce sequences of values.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
static std::optional< ContiguousCasesResult > findContiguousCases(Value *Condition, SmallVectorImpl< ConstantInt * > &Cases, SmallVectorImpl< ConstantInt * > &OtherCases, BasicBlock *Dest, BasicBlock *OtherDest)
static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred, MemorySSAUpdater *MSSAU=nullptr)
Update PHI nodes in Succ to indicate that there will now be entries in it from the 'NewPred' block.
static bool validLookupTableConstant(Constant *C, const TargetTransformInfo &TTI)
Return true if the backend will be able to handle initializing an array of constants like C.
static StoreInst * findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2)
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange, bool OptSize)
static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, BasicBlock *EndBB, unsigned &SpeculatedInstructions, InstructionCost &Cost, const TargetTransformInfo &TTI)
Estimate the cost of the insertion(s) and check that the PHI nodes can be converted to selects.
static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI, bool ConvertSwitchToLookupTable)
If the switch is only used to initialize one or more phi nodes in a common successor block with diffe...
static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI, Value *SelectValue, IRBuilder<> &Builder, DomTreeUpdater *DTU)
static bool valuesOverlap(std::vector< ValueEqualityComparisonCase > &C1, std::vector< ValueEqualityComparisonCase > &C2)
Return true if there are any keys in C1 that exist in C2 as well.
static bool isProfitableToSpeculate(const CondBrInst *BI, std::optional< bool > Invert, const TargetTransformInfo &TTI)
static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static bool mergeCleanupPad(CleanupReturnInst *RI)
static bool isVectorOp(Instruction &I)
Return if an instruction's type or any of its operands' types are a vector type.
static BasicBlock * allPredecessorsComeFromSameSource(BasicBlock *BB)
static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap)
static int constantIntSortPredicate(ConstantInt *const *P1, ConstantInt *const *P2)
static bool getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, SmallVectorImpl< std::pair< PHINode *, Constant * > > &Res, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to determine the resulting constant values in phi nodes at the common destination basic block,...
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified=false)
Check if passing a value to an instruction will cause undefined behavior.
static std::optional< std::tuple< BasicBlock *, Instruction::BinaryOps, bool > > shouldFoldCondBranchesToCommonDestination(CondBrInst *BI, CondBrInst *PBI, const TargetTransformInfo *TTI)
Determine if the two branches share a common destination and deduce a glue that joins the branches' c...
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags)
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, Instruction *I1, Instruction *I2)
static ConstantInt * getConstantInt(Value *V, const DataLayout &DL)
Extract ConstantInt from value, looking through IntToPtr and PointerNullValue.
static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder, DomTreeUpdater *DTU)
Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have the same destination.
static bool shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const TargetTransformInfo &TTI, const DataLayout &DL, const SmallVector< Type * > &ResultTypes)
Determine whether a lookup table should be built for this switch, based on the number of cases,...
static Constant * constantFold(Instruction *I, const DataLayout &DL, const SmallDenseMap< Value *, Constant * > &ConstantPool)
Try to fold instruction I into a constant.
static bool areIdenticalUpToCommutativity(const Instruction *I1, const Instruction *I2)
static bool forwardSwitchConditionToPHI(SwitchInst *SI)
Try to forward the condition of a switch instruction to a phi node dominated by the switch,...
static PHINode * findPHIForConditionForwarding(ConstantInt *CaseValue, BasicBlock *BB, int *PhiIndex)
If BB would be eligible for simplification by TryToSimplifyUncondBranchFromEmptyBlock (i....
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
Tries to transform switch of powers of two to reduce switch range.
static bool isCleanupBlockEmpty(iterator_range< BasicBlock::iterator > R)
static Value * ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, Value *AlternativeV=nullptr)
static Value * createLogicalOp(IRBuilderBase &Builder, Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="")
static void hoistConditionalLoadsStores(CondBrInst *BI, SmallVectorImpl< Instruction * > &SpeculatedConditionalLoadsStores, std::optional< bool > Invert, Instruction *Sel)
If the target supports conditional faulting, we look for the following pattern:
static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, const TargetTransformInfo &TTI)
Helper function for hoistCommonCodeFromSuccessors.
static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to transform a switch that has "holes" in it to a contiguous sequence of cases.
static bool safeToMergeTerminators(Instruction *SI1, Instruction *SI2, SmallSetVector< BasicBlock *, 4 > *FailBlocks=nullptr)
Return true if it is safe to merge these two terminator instructions together.
SkipFlags
@ SkipReadMem
@ SkipSideEffect
@ SkipImplicitControlFlow
static bool incomingValuesAreCompatible(BasicBlock *BB, ArrayRef< BasicBlock * > IncomingBlocks, SmallPtrSetImpl< Value * > *EquivalenceSet=nullptr)
Return true if all the PHI nodes in the basic block BB receive compatible (identical) incoming values...
static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If a switch is only used to initialize one or more phi nodes in a common successor block with only tw...
static void createUnreachableSwitchDefault(SwitchInst *Switch, DomTreeUpdater *DTU, bool RemoveOrigDefaultBlock=true)
static Value * foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, Constant *DefaultResult, Value *Condition, IRBuilder<> &Builder, const DataLayout &DL, ArrayRef< uint32_t > BranchWeights)
static bool sinkCommonCodeFromPredecessors(BasicBlock *BB, DomTreeUpdater *DTU)
Check whether BB's predecessors end with unconditional branches.
static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI, const DataLayout &DL)
static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL)
Compute masked bits for the condition of a switch and use it to remove dead cases.
static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB, BlocksSet &NonLocalUseBlocks)
Return true if we can thread a branch across this block.
static Value * isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, BasicBlock *StoreBB, BasicBlock *EndBB)
Determine if we can hoist sink a sole store instruction out of a conditional block.
static std::optional< bool > foldCondBranchOnValueKnownInPredecessorImpl(CondBrInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
If we have a conditional branch on something for which we know the constant value in predecessors (e....
static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL, bool SpeculateUnpredictables)
Given a BB that starts with the specified two-entry PHI node, see if we can eliminate it.
static bool findReaching(BasicBlock *BB, BasicBlock *DefBB, BlocksSet &ReachesNonLocalUses)
static bool extractPredSuccWeights(CondBrInst *PBI, CondBrInst *BI, uint64_t &PredTrueWeight, uint64_t &PredFalseWeight, uint64_t &SuccTrueWeight, uint64_t &SuccFalseWeight)
Return true if either PBI or BI has branch weight available, and store the weights in {Pred|Succ}...
static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, SwitchCaseResultVectorTy &UniqueResults, Constant *&DefaultResult, const DataLayout &DL, const TargetTransformInfo &TTI, uintptr_t MaxUniqueResults)
static bool shouldUseSwitchConditionAsTableIndex(ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal, bool HasDefaultResults, const SmallVector< Type * > &ResultTypes, const DataLayout &DL, const TargetTransformInfo &TTI)
static InstructionCost computeSpeculationCost(const User *I, const TargetTransformInfo &TTI)
Compute an abstract "cost" of speculating the given instruction, which is assumed to be safe to specu...
static bool performBranchToCommonDestFolding(CondBrInst *BI, CondBrInst *PBI, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU, const TargetTransformInfo *TTI)
SmallPtrSet< BasicBlock *, 8 > BlocksSet
static unsigned skippedInstrFlags(Instruction *I)
static bool mergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU)
If this block is a landingpad exception handling block, categorize all the predecessor invokes into s...
static bool replacingOperandWithVariableIsCheap(const Instruction *I, int OpIdx)
static void eraseTerminatorAndDCECond(Instruction *TI, MemorySSAUpdater *MSSAU=nullptr)
static void eliminateBlockCases(BasicBlock *BB, std::vector< ValueEqualityComparisonCase > &Cases)
Given a vector of bb/value pairs, remove any entries in the list that match the specified block.
static bool mergeConditionalStores(CondBrInst *PBI, CondBrInst *QBI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static bool mergeNestedCondBranch(CondBrInst *BI, DomTreeUpdater *DTU)
Fold the following pattern: bb0: br i1 cond1, label bb1, label bb2 bb1: br i1 cond2,...
static void sinkLastInstruction(ArrayRef< BasicBlock * > Blocks)
static size_t mapCaseToResult(ConstantInt *CaseVal, SwitchCaseResultVectorTy &UniqueResults, Constant *Result)
static bool tryWidenCondBranchToCondBranch(CondBrInst *PBI, CondBrInst *BI, DomTreeUpdater *DTU)
If the previous block ended with a widenable branch, determine if reusing the target block is profita...
static void mergeCompatibleInvokesImpl(ArrayRef< InvokeInst * > Invokes, DomTreeUpdater *DTU)
static bool mergeIdenticalBBs(ArrayRef< BasicBlock * > Candidates, DomTreeUpdater *DTU)
static void getBranchWeights(Instruction *TI, SmallVectorImpl< uint64_t > &Weights)
Get Weights of a given terminator, the default weight is at the front of the vector.
static bool tryToMergeLandingPad(LandingPadInst *LPad, UncondBrInst *BI, BasicBlock *BB, DomTreeUpdater *DTU)
Given a block with only a single landing pad and an unconditional branch, try to find another basic bl...
static Constant * lookupConstant(Value *V, const SmallDenseMap< Value *, Constant * > &ConstantPool)
If V is a Constant, return it.
static bool SimplifyCondBranchToCondBranch(CondBrInst *PBI, CondBrInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If we have a conditional branch as a predecessor of another block, this function tries to simplify it...
static bool canSinkInstructions(ArrayRef< Instruction * > Insts, DenseMap< const Use *, SmallVector< Value *, 4 > > &PHIOperands)
static void hoistLockstepIdenticalDbgVariableRecords(Instruction *TI, Instruction *I1, SmallVectorImpl< Instruction * > &OtherInsts)
Hoists DbgVariableRecords from I1 and OtherInstrs that are identical in lock-step to TI.
static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU)
static bool removeUndefIntroducingPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, AssumptionCache *AC)
If BB has an incoming value that will always trigger undefined behavior (e.g.
static bool simplifySwitchWhenUMin(SwitchInst *SI, DomTreeUpdater *DTU)
Tries to transform the switch when the condition is umin with a constant.
static bool isSafeCheapLoadStore(const Instruction *I, const TargetTransformInfo &TTI)
static ConstantInt * getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To)
static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt, SmallPtrSetImpl< Instruction * > &AggressiveInsts, InstructionCost &Cost, InstructionCost Budget, const TargetTransformInfo &TTI, AssumptionCache *AC, SmallPtrSetImpl< Instruction * > &ZeroCostInstructions, unsigned Depth=0)
If we have a merge point of an "if condition" as accepted above, return true if the specified value d...
static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, CondBrInst *RangeCheckBranch, Constant *DefaultValue, const SmallVectorImpl< std::pair< ConstantInt *, Constant * > > &Values)
Try to reuse the switch table index compare.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:119
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
This pass exposes codegen information to IR-level passes.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1693
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1208
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition APInt.h:1256
bool sle(const APInt &RHS) const
Signed less or equal comparison.
Definition APInt.h:1173
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1554
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:357
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:476
LLVM_ABI APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1995
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1264
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1137
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
std::optional< int64_t > trySExtValue() const
Get sign extended value if possible.
Definition APInt.h:1597
LLVM_ABI APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1976
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
const T & front() const
Get the first element.
Definition ArrayRef.h:144
size_t size() const
Get the array size.
Definition ArrayRef.h:141
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A cache of @llvm.assume calls within a function.
LLVM_ABI void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM_ABI bool getValueAsBool() const
Return the attribute's value as a boolean.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:474
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:461
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:530
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition BasicBlock.h:687
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns an iterator to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
Definition BasicBlock.h:484
LLVM_ABI const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI void flushTerminatorDbgRecords()
Eject any debug-info trailing at the end of a block.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
size_t size() const
Definition BasicBlock.h:482
LLVM_ABI bool isLandingPad() const
Return true if this basic block is a landing pad.
LLVM_ABI bool hasNPredecessorsOrMore(unsigned N) const
Return true if this block has N predecessors or more.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition BasicBlock.h:659
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
BasicBlock * getBasicBlock() const
Definition Constants.h:1125
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
BranchProbability getCompl() const
void addRangeRetAttr(const ConstantRange &CR)
Adds the range attribute to the list of attributes.
bool isCallee(Value::const_user_iterator UI) const
Determine whether the passed iterator points to the callee operand's Use.
bool isDataOperand(const Use *U) const
bool tryIntersectAttributes(const CallBase *Other)
Try to intersect the attributes from 'this' CallBase and the 'Other' CallBase.
This class represents a function call, abstracting a target machine's calling convention.
mapped_iterator< op_iterator, DerefFnTy > handler_iterator
CleanupPadInst * getCleanupPad() const
Convenience accessor.
BasicBlock * getUnwindDest() const
This class is the base class for the comparison instructions.
Definition InstrTypes.h:728
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
bool isEquality() const
Determine if this is an equals/not equals predicate.
Definition InstrTypes.h:978
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:763
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:828
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Conditional Branch instruction.
static CondBrInst * Create(Value *Cond, BasicBlock *IfTrue, BasicBlock *IfFalse, InsertPosition InsertBefore=nullptr)
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
void setCondition(Value *V)
Value * getCondition() const
BasicBlock * getSuccessor(unsigned i) const
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
A vector constant whose element type is a simple 1/2/4/8-byte integer or float/double,...
Definition Constants.h:951
A constant value that is initialized with an expression using other constant values.
Definition Constants.h:1316
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:420
ConstantFolder - Create constants with minimum, target independent, folding.
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:225
bool isNegative() const
Definition Constants.h:214
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition Constants.h:269
IntegerType * getIntegerType() const
Variant of the getType() method to always return an IntegerType, which reduces the amount of casting ...
Definition Constants.h:198
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:135
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:219
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition Constants.h:162
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
A constant pointer value that points to null.
Definition Constants.h:716
This class represents a range of values.
LLVM_ABI bool getEquivalentICmp(CmpInst::Predicate &Pred, APInt &RHS) const
Set up Pred and RHS such that ConstantRange::makeExactICmpRegion(Pred, RHS) == *this.
LLVM_ABI ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
const APInt & getLower() const
Return the lower value for this range.
LLVM_ABI APInt getUnsignedMin() const
Return the smallest unsigned value contained in the ConstantRange.
LLVM_ABI bool isEmptySet() const
Return true if this set contains no members.
LLVM_ABI bool isSizeLargerThan(uint64_t MaxSize) const
Compare set size of this range with Value.
const APInt & getUpper() const
Return the upper value for this range.
LLVM_ABI bool isUpperWrapped() const
Return true if the exclusive upper bound wraps around the unsigned domain.
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
LLVM_ABI ConstantRange inverse() const
Return a new range that is the logical not of the current set.
LLVM_ABI APInt getUnsignedMax() const
Return the largest unsigned value contained in the ConstantRange.
static ConstantRange getNonEmpty(APInt Lower, APInt Upper)
Create non-empty constant range with the given bounds.
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constant.h:64
LLVM_ABI bool isOneValue() const
Returns true if the value is one.
Definition Constants.cpp:89
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
Base class for non-instruction debug metadata records that have positions within IR.
LLVM_ABI void removeFromParent()
simple_ilist< DbgRecord >::iterator self_iterator
Record of a variable value-assignment, aka a non instruction representation of the dbg....
A debug info location.
Definition DebugLoc.h:126
bool isSameSourceLocation(const DebugLoc &Other) const
Return true if the source locations match, ignoring isImplicitCode and source atom info.
Definition DebugLoc.h:244
static DebugLoc getTemporary()
Definition DebugLoc.h:152
static LLVM_ABI DebugLoc getMergedLocation(DebugLoc LocA, DebugLoc LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
Definition DebugLoc.cpp:172
static LLVM_ABI DebugLoc getMergedLocations(ArrayRef< DebugLoc > Locs)
Try to combine the vector of locations passed as input in a single one.
Definition DebugLoc.cpp:159
static DebugLoc getDropped()
Definition DebugLoc.h:155
ValueT & at(const_arg_type_t< KeyT > Val)
Return the entry for the specified key, or abort if no such entry exists.
Definition DenseMap.h:270
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:225
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:301
unsigned size() const
Definition DenseMap.h:174
iterator end()
Definition DenseMap.h:143
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:286
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Definition DenseMap.h:178
Implements a dense probed hash-table based set.
Definition DenseSet.h:289
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:867
const BasicBlock & getEntryBlock() const
Definition Function.h:783
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:758
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:685
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:723
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Module * getParent()
Get the module that this global value is contained inside of...
This instruction compares its operands according to the predicate given to the constructor.
Predicate getSignedPredicate() const
For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2380
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition IRBuilder.h:2128
CondBrInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition IRBuilder.h:1216
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition IRBuilder.h:457
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
BasicBlock::iterator GetInsertPoint() const
Definition IRBuilder.h:176
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition IRBuilder.h:2683
void SetCurrentDebugLocation(const DebugLoc &L)
Set location information used by debugging information.
Definition IRBuilder.h:221
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition IRBuilder.h:1532
LLVM_ABI CallInst * CreateAssumption(Value *Cond)
Create an assume intrinsic call that allows the optimizer to assume that the provided condition will ...
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition IRBuilder.h:2008
UncondBrInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition IRBuilder.h:1210
Value * CreateNot(Value *V, const Twine &Name="")
Definition IRBuilder.h:1854
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition IRBuilder.h:1239
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2364
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition IRBuilder.h:1906
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition IRBuilder.h:2110
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition IRBuilder.h:1919
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1422
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2222
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition IRBuilder.h:462
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition IRBuilder.h:2096
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition IRBuilder.h:2305
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:181
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2474
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1592
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1456
Provides an 'InsertHelper' that calls a user-provided callback after performing the default insertion...
Definition IRBuilder.h:75
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2848
Indirect Branch Instruction.
BasicBlock * getDestination(unsigned i)
Return the specified destination.
unsigned getNumDestinations() const
Return the number of possible destinations in this indirectbr instruction.
LLVM_ABI void removeDestination(unsigned i)
This method removes the specified successor from the indirectbr instruction.
LLVM_ABI void dropUBImplyingAttrsAndMetadata(ArrayRef< unsigned > Keep={})
Drop any attributes or metadata that can cause immediate undefined behavior.
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(const Instruction *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere=std::nullopt, bool InsertAtHead=false)
Clone any debug-info attached to From onto this instruction.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
iterator_range< simple_ilist< DbgRecord >::iterator > getDbgRecordRange() const
Return a range over the DbgRecords attached to this instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool isTerminator() const
LLVM_ABI bool isUsedOutsideOfBlock(const BasicBlock *BB) const LLVM_READONLY
Return true if there are any uses of this instruction in blocks other than the specified block.
@ CompareUsingIntersectedAttrs
Check for equivalence with intersected callbase attrs.
LLVM_ABI bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI void applyMergedLocation(DebugLoc LocA, DebugLoc LocB)
Merge 2 debug locations and apply it to the Instruction.
LLVM_ABI void dropDbgRecords()
Erase any DbgRecords attached to this instruction.
LLVM_ABI InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:348
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Invoke instruction.
void setNormalDest(BasicBlock *B)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
The landingpad instruction holds all of the information necessary to generate correct exception handl...
An instruction for reading from memory.
static unsigned getPointerOperandIndex()
Iterates through instructions in a set of blocks in reverse order from the first non-terminator.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition MDBuilder.cpp:38
Metadata node.
Definition Metadata.h:1069
Helper class to manipulate !mmra metadata nodes.
bool empty() const
Definition MapVector.h:79
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition MapVector.h:126
size_type size() const
Definition MapVector.h:58
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Value * getValue() const
Convenience accessor.
Return a value (possibly void), from a function.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
void insert_range(Range &&R)
Definition SetVector.h:176
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:100
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
size_type size() const
Definition SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this store instruction.
Align getAlign() const
bool isSimple() const
Value * getValueOperand()
bool isUnordered() const
static unsigned getPointerOperandIndex()
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this store instruction.
Value * getPointerOperand()
Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition StringRef.h:56
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
LLVM_ABI void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
LLVM_ABI CaseWeightOpt getSuccessorWeight(unsigned idx)
LLVM_ABI void replaceDefaultDest(SwitchInst::CaseIt I)
Replace the default destination by given case.
std::optional< uint32_t > CaseWeightOpt
LLVM_ABI SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove the corresponding branch weight.
Multiway switch.
CaseIt case_end()
Returns a read/write iterator that points one past the last case in the SwitchInst.
BasicBlock * getSuccessor(unsigned idx) const
void setCondition(Value *V)
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
CaseIteratorImpl< CaseHandle > CaseIt
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
unsigned getNumSuccessors() const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:282
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
Definition Type.cpp:306
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
Unconditional Branch instruction.
void setSuccessor(BasicBlock *NewSucc)
static UncondBrInst * Create(BasicBlock *Target, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i=0) const
'undef' values are things that do not have specified contents.
Definition Constants.h:1631
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:36
LLVM_ABI void set(Value *Val)
Definition Value.h:874
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
op_range operands()
Definition User.h:267
const Use & getOperandUse(unsigned i) const
Definition User.h:220
void setOperand(unsigned i, Value *Val)
Definition User.h:212
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition User.cpp:25
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
static constexpr uint64_t MaximumAlignment
Definition Value.h:799
LLVM_ABI Value(Type *Ty, unsigned scid)
Definition Value.cpp:54
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:394
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
bool use_empty() const
Definition Value.h:346
iterator_range< use_iterator > uses()
Definition Value.h:380
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:319
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:400
Represents an op.with.overflow intrinsic.
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
A range adaptor for a pair of iterators.
Changed
#define UINT64_MAX
Definition DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
auto m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
bool match(Val *V, const Pattern &P)
match_bind< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
auto m_UMin(const Opnd0 &Op0, const Opnd1 &Op1)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
auto m_Value()
Match an arbitrary value and ignore it.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
match_bind< WithOverflowInst > m_WithOverflowInst(WithOverflowInst *&I)
Match a with overflow intrinsic, capturing it if we match.
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
NoWrapTrunc_match< OpTy, TruncInst::NoUnsignedWrap > m_NUWTrunc(const OpTy &Op)
Matches trunc nuw.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition DebugInfo.h:204
LLVM_ABI void deleteAssignmentMarkers(const Instruction *Inst)
Delete the llvm.dbg.assign intrinsics linked to Inst.
initializer< Ty > init(const Ty &Val)
DXILDebugInfoMap run(Module &M)
constexpr double e
@ User
could "use" a pointer
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
Context & getContext() const
Definition BasicBlock.h:99
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
@ Offset
Definition DWP.cpp:573
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
Zip iterator for two or more iterable types.
Definition STLExtras.h:830
bool operator<(int64_t V1, const APSInt &V2)
Definition APSInt.h:360
constexpr auto not_equal_to(T &&Arg)
Functor variant of std::not_equal_to that can be used as a UnaryPredicate in functional algorithms li...
Definition STLExtras.h:2180
LLVM_ABI bool foldBranchToCommonDest(CondBrInst *BI, llvm::DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr, const TargetTransformInfo *TTI=nullptr, AssumptionCache *AC=nullptr, unsigned BonusInstThreshold=1)
If this basic block is ONLY a setcc and a branch, and if a predecessor branches to us and one of our ...
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1765
LLVM_ABI cl::opt< bool > ProfcheckDisableMetadataFixes
Definition LoopInfo.cpp:60
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:535
bool succ_empty(const Instruction *I)
Definition CFG.h:141
LLVM_ABI bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB)
Check if we can prove that all paths starting from this block converge to a block that either has a @...
LLVM_ABI bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition Local.cpp:134
static cl::opt< unsigned > MaxSwitchCasesPerResult("max-switch-cases-per-result", cl::Hidden, cl::init(16), cl::desc("Limit cases to analyze when converting a switch to select"))
InstructionCost Cost
RelativeUniformCounterPtr Values
Definition InstrProf.h:91
static cl::opt< bool > SpeculateOneExpensiveInst("speculate-one-expensive-inst", cl::Hidden, cl::init(true), cl::desc("Allow exactly one expensive instruction to be speculatively " "executed"))
@ Dead
Unused definition.
auto pred_end(const MachineBasicBlock *BB)
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B. Identical to set_intersection, except that it works on set<>...
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto successors(const MachineBasicBlock *BB)
auto accumulate(R &&Range, E &&Init)
Wrapper for std::accumulate.
Definition STLExtras.h:1702
constexpr from_range_t from_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI MDNode * getBranchWeightMDNode(const Instruction &I)
Get the branch weights metadata node.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
LLVM_ABI void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto unique(Range &&R, Predicate P)
Definition STLExtras.h:2134
static cl::opt< unsigned > MaxSpeculationDepth("max-speculation-depth", cl::Hidden, cl::init(10), cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions"))
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1791
static cl::opt< unsigned > PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(2), cl::desc("Control the amount of phi node folding to perform (default = 2)"))
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
static cl::opt< bool > MergeCondStoresAggressively("simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false), cl::desc("When merging conditional stores, do so even if the resultant " "basic blocks are unlikely to be if-converted as a result"))
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:156
LLVM_ABI ConstantRange getConstantRangeFromMetadata(const MDNode &RangeMD)
Parse out a conservative ConstantRange from !range metadata.
auto map_range(ContainerTy &&C, FuncTy F)
Return a range that applies F to the elements of C.
Definition STLExtras.h:365
static cl::opt< unsigned > BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, cl::init(2), cl::desc("Maximum cost of combining conditions when " "folding branches"))
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Value
Definition InstrProf.h:143
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:385
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:204
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
static cl::opt< bool > SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block"))
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition STLExtras.h:2200
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:149
static cl::opt< bool > HoistStoresWithCondFaulting("simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist stores if the target supports conditional faulting"))
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
constexpr detail::StaticCastFunc< To > StaticCastTo
Function objects corresponding to the Cast types defined above.
Definition Casting.h:882
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI CondBrInst * GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, BasicBlock *&IfFalse)
Check whether BB is the merge point of a if-region.
LLVM_ABI bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
Definition Local.cpp:1155
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
LLVM_ABI void InvertBranch(CondBrInst *PBI, IRBuilderBase &Builder)
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI bool impliesPoison(const Value *ValAssumedPoison, const Value *V)
Return true if V is poison given that ValAssumedPoison is already poison.
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1636
static cl::opt< bool > EnableMergeCompatibleInvokes("simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true), cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"))
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition ValueMapper.h:98
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition ValueMapper.h:80
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1753
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition STLExtras.h:1399
LLVM_ABI bool collectPossibleValues(const Value *V, SmallPtrSetImpl< const Constant * > &Constants, unsigned MaxCount, bool AllowUndefOrPoison=true)
Enumerates all possible immediate values of V and inserts them into the set Constants.
LLVM_ABI Instruction * removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
Replace 'BB's terminator with one that does not have an unwind successor block.
Definition Local.cpp:2863
auto succ_size(const MachineBasicBlock *BB)
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
Definition STLExtras.h:551
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
static cl::opt< unsigned > MaxJumpThreadingLiveBlocks("max-jump-threading-live-blocks", cl::Hidden, cl::init(24), cl::desc("Limit number of blocks a define in a threaded block is allowed " "to be live in"))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
LLVM_ABI void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition Local.cpp:3105
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:322
static cl::opt< int > MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), cl::desc("Max size of a block which is still considered " "small enough to thread through"))
LLVM_ABI BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
LLVM_ABI bool isWidenableBranch(const User *U)
Returns true iff U is a widenable branch (that is, extractWidenableCondition returns widenable condit...
@ Other
Any other memory.
Definition ModRef.h:68
TargetTransformInfo TTI
static cl::opt< unsigned > HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden, cl::init(20), cl::desc("Allow reordering across at most this many " "instructions when hoisting"))
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
static cl::opt< bool > MergeCondStores("simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores even if an unconditional store does not " "precede - hoist multiple conditional stores into a single " "predicated store"))
static cl::opt< unsigned > BranchFoldToCommonDestVectorMultiplier("simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden, cl::init(2), cl::desc("Multiplier to apply to threshold when determining whether or not " "to fold branch to common destination when vector operations are " "present"))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
LLVM_ABI void hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, BasicBlock *BB)
Hoist all of the instructions in the IfBlock to the dominant block DomBlock, by moving its instructio...
Definition Local.cpp:3389
@ Sub
Subtraction of integers.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Count
Definition InstrProf.h:145
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2012
IntPtrTy
Definition InstrProf.h:82
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
LLVM_ABI bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx)
Given an instruction, is it legal to set operand OpIdx to a non-constant value?
Definition Local.cpp:3896
DWARFExpression::Operation Op
LLVM_ABI bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
LLVM_ABI bool FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep=nullptr)
We know that BB has one predecessor.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
void RemapDbgRecord(Module *M, DbgRecord *DR, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecord DR using the value map VM.
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
auto sum_of(R &&Range, E Init=E{0})
Returns the sum of all values in Range with Init initial value.
Definition STLExtras.h:1717
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
static cl::opt< bool > HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores if an unconditional store precedes"))
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
LLVM_ABI bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
auto pred_begin(const MachineBasicBlock *BB)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2192
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:248
auto predecessors(const MachineBasicBlock *BB)
static cl::opt< unsigned > HoistLoadsStoresWithCondFaultingThreshold("hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6), cl::desc("Control the maximal conditional load/store that we are willing " "to speculatively execute to eliminate conditional branch " "(default = 6)"))
static cl::opt< bool > HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), cl::desc("Hoist common instructions up to the parent block"))
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:368
LLVM_ABI unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Get the upper bound on bit size for this Value Op as a signed integer.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
static cl::opt< unsigned > TwoEntryPHINodeFoldingThreshold("two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4), cl::desc("Control the maximal total instruction cost that we are willing " "to speculatively execute to fold a 2-entry PHI node into a " "select (default = 4)"))
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
SmallVector< uint64_t, 2 > getDisjunctionWeights(const SmallVector< T1, 2 > &B1, const SmallVector< T2, 2 > &B2)
Get the branch weights of a branch conditioned on b1 || b2, where b1 and b2 are 2 booleans that are t...
bool pred_empty(const BasicBlock *BB)
Definition CFG.h:107
LLVM_ABI Constant * ConstantFoldCastInstruction(unsigned opcode, Constant *V, Type *DestTy)
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
LLVM_ABI std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition STLExtras.h:1596
LLVM_ABI bool hasBranchWeightMD(const Instruction &I)
Checks if an instruction has branch weight metadata.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition Hashing.h:305
LLVM_ABI bool isDereferenceablePointer(const Value *V, Type *Ty, const SimplifyQuery &Q, bool IgnoreFree=false)
Equivalent to isDereferenceableAndAlignedPointer with an alignment of 1.
Definition Loads.cpp:264
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition STLExtras.h:2146
static cl::opt< bool > HoistLoadsWithCondFaulting("simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist loads if the target supports conditional faulting"))
LLVM_ABI Constant * ConstantFoldInstOperands(const Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
LLVM_ABI void setFittedBranchWeights(Instruction &I, ArrayRef< uint64_t > Weights, bool IsExpected, bool ElideAllZero=false)
Variant of setBranchWeights where the Weights will be fit first to uint32_t by shifting right.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
bool capturesNothing(CaptureComponents CC)
Definition ModRef.h:375
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI bool EliminateDuplicatePHINodes(BasicBlock *BB)
Check for and eliminate duplicate PHI nodes in this block.
Definition Local.cpp:1509
@ Keep
No function return thunk.
Definition CodeGen.h:162
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
Definition Casting.h:866
LLVM_ABI void RemapSourceAtom(Instruction *I, ValueToValueMapTy &VM)
Remap source location atom.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition Hashing.h:285
LLVM_ABI bool isWritableObject(const Value *Object, bool &ExplicitlyDereferenceableOnly)
Return true if the Object is writable, in the sense that any location based on this pointer that can ...
LLVM_ABI void mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap)
Mark a cloned instruction as a new instance so that its source loc can be updated when remapped.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
LLVM_ABI void extractFromBranchWeightMD64(const MDNode *ProfileData, SmallVectorImpl< uint64_t > &Weights)
Faster version of extractBranchWeights() that skips checks and must only be called with "branch_weigh...
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, const SimplifyQuery &SQ, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
#define N
SmallVectorImpl< ConstantInt * > * Cases
SmallVectorImpl< ConstantInt * > * OtherCases
Checking whether two BBs are equal depends on the contents of the BasicBlock and the incoming values ...
SmallDenseMap< BasicBlock *, Value *, 8 > BB2ValueMap
Phi2IVsMap * PhiPredIVs
DenseMap< PHINode *, BB2ValueMap > Phi2IVsMap
static bool canBeMerged(const BasicBlock *BB)
BasicBlock * BB
static bool isEqual(const EqualBBWrapper *LHS, const EqualBBWrapper *RHS)
static unsigned getHashValue(const EqualBBWrapper *EBW)
An information struct used to provide DenseMap with the various necessary components for a given valu...
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition KnownBits.h:310
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:146
Matching combinators.
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:342