79#define DEBUG_TYPE "block-placement"
81STATISTIC(NumCondBranches,
"Number of conditional branches");
82STATISTIC(NumUncondBranches,
"Number of unconditional branches");
84 "Potential frequency of taking conditional branches");
86 "Potential frequency of taking unconditional branches");
90 cl::desc(
"Force the alignment of all blocks in the function in log2 format "
91 "(e.g 4 means align on 16B boundaries)."),
95 "align-all-nofallthru-blocks",
96 cl::desc(
"Force the alignment of all blocks that have no fall-through "
97 "predecessors (i.e. don't add nops that are executed). In log2 "
98 "format (e.g 4 means align on 16B boundaries)."),
102 "max-bytes-for-alignment",
103 cl::desc(
"Forces the maximum bytes allowed to be emitted when padding for "
108 "block-placement-predecessor-limit",
109 cl::desc(
"For blocks with more predecessors, certain layout optimizations"
110 "will be disabled to prevent quadratic compile time."),
115 "block-placement-exit-block-bias",
116 cl::desc(
"Block frequency percentage a loop exit block needs "
117 "over the original exit to be considered the new exit."),
124 "loop-to-cold-block-ratio",
125 cl::desc(
"Outline loop blocks from loop chain if (frequency of loop) / "
126 "(frequency of block) is greater than this ratio"),
131 cl::desc(
"Force outlining cold blocks from loops."),
136 cl::desc(
"Model the cost of loop rotation more "
137 "precisely by using profile data."),
142 cl::desc(
"Force the use of precise cost "
143 "loop rotation strategy."),
148 cl::desc(
"Cost that models the probabilistic risk of an instruction "
149 "misfetch due to a jump comparing to falling through, whose cost "
154 cl::desc(
"Cost of jump instructions."),
158 cl::desc(
"Perform tail duplication during placement. "
159 "Creates more fallthrough opportunities in "
160 "outline branches."),
165 cl::desc(
"Perform branch folding during placement. "
166 "Reduces code size."),
171 "tail-dup-placement-threshold",
172 cl::desc(
"Instruction cutoff for tail duplication during layout. "
173 "Tail merging during layout is forced to have a threshold "
174 "that won't conflict."),
179 "tail-dup-placement-aggressive-threshold",
180 cl::desc(
"Instruction cutoff for aggressive tail duplication during "
181 "layout. Used at -O3. Tail merging during layout is forced to "
182 "have a threshold that won't conflict."),
187 "tail-dup-placement-penalty",
189 "Cost penalty for blocks that can avoid breaking CFG by copying. "
190 "Copying can increase fallthrough, but it also increases icache "
191 "pressure. This parameter controls the penalty to account for that. "
192 "Percent as integer."),
197 "tail-dup-profile-percent-threshold",
198 cl::desc(
"If profile count information is used in tail duplication cost "
199 "model, the gained fall through number from tail duplication "
200 "should be at least this percent of hot count."),
205 "triangle-chain-count",
206 cl::desc(
"Number of triangle-shaped-CFG's that need to be in a row for the "
207 "triangle tail duplication heuristic to kick in. 0 to disable."),
216 "renumber-blocks-before-view",
218 "If true, basic blocks are re-numbered before MBP layout is printed "
219 "into a dot graph. Only used when a function is being printed."),
223 "ext-tsp-block-placement-max-blocks",
224 cl::desc(
"Maximum number of basic blocks in a function to run ext-TSP "
231 cl::desc(
"Use ext-tsp for size-aware block placement."));
280 BlockToChainMapType &BlockToChain;
289 : Blocks(1, BB), BlockToChain(BlockToChain) {
290 assert(BB &&
"Cannot create a chain with a null basic block");
291 BlockToChain[BB] =
this;
307 for (
iterator i = begin(); i != end(); ++i) {
323 assert(BB &&
"Can't merge a null block.");
324 assert(!Blocks.
empty() &&
"Can't merge into an empty chain.");
328 assert(!BlockToChain[BB] &&
329 "Passed chain is null, but BB has entry in BlockToChain.");
331 BlockToChain[BB] =
this;
335 assert(BB == *Chain->begin() &&
"Passed BB is not head of Chain.");
336 assert(Chain->begin() != Chain->end());
342 assert(BlockToChain[ChainBB] == Chain &&
"Incoming blocks not in chain.");
343 BlockToChain[ChainBB] =
this;
364 unsigned UnscheduledPredecessors = 0;
367class MachineBlockPlacement {
369 using BlockFilterSet = SmallSetVector<const MachineBasicBlock *, 16>;
372 struct BlockAndTailDupResult {
373 MachineBasicBlock *BB =
nullptr;
378 struct WeightedEdge {
379 BlockFrequency Weight;
380 MachineBasicBlock *Src =
nullptr;
381 MachineBasicBlock *Dest =
nullptr;
389 DenseMap<const MachineBasicBlock *, BlockAndTailDupResult> ComputedEdges;
392 MachineFunction *F =
nullptr;
395 const MachineBranchProbabilityInfo *MBPI =
nullptr;
398 std::unique_ptr<MBFIWrapper> MBFI;
401 MachineLoopInfo *MLI =
nullptr;
406 MachineBasicBlock *PreferredLoopExit =
nullptr;
409 const TargetInstrInfo *TII =
nullptr;
412 const TargetLoweringBase *TLI =
nullptr;
415 MachinePostDominatorTree *MPDT =
nullptr;
417 ProfileSummaryInfo *PSI =
nullptr;
430 TailDuplicator TailDup;
433 BlockFrequency DupThreshold;
435 unsigned TailDupSize;
439 bool UseProfileCount =
false;
448 SpecificBumpPtrAllocator<BlockChain> ChainAllocator;
456 DenseMap<const MachineBasicBlock *, BlockChain *> BlockToChain;
463 SmallPtrSet<MachineBasicBlock *, 4> BlocksWithUnanalyzableExits;
468 BlockFrequency getBlockCountOrFrequency(
const MachineBasicBlock *BB) {
469 if (UseProfileCount) {
470 auto Count = MBFI->getBlockProfileCount(BB);
472 return BlockFrequency(*
Count);
474 return BlockFrequency(0);
476 return MBFI->getBlockFreq(BB);
480 BlockFrequency scaleThreshold(MachineBasicBlock *BB);
481 void initTailDupThreshold();
485 void markChainSuccessors(
const BlockChain &Chain,
486 const MachineBasicBlock *LoopHeaderBB,
487 const BlockFilterSet *BlockFilter =
nullptr);
491 void markBlockSuccessors(
const BlockChain &Chain,
const MachineBasicBlock *BB,
492 const MachineBasicBlock *LoopHeaderBB,
493 const BlockFilterSet *BlockFilter =
nullptr);
496 collectViableSuccessors(
const MachineBasicBlock *BB,
const BlockChain &Chain,
497 const BlockFilterSet *BlockFilter,
498 SmallVector<MachineBasicBlock *, 4> &Successors);
499 bool isBestSuccessor(MachineBasicBlock *BB, MachineBasicBlock *Pred,
500 BlockFilterSet *BlockFilter);
501 void findDuplicateCandidates(SmallVectorImpl<MachineBasicBlock *> &Candidates,
502 MachineBasicBlock *BB,
503 BlockFilterSet *BlockFilter);
504 bool repeatedlyTailDuplicateBlock(
505 MachineBasicBlock *BB, MachineBasicBlock *&LPred,
506 const MachineBasicBlock *LoopHeaderBB, BlockChain &Chain,
507 BlockFilterSet *BlockFilter,
511 maybeTailDuplicateBlock(MachineBasicBlock *BB, MachineBasicBlock *LPred,
512 BlockChain &Chain, BlockFilterSet *BlockFilter,
515 bool &DuplicatedToLPred);
516 bool hasBetterLayoutPredecessor(
const MachineBasicBlock *BB,
517 const MachineBasicBlock *Succ,
518 const BlockChain &SuccChain,
519 BranchProbability SuccProb,
520 BranchProbability RealSuccProb,
521 const BlockChain &Chain,
522 const BlockFilterSet *BlockFilter);
523 BlockAndTailDupResult selectBestSuccessor(
const MachineBasicBlock *BB,
524 const BlockChain &Chain,
525 const BlockFilterSet *BlockFilter);
527 selectBestCandidateBlock(
const BlockChain &Chain,
528 SmallVectorImpl<MachineBasicBlock *> &WorkList);
530 getFirstUnplacedBlock(
const BlockChain &PlacedChain,
533 getFirstUnplacedBlock(
const BlockChain &PlacedChain,
535 const BlockFilterSet *BlockFilter);
542 void fillWorkLists(
const MachineBasicBlock *
MBB,
543 SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
544 const BlockFilterSet *BlockFilter);
546 void buildChain(
const MachineBasicBlock *BB, BlockChain &Chain,
547 BlockFilterSet *BlockFilter =
nullptr);
548 bool canMoveBottomBlockToTop(
const MachineBasicBlock *BottomBlock,
549 const MachineBasicBlock *OldTop);
550 bool hasViableTopFallthrough(
const MachineBasicBlock *Top,
551 const BlockFilterSet &LoopBlockSet);
552 BlockFrequency TopFallThroughFreq(
const MachineBasicBlock *Top,
553 const BlockFilterSet &LoopBlockSet);
554 BlockFrequency FallThroughGains(
const MachineBasicBlock *NewTop,
555 const MachineBasicBlock *OldTop,
556 const MachineBasicBlock *ExitBB,
557 const BlockFilterSet &LoopBlockSet);
558 MachineBasicBlock *findBestLoopTopHelper(MachineBasicBlock *OldTop,
559 const MachineLoop &L,
560 const BlockFilterSet &LoopBlockSet);
561 MachineBasicBlock *findBestLoopTop(
const MachineLoop &L,
562 const BlockFilterSet &LoopBlockSet);
563 MachineBasicBlock *findBestLoopExit(
const MachineLoop &L,
564 const BlockFilterSet &LoopBlockSet,
565 BlockFrequency &ExitFreq);
566 BlockFilterSet collectLoopBlockSet(
const MachineLoop &L);
567 void buildLoopChains(
const MachineLoop &L);
568 void rotateLoop(BlockChain &LoopChain,
const MachineBasicBlock *ExitingBB,
569 BlockFrequency ExitFreq,
const BlockFilterSet &LoopBlockSet);
570 void rotateLoopWithProfile(BlockChain &LoopChain,
const MachineLoop &L,
571 const BlockFilterSet &LoopBlockSet);
572 void buildCFGChains();
573 void optimizeBranches();
577 bool shouldTailDuplicate(MachineBasicBlock *BB);
580 bool isProfitableToTailDup(
const MachineBasicBlock *BB,
581 const MachineBasicBlock *Succ,
582 BranchProbability QProb,
const BlockChain &Chain,
583 const BlockFilterSet *BlockFilter);
586 bool isTrellis(
const MachineBasicBlock *BB,
587 const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
588 const BlockChain &Chain,
const BlockFilterSet *BlockFilter);
591 BlockAndTailDupResult getBestTrellisSuccessor(
592 const MachineBasicBlock *BB,
593 const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
594 BranchProbability AdjustedSumProb,
const BlockChain &Chain,
595 const BlockFilterSet *BlockFilter);
598 static std::pair<WeightedEdge, WeightedEdge> getBestNonConflictingEdges(
599 const MachineBasicBlock *BB,
604 bool canTailDuplicateUnplacedPreds(
const MachineBasicBlock *BB,
605 MachineBasicBlock *Succ,
606 const BlockChain &Chain,
607 const BlockFilterSet *BlockFilter);
611 void precomputeTriangleChains();
614 void applyExtTsp(
bool OptForSize);
617 void assignBlockOrder(
const std::vector<const MachineBasicBlock *> &NewOrder);
620 void createCFGChainExtTsp();
623 MachineBlockPlacement(
const MachineBranchProbabilityInfo *MBPI,
624 MachineLoopInfo *MLI, ProfileSummaryInfo *PSI,
625 std::unique_ptr<MBFIWrapper> MBFI,
626 MachinePostDominatorTree *MPDT,
bool AllowTailMerge)
627 : MBPI(MBPI), MBFI(std::
move(MBFI)), MLI(MLI), MPDT(MPDT), PSI(PSI),
628 AllowTailMerge(AllowTailMerge) {};
630 bool run(MachineFunction &F);
632 static bool allowTailDupPlacement(MachineFunction &MF) {
641 MachineBlockPlacementLegacy() : MachineFunctionPass(ID) {}
643 bool runOnMachineFunction(MachineFunction &MF)
override {
648 &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
649 auto MBFI = std::make_unique<MBFIWrapper>(
650 getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI());
651 auto *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
652 auto *MPDT = MachineBlockPlacement::allowTailDupPlacement(MF)
653 ? &getAnalysis<MachinePostDominatorTreeWrapperPass>()
656 auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
657 auto *PassConfig = &getAnalysis<TargetPassConfig>();
658 bool AllowTailMerge = PassConfig->getEnableTailMerge();
659 return MachineBlockPlacement(MBPI, MLI, PSI, std::move(MBFI), MPDT,
664 void getAnalysisUsage(AnalysisUsage &AU)
const override {
665 AU.
addRequired<MachineBranchProbabilityInfoWrapperPass>();
666 AU.
addRequired<MachineBlockFrequencyInfoWrapperPass>();
668 AU.
addRequired<MachinePostDominatorTreeWrapperPass>();
678char MachineBlockPlacementLegacy::ID = 0;
683 "Branch Probability Basic Block Placement",
false,
false)
700 OS <<
" ('" << BB->
getName() <<
"')";
711void MachineBlockPlacement::markChainSuccessors(
713 const BlockFilterSet *BlockFilter) {
716 for (MachineBasicBlock *
MBB : Chain) {
717 markBlockSuccessors(Chain,
MBB, LoopHeaderBB, BlockFilter);
727void MachineBlockPlacement::markBlockSuccessors(
728 const BlockChain &Chain,
const MachineBasicBlock *
MBB,
729 const MachineBasicBlock *LoopHeaderBB,
const BlockFilterSet *BlockFilter) {
735 if (BlockFilter && !BlockFilter->count(Succ))
737 BlockChain &SuccChain = *BlockToChain[Succ];
739 if (&Chain == &SuccChain || Succ == LoopHeaderBB)
744 if (SuccChain.UnscheduledPredecessors == 0 ||
745 --SuccChain.UnscheduledPredecessors > 0)
748 auto *NewBB = *SuccChain.begin();
749 if (NewBB->isEHPad())
760BranchProbability MachineBlockPlacement::collectViableSuccessors(
761 const MachineBasicBlock *BB,
const BlockChain &Chain,
762 const BlockFilterSet *BlockFilter,
763 SmallVector<MachineBasicBlock *, 4> &Successors) {
781 for (MachineBasicBlock *Succ : BB->
successors()) {
782 bool SkipSucc =
false;
783 if (Succ->isEHPad() || (BlockFilter && !BlockFilter->count(Succ))) {
786 BlockChain *SuccChain = BlockToChain[Succ];
787 if (SuccChain == &Chain) {
789 }
else if (Succ != *SuccChain->begin()) {
791 <<
" -> Mid chain!\n");
801 return AdjustedSumProb;
806static BranchProbability
812 if (SuccProbN >= SuccProbD)
827 if (Successors.
count(&BB))
830 if (!Successors.
count(Succ))
838bool MachineBlockPlacement::shouldTailDuplicate(MachineBasicBlock *BB) {
859 return (Gain / ThresholdProb) >= EntryFreq;
867bool MachineBlockPlacement::isProfitableToTailDup(
868 const MachineBasicBlock *BB,
const MachineBasicBlock *Succ,
869 BranchProbability QProb,
const BlockChain &Chain,
870 const BlockFilterSet *BlockFilter) {
894 MachineBasicBlock *PDom =
nullptr;
895 SmallVector<MachineBasicBlock *, 4> SuccSuccs;
897 auto AdjustedSuccSumProb =
898 collectViableSuccessors(Succ, Chain, BlockFilter, SuccSuccs);
900 auto BBFreq = MBFI->getBlockFreq(BB);
901 auto SuccFreq = MBFI->getBlockFreq(Succ);
902 BlockFrequency
P =
BBFreq * PProb;
903 BlockFrequency Qout =
BBFreq * QProb;
904 BlockFrequency EntryFreq = MBFI->getEntryFreq();
907 if (SuccSuccs.
size() == 0)
912 for (MachineBasicBlock *SuccSucc : SuccSuccs) {
914 if (Prob > BestSuccSucc)
924 auto SuccBestPred = BlockFrequency(0);
925 for (MachineBasicBlock *SuccPred : Succ->
predecessors()) {
926 if (SuccPred == Succ || SuccPred == BB ||
927 BlockToChain[SuccPred] == &Chain ||
928 (BlockFilter && !BlockFilter->count(SuccPred)))
932 if (Freq > SuccBestPred)
936 BlockFrequency Qin = SuccBestPred;
957 BranchProbability UProb = BestSuccSucc;
958 BranchProbability VProb = AdjustedSuccSumProb - UProb;
959 BlockFrequency
F = SuccFreq - Qin;
960 BlockFrequency
V = SuccFreq * VProb;
961 BlockFrequency QinU = std::min(Qin,
F) * UProb;
962 BlockFrequency BaseCost =
P +
V;
963 BlockFrequency DupCost = Qout + QinU + std::max(Qin,
F) * VProb;
967 BranchProbability VProb = AdjustedSuccSumProb - UProb;
968 BlockFrequency
U = SuccFreq * UProb;
969 BlockFrequency
V = SuccFreq * VProb;
970 BlockFrequency
F = SuccFreq - Qin;
1000 if (UProb > AdjustedSuccSumProb / 2 &&
1001 !hasBetterLayoutPredecessor(Succ, PDom, *BlockToChain[PDom], UProb, UProb,
1002 Chain, BlockFilter))
1005 (
P + V), (Qout + std::max(Qin,
F) * VProb + std::min(Qin,
F) * UProb),
1009 (Qout + std::min(Qin,
F) * AdjustedSuccSumProb +
1010 std::max(Qin,
F) * UProb),
1021bool MachineBlockPlacement::isTrellis(
1022 const MachineBasicBlock *BB,
1023 const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
1024 const BlockChain &Chain,
const BlockFilterSet *BlockFilter) {
1033 SmallPtrSet<const MachineBasicBlock *, 8> SeenPreds;
1035 for (MachineBasicBlock *Succ : ViableSuccs) {
1044 if (Successors.count(SuccPred)) {
1046 for (MachineBasicBlock *CheckSucc : SuccPred->successors())
1047 if (!Successors.count(CheckSucc))
1051 const BlockChain *PredChain = BlockToChain[SuccPred];
1052 if (SuccPred == BB || (BlockFilter && !BlockFilter->count(SuccPred)) ||
1053 PredChain == &Chain || PredChain == BlockToChain[Succ])
1057 if (!SeenPreds.
insert(SuccPred).second)
1075std::pair<MachineBlockPlacement::WeightedEdge,
1076 MachineBlockPlacement::WeightedEdge>
1077MachineBlockPlacement::getBestNonConflictingEdges(
1078 const MachineBasicBlock *BB,
1088 auto Cmp = [](WeightedEdge
A, WeightedEdge
B) {
return A.Weight >
B.Weight; };
1092 auto BestA = Edges[0].begin();
1093 auto BestB = Edges[1].begin();
1096 if (BestA->Src == BestB->Src) {
1098 auto SecondBestA = std::next(BestA);
1099 auto SecondBestB = std::next(BestB);
1100 BlockFrequency BestAScore = BestA->Weight + SecondBestB->Weight;
1101 BlockFrequency BestBScore = BestB->Weight + SecondBestA->Weight;
1102 if (BestAScore < BestBScore)
1103 BestA = SecondBestA;
1105 BestB = SecondBestB;
1108 if (BestB->Src == BB)
1110 return std::make_pair(*BestA, *BestB);
1120MachineBlockPlacement::BlockAndTailDupResult
1121MachineBlockPlacement::getBestTrellisSuccessor(
1122 const MachineBasicBlock *BB,
1123 const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
1124 BranchProbability AdjustedSumProb,
const BlockChain &Chain,
1125 const BlockFilterSet *BlockFilter) {
1127 BlockAndTailDupResult
Result = {
nullptr,
false};
1134 if (Successors.
size() != 2 || ViableSuccs.
size() != 2)
1140 for (
auto *Succ : ViableSuccs) {
1141 for (MachineBasicBlock *SuccPred : Succ->predecessors()) {
1143 if (SuccPred != BB) {
1144 if (BlockFilter && !BlockFilter->count(SuccPred))
1146 const BlockChain *SuccPredChain = BlockToChain[SuccPred];
1147 if (SuccPredChain == &Chain || SuccPredChain == BlockToChain[Succ])
1150 BlockFrequency EdgeFreq = MBFI->getBlockFreq(SuccPred) *
1152 Edges[SuccIndex].
push_back({EdgeFreq, SuccPred, Succ});
1158 WeightedEdge BestA, BestB;
1159 std::tie(BestA, BestB) = getBestNonConflictingEdges(BB, Edges);
1161 if (BestA.Src != BB) {
1165 LLVM_DEBUG(
dbgs() <<
"Trellis, but not one of the chosen edges.\n");
1172 if (BestA.Dest == BestB.Src) {
1175 MachineBasicBlock *Succ1 = BestA.Dest;
1176 MachineBasicBlock *Succ2 = BestB.Dest;
1178 if (allowTailDupPlacement(*
F) && shouldTailDuplicate(Succ2) &&
1179 canTailDuplicateUnplacedPreds(BB, Succ2, Chain, BlockFilter) &&
1181 Chain, BlockFilter)) {
1185 <<
", probability: " << Succ2Prob
1186 <<
" (Tail Duplicate)\n");
1188 Result.ShouldTailDup =
true;
1194 ComputedEdges[BestB.Src] = {BestB.Dest,
false};
1196 auto TrellisSucc = BestA.Dest;
1200 <<
", probability: " << SuccProb <<
" (Trellis)\n");
1208bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
1209 const MachineBasicBlock *BB, MachineBasicBlock *Succ,
1210 const BlockChain &Chain,
const BlockFilterSet *BlockFilter) {
1211 if (!shouldTailDuplicate(Succ))
1215 bool Duplicate =
true;
1217 unsigned int NumDup = 0;
1226 if (Pred == BB || (BlockFilter && !BlockFilter->count(Pred)) ||
1227 (BlockToChain[Pred] == &Chain && !Succ->
succ_empty()))
1277 if (
F->getFunction().hasProfileData())
1308 if ((NumDup > Succ->
succ_size()) || !Duplicate)
1331void MachineBlockPlacement::precomputeTriangleChains() {
1332 struct TriangleChain {
1333 std::vector<MachineBasicBlock *> Edges;
1335 TriangleChain(MachineBasicBlock *src, MachineBasicBlock *dst)
1336 : Edges({src, dst}) {}
1338 void append(MachineBasicBlock *dst) {
1339 assert(getKey()->isSuccessor(dst) &&
1340 "Attempting to append a block that is not a successor.");
1341 Edges.push_back(dst);
1344 unsigned count()
const {
return Edges.size() - 1; }
1346 MachineBasicBlock *getKey()
const {
return Edges.back(); }
1355 DenseMap<const MachineBasicBlock *, TriangleChain> TriangleChainMap;
1356 for (MachineBasicBlock &BB : *
F) {
1360 MachineBasicBlock *PDom =
nullptr;
1361 for (MachineBasicBlock *Succ : BB.
successors()) {
1369 if (PDom ==
nullptr)
1376 if (!shouldTailDuplicate(PDom))
1378 bool CanTailDuplicate =
true;
1385 CanTailDuplicate =
false;
1391 if (!CanTailDuplicate)
1398 auto Found = TriangleChainMap.
find(&BB);
1401 if (Found != TriangleChainMap.
end()) {
1402 TriangleChain Chain = std::move(Found->second);
1403 TriangleChainMap.
erase(Found);
1405 TriangleChainMap.
insert(std::make_pair(Chain.getKey(), std::move(Chain)));
1407 auto InsertResult = TriangleChainMap.
try_emplace(PDom, &BB, PDom);
1408 assert(InsertResult.second &&
"Block seen twice.");
1416 for (
auto &ChainPair : TriangleChainMap) {
1417 TriangleChain &Chain = ChainPair.second;
1423 MachineBasicBlock *dst = Chain.Edges.back();
1424 Chain.Edges.pop_back();
1425 for (MachineBasicBlock *src :
reverse(Chain.Edges)) {
1428 <<
" as pre-computed based on triangles.\n");
1430 auto InsertResult = ComputedEdges.
insert({src, {dst,
true}});
1431 assert(InsertResult.second &&
"Block seen twice.");
1441static BranchProbability
1477bool MachineBlockPlacement::hasBetterLayoutPredecessor(
1478 const MachineBasicBlock *BB,
const MachineBasicBlock *Succ,
1479 const BlockChain &SuccChain, BranchProbability SuccProb,
1480 BranchProbability RealSuccProb,
const BlockChain &Chain,
1481 const BlockFilterSet *BlockFilter) {
1484 if (SuccChain.UnscheduledPredecessors == 0)
1609 BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb;
1610 bool BadCFGConflict =
false;
1613 BlockChain *PredChain = BlockToChain[Pred];
1614 if (Pred == Succ || PredChain == &SuccChain ||
1615 (BlockFilter && !BlockFilter->count(Pred)) || PredChain == &Chain ||
1616 Pred != *std::prev(PredChain->end()) ||
1635 BlockFrequency PredEdgeFreq =
1637 if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.
getCompl()) {
1638 BadCFGConflict =
true;
1643 if (BadCFGConflict) {
1645 << SuccProb <<
" (prob) (non-cold CFG conflict)\n");
1662MachineBlockPlacement::BlockAndTailDupResult
1663MachineBlockPlacement::selectBestSuccessor(
const MachineBasicBlock *BB,
1664 const BlockChain &Chain,
1665 const BlockFilterSet *BlockFilter) {
1668 BlockAndTailDupResult BestSucc = {
nullptr,
false};
1671 SmallVector<MachineBasicBlock *, 4> Successors;
1672 auto AdjustedSumProb =
1673 collectViableSuccessors(BB, Chain, BlockFilter, Successors);
1680 auto FoundEdge = ComputedEdges.
find(BB);
1681 if (FoundEdge != ComputedEdges.
end()) {
1682 BlockAndTailDupResult
Result = FoundEdge->second;
1683 ComputedEdges.
erase(FoundEdge);
1684 BlockChain *SuccChain = BlockToChain[
Result.BB];
1686 (!BlockFilter || BlockFilter->count(
Result.BB)) &&
1687 SuccChain != &Chain &&
Result.BB == *SuccChain->begin())
1693 if (isTrellis(BB, Successors, Chain, BlockFilter))
1694 return getBestTrellisSuccessor(BB, Successors, AdjustedSumProb, Chain,
1702 for (MachineBasicBlock *Succ : Successors) {
1704 BranchProbability SuccProb =
1707 BlockChain &SuccChain = *BlockToChain[Succ];
1710 if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,
1711 Chain, BlockFilter)) {
1713 if (allowTailDupPlacement(*
F) && shouldTailDuplicate(Succ))
1720 <<
", probability: " << SuccProb
1721 << (SuccChain.UnscheduledPredecessors != 0 ?
" (CFG break)" :
"")
1724 if (BestSucc.BB && BestProb >= SuccProb) {
1731 BestProb = SuccProb;
1739 [](std::tuple<BranchProbability, MachineBasicBlock *> L,
1740 std::tuple<BranchProbability, MachineBasicBlock *> R) {
1741 return std::get<0>(L) > std::get<0>(R);
1743 for (
auto &Tup : DupCandidates) {
1744 BranchProbability DupProb;
1745 MachineBasicBlock *Succ;
1746 std::tie(DupProb, Succ) = Tup;
1747 if (DupProb < BestProb)
1749 if (canTailDuplicateUnplacedPreds(BB, Succ, Chain, BlockFilter) &&
1750 (isProfitableToTailDup(BB, Succ, BestProb, Chain, BlockFilter))) {
1752 <<
", probability: " << DupProb
1753 <<
" (Tail Duplicate)\n");
1755 BestSucc.ShouldTailDup =
true;
1776MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
1777 const BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList) {
1783 return BlockToChain.
lookup(BB) == &Chain;
1786 if (WorkList.
empty())
1789 bool IsEHPad = WorkList[0]->isEHPad();
1791 MachineBasicBlock *BestBlock =
nullptr;
1792 BlockFrequency BestFreq;
1793 for (MachineBasicBlock *
MBB : WorkList) {
1795 "EHPad mismatch between block and work list.");
1797 BlockChain &SuccChain = *BlockToChain[
MBB];
1798 if (&SuccChain == &Chain)
1801 assert(SuccChain.UnscheduledPredecessors == 0 &&
1802 "Found CFG-violating block");
1804 BlockFrequency CandidateFreq = MBFI->getBlockFreq(
MBB);
1827 if (BestBlock && (IsEHPad ^ (BestFreq >= CandidateFreq)))
1831 BestFreq = CandidateFreq;
1844MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
1845 const BlockChain &PlacedChain,
1850 if (BlockChain *Chain = BlockToChain[&*
I]; Chain != &PlacedChain) {
1851 PrevUnplacedBlockIt =
I;
1855 return *Chain->begin();
1871MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
1872 const BlockChain &PlacedChain,
1873 BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
1874 const BlockFilterSet *BlockFilter) {
1876 for (; PrevUnplacedBlockInFilterIt != BlockFilter->end();
1877 ++PrevUnplacedBlockInFilterIt) {
1878 BlockChain *
C = BlockToChain[*PrevUnplacedBlockInFilterIt];
1879 if (
C != &PlacedChain) {
1886void MachineBlockPlacement::fillWorkLists(
1887 const MachineBasicBlock *
MBB, SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
1888 const BlockFilterSet *BlockFilter =
nullptr) {
1889 BlockChain &Chain = *BlockToChain[
MBB];
1890 if (!UpdatedPreds.
insert(&Chain).second)
1894 Chain.UnscheduledPredecessors == 0 &&
1895 "Attempting to place block with unscheduled predecessors in worklist.");
1896 for (MachineBasicBlock *ChainBB : Chain) {
1897 assert(BlockToChain[ChainBB] == &Chain &&
1898 "Block in chain doesn't match BlockToChain map.");
1899 for (MachineBasicBlock *Pred : ChainBB->predecessors()) {
1900 if (BlockFilter && !BlockFilter->count(Pred))
1902 if (BlockToChain[Pred] == &Chain)
1904 ++Chain.UnscheduledPredecessors;
1908 if (Chain.UnscheduledPredecessors != 0)
1911 MachineBasicBlock *BB = *Chain.
begin();
1918void MachineBlockPlacement::buildChain(
const MachineBasicBlock *HeadBB,
1920 BlockFilterSet *BlockFilter) {
1921 assert(HeadBB &&
"BB must not be null.\n");
1922 assert(BlockToChain[HeadBB] == &Chain &&
"BlockToChainMap mis-match.\n");
1924 BlockFilterSet::iterator PrevUnplacedBlockInFilterIt;
1926 PrevUnplacedBlockInFilterIt = BlockFilter->begin();
1928 const MachineBasicBlock *LoopHeaderBB = HeadBB;
1929 markChainSuccessors(Chain, LoopHeaderBB, BlockFilter);
1930 MachineBasicBlock *BB = *std::prev(Chain.end());
1932 assert(BB &&
"null block found at end of chain in loop.");
1933 assert(BlockToChain[BB] == &Chain &&
"BlockToChainMap mis-match in loop.");
1934 assert(*std::prev(Chain.end()) == BB &&
"BB Not found at end of chain.");
1938 auto Result = selectBestSuccessor(BB, Chain, BlockFilter);
1939 MachineBasicBlock *BestSucc =
Result.BB;
1940 bool ShouldTailDup =
Result.ShouldTailDup;
1941 if (allowTailDupPlacement(*
F))
1942 ShouldTailDup |= (BestSucc && canTailDuplicateUnplacedPreds(
1943 BB, BestSucc, Chain, BlockFilter));
1949 BestSucc = selectBestCandidateBlock(Chain, BlockWorkList);
1951 BestSucc = selectBestCandidateBlock(Chain, EHPadWorkList);
1955 BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockInFilterIt,
1958 BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockIt);
1962 LLVM_DEBUG(
dbgs() <<
"Unnatural loop CFG detected, forcibly merging the "
1963 "layout successor until the CFG reduces\n");
1968 if (allowTailDupPlacement(*
F) && BestSucc && ShouldTailDup) {
1969 repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain,
1970 BlockFilter, PrevUnplacedBlockIt,
1971 PrevUnplacedBlockInFilterIt);
1979 BlockChain &SuccChain = *BlockToChain[BestSucc];
1982 SuccChain.UnscheduledPredecessors = 0;
1985 markChainSuccessors(SuccChain, LoopHeaderBB, BlockFilter);
1986 Chain.merge(BestSucc, &SuccChain);
1987 BB = *std::prev(Chain.end());
2008bool MachineBlockPlacement::canMoveBottomBlockToTop(
2009 const MachineBasicBlock *BottomBlock,
const MachineBasicBlock *OldTop) {
2012 MachineBasicBlock *Pred = *BottomBlock->
pred_begin();
2016 MachineBasicBlock *OtherBB = *Pred->
succ_begin();
2017 if (OtherBB == BottomBlock)
2019 if (OtherBB == OldTop)
2027MachineBlockPlacement::TopFallThroughFreq(
const MachineBasicBlock *Top,
2028 const BlockFilterSet &LoopBlockSet) {
2029 BlockFrequency MaxFreq = BlockFrequency(0);
2031 BlockChain *PredChain = BlockToChain[Pred];
2032 if (!LoopBlockSet.count(Pred) &&
2033 (!PredChain || Pred == *std::prev(PredChain->end()))) {
2038 for (MachineBasicBlock *Succ : Pred->
successors()) {
2040 BlockChain *SuccChain = BlockToChain[Succ];
2043 if (!LoopBlockSet.count(Succ) && (SuccProb > TopProb) &&
2044 (!SuccChain || Succ == *SuccChain->begin())) {
2050 BlockFrequency EdgeFreq =
2052 if (EdgeFreq > MaxFreq)
2081BlockFrequency MachineBlockPlacement::FallThroughGains(
2082 const MachineBasicBlock *NewTop,
const MachineBasicBlock *OldTop,
2083 const MachineBasicBlock *ExitBB,
const BlockFilterSet &LoopBlockSet) {
2084 BlockFrequency FallThrough2Top = TopFallThroughFreq(OldTop, LoopBlockSet);
2085 BlockFrequency FallThrough2Exit = BlockFrequency(0);
2089 BlockFrequency BackEdgeFreq =
2093 MachineBasicBlock *BestPred =
nullptr;
2094 BlockFrequency FallThroughFromPred = BlockFrequency(0);
2095 for (MachineBasicBlock *Pred : NewTop->
predecessors()) {
2096 if (!LoopBlockSet.count(Pred))
2098 BlockChain *PredChain = BlockToChain[Pred];
2099 if (!PredChain || Pred == *std::prev(PredChain->end())) {
2100 BlockFrequency EdgeFreq =
2102 if (EdgeFreq > FallThroughFromPred) {
2103 FallThroughFromPred = EdgeFreq;
2111 BlockFrequency NewFreq = BlockFrequency(0);
2113 for (MachineBasicBlock *Succ : BestPred->
successors()) {
2114 if ((Succ == NewTop) || (Succ == BestPred) || !LoopBlockSet.count(Succ))
2118 BlockChain *SuccChain = BlockToChain[Succ];
2119 if ((SuccChain && (Succ != *SuccChain->begin())) ||
2120 (SuccChain == BlockToChain[BestPred]))
2122 BlockFrequency EdgeFreq = MBFI->getBlockFreq(BestPred) *
2124 if (EdgeFreq > NewFreq)
2127 BlockFrequency OrigEdgeFreq = MBFI->getBlockFreq(BestPred) *
2129 if (NewFreq > OrigEdgeFreq) {
2133 NewFreq = BlockFrequency(0);
2134 FallThroughFromPred = BlockFrequency(0);
2138 BlockFrequency
Result = BlockFrequency(0);
2139 BlockFrequency Gains = BackEdgeFreq + NewFreq;
2140 BlockFrequency Lost =
2141 FallThrough2Top + FallThrough2Exit + FallThroughFromPred;
2169MachineBasicBlock *MachineBlockPlacement::findBestLoopTopHelper(
2170 MachineBasicBlock *OldTop,
const MachineLoop &L,
2171 const BlockFilterSet &LoopBlockSet) {
2175 BlockChain &HeaderChain = *BlockToChain[OldTop];
2176 if (!LoopBlockSet.count(*HeaderChain.begin()))
2178 if (OldTop != *HeaderChain.begin())
2184 BlockFrequency BestGains = BlockFrequency(0);
2185 MachineBasicBlock *BestPred =
nullptr;
2186 for (MachineBasicBlock *Pred : OldTop->
predecessors()) {
2187 if (!LoopBlockSet.count(Pred))
2189 if (Pred ==
L.getHeader())
2197 MachineBasicBlock *OtherBB =
nullptr;
2200 if (OtherBB == OldTop)
2204 if (!canMoveBottomBlockToTop(Pred, OldTop))
2207 BlockFrequency Gains =
2208 FallThroughGains(Pred, OldTop, OtherBB, LoopBlockSet);
2209 if ((Gains > BlockFrequency(0)) &&
2210 (Gains > BestGains ||
2225 (*BestPred->
pred_begin())->succ_size() == 1 &&
2238MachineBlockPlacement::findBestLoopTop(
const MachineLoop &L,
2239 const BlockFilterSet &LoopBlockSet) {
2248 return L.getHeader();
2250 MachineBasicBlock *OldTop =
nullptr;
2251 MachineBasicBlock *NewTop =
L.getHeader();
2252 while (NewTop != OldTop) {
2254 NewTop = findBestLoopTopHelper(OldTop, L, LoopBlockSet);
2255 if (NewTop != OldTop)
2256 ComputedEdges[NewTop] = {OldTop,
false};
2267MachineBlockPlacement::findBestLoopExit(
const MachineLoop &L,
2268 const BlockFilterSet &LoopBlockSet,
2269 BlockFrequency &ExitFreq) {
2278 BlockChain &HeaderChain = *BlockToChain[
L.getHeader()];
2279 if (!LoopBlockSet.count(*HeaderChain.begin()))
2282 BlockFrequency BestExitEdgeFreq;
2283 unsigned BestExitLoopDepth = 0;
2284 MachineBasicBlock *ExitingBB =
nullptr;
2288 SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop;
2292 for (MachineBasicBlock *
MBB :
L.getBlocks()) {
2293 BlockChain &Chain = *BlockToChain[
MBB];
2296 if (
MBB != *std::prev(Chain.end()))
2303 MachineBasicBlock *OldExitingBB = ExitingBB;
2304 BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq;
2305 bool HasLoopingSucc =
false;
2311 BlockChain &SuccChain = *BlockToChain[Succ];
2313 if (&Chain == &SuccChain) {
2320 if (LoopBlockSet.count(Succ)) {
2323 HasLoopingSucc =
true;
2327 unsigned SuccLoopDepth = 0;
2328 if (MachineLoop *ExitLoop = MLI->
getLoopFor(Succ)) {
2329 SuccLoopDepth = ExitLoop->getLoopDepth();
2330 if (ExitLoop->contains(&L))
2334 BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(
MBB) * SuccProb;
2337 <<
getBlockName(Succ) <<
" [L:" << SuccLoopDepth <<
"] ("
2344 if (!ExitingBB || SuccLoopDepth > BestExitLoopDepth ||
2345 ExitEdgeFreq > BestExitEdgeFreq ||
2347 !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) {
2348 BestExitEdgeFreq = ExitEdgeFreq;
2353 if (!HasLoopingSucc) {
2355 ExitingBB = OldExitingBB;
2356 BestExitEdgeFreq = OldBestExitEdgeFreq;
2363 dbgs() <<
" No other candidate exit blocks, using loop header\n");
2366 if (
L.getNumBlocks() == 1) {
2367 LLVM_DEBUG(
dbgs() <<
" Loop has 1 block, using loop header as exit\n");
2374 if (!BlocksExitingToOuterLoop.
empty() &&
2375 !BlocksExitingToOuterLoop.
count(ExitingBB))
2380 ExitFreq = BestExitEdgeFreq;
2388bool MachineBlockPlacement::hasViableTopFallthrough(
2389 const MachineBasicBlock *Top,
const BlockFilterSet &LoopBlockSet) {
2391 BlockChain *PredChain = BlockToChain[Pred];
2392 if (!LoopBlockSet.count(Pred) &&
2393 (!PredChain || Pred == *std::prev(PredChain->end()))) {
2398 for (MachineBasicBlock *Succ : Pred->
successors()) {
2400 BlockChain *SuccChain = BlockToChain[Succ];
2403 if ((!SuccChain || Succ == *SuccChain->begin()) && SuccProb > TopProb) {
2421void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
2422 const MachineBasicBlock *ExitingBB,
2423 BlockFrequency ExitFreq,
2424 const BlockFilterSet &LoopBlockSet) {
2428 MachineBasicBlock *Top = *LoopChain.begin();
2429 MachineBasicBlock *Bottom = *std::prev(LoopChain.end());
2432 if (Bottom == ExitingBB)
2439 bool ViableTopFallthrough = hasViableTopFallthrough(Top, LoopBlockSet);
2444 if (ViableTopFallthrough) {
2445 for (MachineBasicBlock *Succ : Bottom->
successors()) {
2446 BlockChain *SuccChain = BlockToChain[Succ];
2447 if (!LoopBlockSet.count(Succ) &&
2448 (!SuccChain || Succ == *SuccChain->begin()))
2454 BlockFrequency FallThrough2Top = TopFallThroughFreq(Top, LoopBlockSet);
2455 if (FallThrough2Top >= ExitFreq)
2459 BlockChain::iterator ExitIt =
llvm::find(LoopChain, ExitingBB);
2460 if (ExitIt == LoopChain.end())
2482 if (ViableTopFallthrough) {
2483 assert(std::next(ExitIt) != LoopChain.end() &&
2484 "Exit should not be last BB");
2485 MachineBasicBlock *NextBlockInChain = *std::next(ExitIt);
2493 std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end());
2509void MachineBlockPlacement::rotateLoopWithProfile(
2510 BlockChain &LoopChain,
const MachineLoop &L,
2511 const BlockFilterSet &LoopBlockSet) {
2512 auto RotationPos = LoopChain.end();
2513 MachineBasicBlock *ChainHeaderBB = *LoopChain.begin();
2523 auto ScaleBlockFrequency = [](BlockFrequency Freq,
2524 unsigned Scale) -> BlockFrequency {
2526 return BlockFrequency(0);
2529 return Freq / BranchProbability(1, Scale);
2535 BlockFrequency HeaderFallThroughCost(0);
2537 BlockChain *PredChain = BlockToChain[Pred];
2538 if (!LoopBlockSet.count(Pred) &&
2539 (!PredChain || Pred == *std::prev(PredChain->end()))) {
2540 auto EdgeFreq = MBFI->getBlockFreq(Pred) *
2542 auto FallThruCost = ScaleBlockFrequency(EdgeFreq,
MisfetchCost);
2546 FallThruCost += ScaleBlockFrequency(EdgeFreq,
JumpInstCost);
2547 HeaderFallThroughCost = std::max(HeaderFallThroughCost, FallThruCost);
2556 for (
auto *BB : LoopChain) {
2559 BlockChain *SuccChain = BlockToChain[Succ];
2560 if (!LoopBlockSet.count(Succ) &&
2561 (!SuccChain || Succ == *SuccChain->begin())) {
2563 LargestExitEdgeProb = std::max(LargestExitEdgeProb, SuccProb);
2567 auto ExitFreq = MBFI->getBlockFreq(BB) * LargestExitEdgeProb;
2575 for (
auto Iter = LoopChain.begin(), TailIter = std::prev(LoopChain.end()),
2576 EndIter = LoopChain.end();
2577 Iter != EndIter; Iter++, TailIter++) {
2580 if (TailIter == LoopChain.end())
2581 TailIter = LoopChain.begin();
2583 auto TailBB = *TailIter;
2586 BlockFrequency
Cost = BlockFrequency(0);
2591 if (Iter != LoopChain.begin())
2592 Cost += HeaderFallThroughCost;
2596 for (
auto &ExitWithFreq : ExitsWithFreq)
2597 if (TailBB != ExitWithFreq.first)
2598 Cost += ExitWithFreq.second;
2614 if (TailBB->isSuccessor(*Iter)) {
2615 auto TailBBFreq = MBFI->getBlockFreq(TailBB);
2616 if (TailBB->succ_size() == 1)
2618 else if (TailBB->succ_size() == 2) {
2620 auto TailToHeadFreq = TailBBFreq * TailToHeadProb;
2621 auto ColderEdgeFreq = TailToHeadProb > BranchProbability(1, 2)
2622 ? TailBBFreq * TailToHeadProb.
getCompl()
2633 if (
Cost < SmallestRotationCost) {
2634 SmallestRotationCost =
Cost;
2639 if (RotationPos != LoopChain.end()) {
2641 <<
" to the top\n");
2642 std::rotate(LoopChain.begin(), RotationPos, LoopChain.end());
2650MachineBlockPlacement::BlockFilterSet
2651MachineBlockPlacement::collectLoopBlockSet(
const MachineLoop &L) {
2655 bool operator()(
const MachineBasicBlock *
X,
2656 const MachineBasicBlock *
Y)
const {
2657 return X->getNumber() <
Y->getNumber();
2660 std::set<const MachineBasicBlock *, MBBCompare> LoopBlockSet;
2672 BlockFrequency LoopFreq(0);
2673 for (
auto *LoopPred :
L.getHeader()->predecessors())
2674 if (!
L.contains(LoopPred))
2675 LoopFreq += MBFI->getBlockFreq(LoopPred) *
2678 for (MachineBasicBlock *LoopBB :
L.getBlocks()) {
2679 if (LoopBlockSet.count(LoopBB))
2684 BlockChain *Chain = BlockToChain[LoopBB];
2685 for (MachineBasicBlock *ChainBB : *Chain)
2686 LoopBlockSet.insert(ChainBB);
2689 LoopBlockSet.insert(
L.block_begin(),
L.block_end());
2694 BlockFilterSet Ret(LoopBlockSet.begin(), LoopBlockSet.end());
2704void MachineBlockPlacement::buildLoopChains(
const MachineLoop &L) {
2707 for (
const MachineLoop *InnerLoop : L)
2708 buildLoopChains(*InnerLoop);
2711 "BlockWorkList not empty when starting to build loop chains.");
2713 "EHPadWorkList not empty when starting to build loop chains.");
2714 BlockFilterSet LoopBlockSet = collectLoopBlockSet(L);
2719 bool RotateLoopWithProfile =
2727 MachineBasicBlock *LoopTop = findBestLoopTop(L, LoopBlockSet);
2735 PreferredLoopExit =
nullptr;
2736 BlockFrequency ExitFreq;
2737 if (!RotateLoopWithProfile && LoopTop ==
L.getHeader())
2738 PreferredLoopExit = findBestLoopExit(L, LoopBlockSet, ExitFreq);
2740 BlockChain &LoopChain = *BlockToChain[LoopTop];
2745 SmallPtrSet<BlockChain *, 4> UpdatedPreds;
2746 assert(LoopChain.UnscheduledPredecessors == 0 &&
2747 "LoopChain should not have unscheduled predecessors.");
2748 UpdatedPreds.
insert(&LoopChain);
2750 for (
const MachineBasicBlock *LoopBB : LoopBlockSet)
2751 fillWorkLists(LoopBB, UpdatedPreds, &LoopBlockSet);
2753 buildChain(LoopTop, LoopChain, &LoopBlockSet);
2755 if (RotateLoopWithProfile)
2756 rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
2758 rotateLoop(LoopChain, PreferredLoopExit, ExitFreq, LoopBlockSet);
2762 bool BadLoop =
false;
2763 if (LoopChain.UnscheduledPredecessors) {
2765 dbgs() <<
"Loop chain contains a block without its preds placed!\n"
2766 <<
" Loop header: " <<
getBlockName(*
L.block_begin()) <<
"\n"
2767 <<
" Chain header: " <<
getBlockName(*LoopChain.begin()) <<
"\n";
2769 for (MachineBasicBlock *ChainBB : LoopChain) {
2771 if (!LoopBlockSet.remove(ChainBB)) {
2775 dbgs() <<
"Loop chain contains a block not contained by the loop!\n"
2776 <<
" Loop header: " <<
getBlockName(*
L.block_begin()) <<
"\n"
2777 <<
" Chain header: " <<
getBlockName(*LoopChain.begin()) <<
"\n"
2782 if (!LoopBlockSet.empty()) {
2784 for (
const MachineBasicBlock *LoopBB : LoopBlockSet)
2785 dbgs() <<
"Loop contains blocks never placed into a chain!\n"
2786 <<
" Loop header: " <<
getBlockName(*
L.block_begin()) <<
"\n"
2787 <<
" Chain header: " <<
getBlockName(*LoopChain.begin()) <<
"\n"
2790 assert(!BadLoop &&
"Detected problems with the placement of this loop.");
2793 BlockWorkList.
clear();
2794 EHPadWorkList.
clear();
2797void MachineBlockPlacement::buildCFGChains() {
2803 MachineBasicBlock *BB = &*FI;
2805 new (ChainAllocator.
Allocate()) BlockChain(BlockToChain, BB);
2810 MachineBasicBlock *
TBB =
nullptr, *FBB =
nullptr;
2815 MachineBasicBlock *NextBB = &*NextFI;
2818 assert(NextFI != FE &&
"Can't fallthrough past the last block.");
2819 LLVM_DEBUG(
dbgs() <<
"Pre-merging due to unanalyzable fallthrough: "
2822 Chain->merge(NextBB,
nullptr);
2824 BlocksWithUnanalyzableExits.
insert(&*BB);
2832 PreferredLoopExit =
nullptr;
2833 for (MachineLoop *L : *MLI)
2834 buildLoopChains(*L);
2837 "BlockWorkList should be empty before building final chain.");
2839 "EHPadWorkList should be empty before building final chain.");
2841 SmallPtrSet<BlockChain *, 4> UpdatedPreds;
2842 for (MachineBasicBlock &
MBB : *
F)
2843 fillWorkLists(&
MBB, UpdatedPreds);
2845 BlockChain &FunctionChain = *BlockToChain[&
F->front()];
2846 buildChain(&
F->front(), FunctionChain);
2849 using FunctionBlockSetType = SmallPtrSet<MachineBasicBlock *, 16>;
2853 bool BadFunc =
false;
2854 FunctionBlockSetType FunctionBlockSet;
2855 for (MachineBasicBlock &
MBB : *
F)
2856 FunctionBlockSet.insert(&
MBB);
2858 for (MachineBasicBlock *ChainBB : FunctionChain)
2859 if (!FunctionBlockSet.erase(ChainBB)) {
2861 dbgs() <<
"Function chain contains a block not in the function!\n"
2865 if (!FunctionBlockSet.empty()) {
2867 for (MachineBasicBlock *RemainingBB : FunctionBlockSet)
2868 dbgs() <<
"Function contains blocks never placed into a chain!\n"
2869 <<
" Bad block: " <<
getBlockName(RemainingBB) <<
"\n";
2871 assert(!BadFunc &&
"Detected problems with the block placement.");
2876 SmallVector<MachineBasicBlock *, 4> OriginalLayoutSuccessors(
2877 F->getNumBlockIDs());
2879 MachineBasicBlock *LastMBB =
nullptr;
2880 for (
auto &
MBB : *
F) {
2881 if (LastMBB !=
nullptr)
2885 OriginalLayoutSuccessors[
F->back().getNumber()] =
nullptr;
2891 for (MachineBasicBlock *ChainBB : FunctionChain) {
2892 LLVM_DEBUG(
dbgs() << (ChainBB == *FunctionChain.begin() ?
"Placing chain "
2896 F->splice(InsertPos, ChainBB);
2901 if (ChainBB == *FunctionChain.begin())
2909 MachineBasicBlock *
TBB =
nullptr, *FBB =
nullptr;
2912 if (!BlocksWithUnanalyzableExits.
count(PrevBB)) {
2918 "Unexpected block with un-analyzable fallthrough!");
2920 TBB = FBB =
nullptr;
2952 MachineBasicBlock *
TBB =
nullptr, *FBB =
nullptr;
2954 MachineBasicBlock *PrevBB = &
F->
back();
2958 BlockWorkList.
clear();
2959 EHPadWorkList.
clear();
2962void MachineBlockPlacement::optimizeBranches() {
2963 BlockChain &FunctionChain = *BlockToChain[&
F->front()];
2972 for (MachineBasicBlock *ChainBB : FunctionChain) {
2974 MachineBasicBlock *
TBB =
nullptr, *FBB =
nullptr;
2995 auto Dl = ChainBB->findBranchDebugLoc();
3001void MachineBlockPlacement::alignBlocks() {
3008 if (
F->getFunction().hasMinSize() ||
3013 BlockChain &FunctionChain = *BlockToChain[&
F->front()];
3015 if (FunctionChain.begin() == FunctionChain.end())
3018 const BranchProbability ColdProb(1, 5);
3019 BlockFrequency EntryFreq = MBFI->getBlockFreq(&
F->front());
3020 BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb;
3021 for (MachineBasicBlock *ChainBB : FunctionChain) {
3022 if (ChainBB == *FunctionChain.begin())
3029 MachineLoop *
L = MLI->getLoopFor(ChainBB);
3034 unsigned MDAlign = 1;
3035 MDNode *LoopID =
L->getLoopID();
3044 if (S->
getString() ==
"llvm.loop.align") {
3046 "per-loop align metadata should have two operands.");
3049 assert(MDAlign >= 1 &&
"per-loop align value must be positive.");
3055 const Align LoopAlign = std::max(TLIAlign,
Align(MDAlign));
3061 BlockFrequency Freq = MBFI->getBlockFreq(ChainBB);
3062 if (Freq < WeightedEntryFreq)
3067 MachineBasicBlock *LoopHeader =
L->getHeader();
3068 BlockFrequency LoopHeaderFreq = MBFI->getBlockFreq(LoopHeader);
3069 if (Freq < (LoopHeaderFreq * ColdProb))
3079 MachineBasicBlock *LayoutPred =
3082 auto DetermineMaxAlignmentPadding = [&]() {
3089 ChainBB->setMaxBytesForAlignment(MaxBytes);
3095 ChainBB->setAlignment(LoopAlign);
3096 DetermineMaxAlignmentPadding();
3104 BranchProbability LayoutProb =
3106 BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
3107 if (LayoutEdgeFreq <= (Freq * ColdProb)) {
3108 ChainBB->setAlignment(LoopAlign);
3109 DetermineMaxAlignmentPadding();
3113 const bool HasMaxBytesOverride =
3118 for (MachineBasicBlock &
MBB : *
F) {
3119 if (HasMaxBytesOverride)
3128 for (
auto MBI = std::next(
F->begin()), MBE =
F->end(); MBI != MBE; ++MBI) {
3129 auto LayoutPred = std::prev(MBI);
3131 if (HasMaxBytesOverride)
3156bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
3157 MachineBasicBlock *BB, MachineBasicBlock *&LPred,
3158 const MachineBasicBlock *LoopHeaderBB, BlockChain &Chain,
3160 BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt) {
3161 bool Removed, DuplicatedToLPred;
3162 bool DuplicatedToOriginalLPred;
3163 Removed = maybeTailDuplicateBlock(
3164 BB, LPred, Chain, BlockFilter, PrevUnplacedBlockIt,
3165 PrevUnplacedBlockInFilterIt, DuplicatedToLPred);
3168 DuplicatedToOriginalLPred = DuplicatedToLPred;
3173 while (DuplicatedToLPred && Removed) {
3174 MachineBasicBlock *DupBB, *DupPred;
3180 BlockChain::iterator ChainEnd = Chain.end();
3181 DupBB = *(--ChainEnd);
3183 if (ChainEnd == Chain.begin())
3185 DupPred = *std::prev(ChainEnd);
3186 Removed = maybeTailDuplicateBlock(
3187 DupBB, DupPred, Chain, BlockFilter, PrevUnplacedBlockIt,
3188 PrevUnplacedBlockInFilterIt, DuplicatedToLPred);
3195 LPred = *std::prev(Chain.end());
3196 if (DuplicatedToOriginalLPred)
3197 markBlockSuccessors(Chain, LPred, LoopHeaderBB, BlockFilter);
3214bool MachineBlockPlacement::maybeTailDuplicateBlock(
3215 MachineBasicBlock *BB, MachineBasicBlock *LPred, BlockChain &Chain,
3217 BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
3218 bool &DuplicatedToLPred) {
3219 DuplicatedToLPred =
false;
3220 if (!shouldTailDuplicate(BB))
3228 bool Removed =
false;
3229 auto RemovalCallback = [&](MachineBasicBlock *RemBB) {
3234 if (
auto It = BlockToChain.
find(RemBB); It != BlockToChain.
end()) {
3235 It->second->remove(RemBB);
3236 BlockToChain.
erase(It);
3240 if (&(*PrevUnplacedBlockIt) == RemBB) {
3241 PrevUnplacedBlockIt++;
3245 if (RemBB->isEHPad()) {
3256 if (It != BlockFilter->end()) {
3257 if (It < PrevUnplacedBlockInFilterIt) {
3258 const MachineBasicBlock *PrevBB = *PrevUnplacedBlockInFilterIt;
3261 auto Distance = PrevUnplacedBlockInFilterIt - It - 1;
3262 PrevUnplacedBlockInFilterIt = BlockFilter->
erase(It) + Distance;
3263 assert(*PrevUnplacedBlockInFilterIt == PrevBB);
3265 }
else if (It == PrevUnplacedBlockInFilterIt)
3268 PrevUnplacedBlockInFilterIt = BlockFilter->erase(It);
3270 BlockFilter->erase(It);
3275 MLI->removeBlock(RemBB);
3276 if (RemBB == PreferredLoopExit)
3277 PreferredLoopExit =
nullptr;
3282 auto RemovalCallbackRef =
3283 function_ref<void(MachineBasicBlock *)>(RemovalCallback);
3288 SmallVectorImpl<MachineBasicBlock *> *CandidatePtr =
nullptr;
3289 if (
F->getFunction().hasProfileData()) {
3291 findDuplicateCandidates(CandidatePreds, BB, BlockFilter);
3292 if (CandidatePreds.
size() == 0)
3295 CandidatePtr = &CandidatePreds;
3298 &RemovalCallbackRef, CandidatePtr);
3301 DuplicatedToLPred =
false;
3302 for (MachineBasicBlock *Pred : DuplicatedPreds) {
3304 BlockChain *PredChain = BlockToChain[Pred];
3306 DuplicatedToLPred =
true;
3307 if (Pred == LPred || (BlockFilter && !BlockFilter->count(Pred)) ||
3308 PredChain == &Chain)
3310 for (MachineBasicBlock *NewSucc : Pred->
successors()) {
3311 if (BlockFilter && !BlockFilter->count(NewSucc))
3313 BlockChain *NewChain = BlockToChain[NewSucc];
3314 if (NewChain != &Chain && NewChain != PredChain)
3315 NewChain->UnscheduledPredecessors++;
3325 if (!
MI.isPHI() && !
MI.isMetaInstruction())
3334BlockFrequency MachineBlockPlacement::scaleThreshold(MachineBasicBlock *BB) {
3339bool MachineBlockPlacement::isBestSuccessor(MachineBasicBlock *BB,
3340 MachineBasicBlock *Pred,
3341 BlockFilterSet *BlockFilter) {
3344 if (BlockFilter && !BlockFilter->count(Pred))
3346 BlockChain *PredChain = BlockToChain[Pred];
3347 if (PredChain && (Pred != *std::prev(PredChain->end())))
3352 for (MachineBasicBlock *Succ : Pred->
successors())
3354 if (BlockFilter && !BlockFilter->count(Succ))
3356 BlockChain *SuccChain = BlockToChain[Succ];
3357 if (SuccChain && (Succ != *SuccChain->begin()))
3360 if (SuccProb > BestProb)
3361 BestProb = SuccProb;
3365 if (BBProb <= BestProb)
3370 BlockFrequency PredFreq = getBlockCountOrFrequency(Pred);
3371 BlockFrequency Gain = PredFreq * (BBProb - BestProb);
3372 return Gain > scaleThreshold(BB);
3377void MachineBlockPlacement::findDuplicateCandidates(
3378 SmallVectorImpl<MachineBasicBlock *> &Candidates, MachineBasicBlock *BB,
3379 BlockFilterSet *BlockFilter) {
3380 MachineBasicBlock *Fallthrough =
nullptr;
3382 BlockFrequency BBDupThreshold(scaleThreshold(BB));
3387 auto CmpSucc = [&](MachineBasicBlock *
A, MachineBasicBlock *
B) {
3390 auto CmpPred = [&](MachineBasicBlock *
A, MachineBasicBlock *
B) {
3391 return MBFI->getBlockFreq(
A) > MBFI->getBlockFreq(
B);
3396 auto SuccIt = Succs.begin();
3397 if (SuccIt != Succs.end()) {
3442 for (MachineBasicBlock *Pred : Preds) {
3443 BlockFrequency PredFreq = getBlockCountOrFrequency(Pred);
3448 if (!Fallthrough && isBestSuccessor(BB, Pred, BlockFilter)) {
3450 if (SuccIt != Succs.end())
3456 BlockFrequency OrigCost = PredFreq + PredFreq * DefaultBranchProb;
3457 BlockFrequency DupCost;
3458 if (SuccIt == Succs.end()) {
3460 if (Succs.size() > 0)
3461 DupCost += PredFreq;
3464 DupCost += PredFreq;
3468 assert(OrigCost >= DupCost);
3469 OrigCost -= DupCost;
3470 if (OrigCost > BBDupThreshold) {
3472 if (SuccIt != Succs.end())
3480 if ((Candidates.
size() < Preds.size()) && (Candidates.
size() > 0)) {
3481 Candidates[0] = Candidates.
back();
3487void MachineBlockPlacement::initTailDupThreshold() {
3488 DupThreshold = BlockFrequency(0);
3489 if (
F->getFunction().hasProfileData()) {
3493 UseProfileCount =
true;
3498 BlockFrequency MaxFreq = BlockFrequency(0);
3499 for (MachineBasicBlock &
MBB : *
F) {
3500 BlockFrequency Freq = MBFI->getBlockFreq(&
MBB);
3506 DupThreshold = BlockFrequency(MaxFreq * ThresholdProb);
3507 UseProfileCount =
false;
3519 if (OptLevel >= CodeGenOptLevel::Aggressive) {
3531 (OptLevel < CodeGenOptLevel::Aggressive ||
3533 TailDupSize =
TII->getTailDuplicateSize(OptLevel);
3540 auto MBFI = std::make_unique<MBFIWrapper>(
3543 auto *MPDT = MachineBlockPlacement::allowTailDupPlacement(MF)
3547 .getCachedResult<ProfileSummaryAnalysis>(
3552 MachineBlockPlacement MBP(MBPI, MLI, PSI, std::move(MBFI), MPDT,
3564 OS << MapClassName2PassName(
name());
3565 if (!AllowTailMerge)
3566 OS <<
"<no-tail-merge>";
3572 if (std::next(MF.
begin()) == MF.
end())
3576 OptLevel =
F->getTarget().getOptLevel();
3583 PreferredLoopExit =
nullptr;
3586 "BlockToChain map should be empty before starting placement.");
3588 "Computed Edge map should be empty before starting placement.");
3591 initTailDupThreshold();
3593 const bool OptForSize =
3598 bool UseExtTspForPerf =
false;
3599 bool UseExtTspForSize =
false;
3608 if (allowTailDupPlacement(*
F)) {
3611 const bool PreRegAlloc =
false;
3612 TailDup.
initMF(MF, PreRegAlloc, MBPI, MBFI.get(), PSI,
3614 if (!UseExtTspForSize)
3615 precomputeTriangleChains();
3619 if (!UseExtTspForSize)
3629 if (EnableTailMerge) {
3631 BranchFolder BF(
true,
false,
3639 if (!UseExtTspForSize) {
3641 BlockToChain.
clear();
3642 ComputedEdges.
clear();
3652 if (UseExtTspForPerf || UseExtTspForSize) {
3654 !(UseExtTspForPerf && UseExtTspForSize) &&
3655 "UseExtTspForPerf and UseExtTspForSize can not be set simultaneously");
3656 applyExtTsp(UseExtTspForSize);
3657 createCFGChainExtTsp();
3663 BlockToChain.
clear();
3664 ComputedEdges.
clear();
3673 MBFI->view(
"MBP." + MF.
getName(),
false);
3681void MachineBlockPlacement::applyExtTsp(
bool OptForSize) {
3683 DenseMap<const MachineBasicBlock *, uint64_t> BlockIndex;
3685 std::vector<const MachineBasicBlock *> CurrentBlockOrder;
3686 CurrentBlockOrder.reserve(
F->size());
3687 size_t NumBlocks = 0;
3688 for (
const MachineBasicBlock &
MBB : *
F) {
3689 BlockIndex[&
MBB] = NumBlocks++;
3690 CurrentBlockOrder.push_back(&
MBB);
3693 SmallVector<uint64_t, 0> BlockCounts(
F->size());
3694 SmallVector<uint64_t, 0> BlockSizes(
F->size());
3698 for (MachineBasicBlock &
MBB : *
F) {
3700 BlockFrequency BlockFreq = MBFI->getBlockFreq(&
MBB);
3701 BlockCounts[BlockIndex[&
MBB]] = OptForSize ? 1 : BlockFreq.
getFrequency();
3710 size_t NumInsts = std::distance(NonDbgInsts.begin(), NonDbgInsts.end());
3711 BlockSizes[BlockIndex[&
MBB]] = 4 * NumInsts;
3716 MachineBasicBlock *
TBB =
nullptr, *FBB =
nullptr;
3727 if (FBB && FBB != FTB)
3734 const uint64_t Freq = Succs.
size() == 1 ? 110 : 100;
3735 for (
const MachineBasicBlock *Succ : Succs)
3736 JumpCounts.
push_back({BlockIndex[&
MBB], BlockIndex[Succ], Freq});
3740 BlockFrequency JumpFreq = BlockFreq * EP;
3747 LLVM_DEBUG(
dbgs() <<
"Applying ext-tsp layout for |V| = " <<
F->size()
3748 <<
" with profile = " <<
F->getFunction().hasProfileData()
3749 <<
" (" <<
F->getName() <<
")" <<
"\n");
3756 std::vector<const MachineBasicBlock *> NewBlockOrder;
3757 NewBlockOrder.reserve(
F->size());
3758 for (uint64_t Node : NewOrder) {
3759 NewBlockOrder.push_back(CurrentBlockOrder[Node]);
3761 const double OptScore =
calcExtTspScore(NewOrder, BlockSizes, JumpCounts);
3765 if (OptForSize && OrgScore > OptScore)
3766 assignBlockOrder(CurrentBlockOrder);
3768 assignBlockOrder(NewBlockOrder);
3771void MachineBlockPlacement::assignBlockOrder(
3772 const std::vector<const MachineBasicBlock *> &NewBlockOrder) {
3773 assert(
F->size() == NewBlockOrder.size() &&
"Incorrect size of block order");
3774 F->RenumberBlocks();
3776 bool HasChanges =
false;
3777 for (
size_t I = 0;
I < NewBlockOrder.size();
I++) {
3778 if (NewBlockOrder[
I] !=
F->getBlockNumbered(
I)) {
3787 SmallVector<MachineBasicBlock *, 4> PrevFallThroughs(
F->getNumBlockIDs());
3788 for (
auto &
MBB : *
F) {
3793 DenseMap<const MachineBasicBlock *, size_t> NewIndex;
3794 for (
const MachineBasicBlock *
MBB : NewBlockOrder) {
3795 NewIndex[
MBB] = NewIndex.
size();
3797 F->sort([&](MachineBasicBlock &L, MachineBasicBlock &R) {
3798 return NewIndex[&
L] < NewIndex[&
R];
3803 const TargetInstrInfo *
TII =
F->getSubtarget().getInstrInfo();
3805 for (
auto &
MBB : *
F) {
3812 if (FTMBB && (NextMBB == EndIt || &*NextMBB != FTMBB)) {
3818 MachineBasicBlock *
TBB =
nullptr, *FBB =
nullptr;
3825void MachineBlockPlacement::createCFGChainExtTsp() {
3826 BlockToChain.
clear();
3827 ComputedEdges.
clear();
3830 MachineBasicBlock *HeadBB = &
F->
front();
3831 BlockChain *FunctionChain =
3832 new (ChainAllocator.
Allocate()) BlockChain(BlockToChain, HeadBB);
3834 for (MachineBasicBlock &
MBB : *
F) {
3837 FunctionChain->merge(&
MBB,
nullptr);
3849class MachineBlockPlacementStats {
3851 const MachineBranchProbabilityInfo *MBPI;
3854 const MachineBlockFrequencyInfo *MBFI;
3857 MachineBlockPlacementStats(
const MachineBranchProbabilityInfo *MBPI,
3858 const MachineBlockFrequencyInfo *MBFI)
3859 : MBPI(MBPI), MBFI(MBFI) {}
3860 bool run(MachineFunction &MF);
3863class MachineBlockPlacementStatsLegacy :
public MachineFunctionPass {
3867 MachineBlockPlacementStatsLegacy() : MachineFunctionPass(
ID) {}
3869 bool runOnMachineFunction(MachineFunction &
F)
override {
3871 &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
3872 auto *MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
3873 return MachineBlockPlacementStats(MBPI, MBFI).run(
F);
3876 void getAnalysisUsage(AnalysisUsage &AU)
const override {
3877 AU.
addRequired<MachineBranchProbabilityInfoWrapperPass>();
3878 AU.
addRequired<MachineBlockFrequencyInfoWrapperPass>();
3886char MachineBlockPlacementStatsLegacy::ID = 0;
3891 "Basic Block Placement Stats",
false,
false)
3903 MachineBlockPlacementStats(&MBPI, &MBFI).
run(MF);
3909 if (std::next(
F.begin()) ==
F.end())
3918 (
MBB.succ_size() > 1) ? NumCondBranches : NumUncondBranches;
3920 (
MBB.succ_size() > 1) ? CondBranchTakenFreq : UncondBranchTakenFreq;
3923 if (
MBB.isLayoutSuccessor(Succ))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file defines the BumpPtrAllocator interface.
static cl::opt< unsigned > TailMergeSize("tail-merge-size", cl::desc("Min number of instructions to consider tail merging"), cl::init(3), cl::Hidden)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Declares methods and data structures for code layout algorithms.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
static unsigned InstrCount
This file defines the DenseMap class.
const HexagonInstrInfo * TII
static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B)
static BranchProbability getAdjustedProbability(BranchProbability OrigProb, BranchProbability AdjustedSumProb)
The helper function returns the branch probability that is adjusted or normalized over the new total ...
static cl::opt< bool > PreciseRotationCost("precise-rotation-cost", cl::desc("Model the cost of loop rotation more " "precisely by using profile data."), cl::init(false), cl::Hidden)
static cl::opt< unsigned > ExtTspBlockPlacementMaxBlocks("ext-tsp-block-placement-max-blocks", cl::desc("Maximum number of basic blocks in a function to run ext-TSP " "block placement."), cl::init(UINT_MAX), cl::Hidden)
static cl::opt< unsigned > AlignAllBlock("align-all-blocks", cl::desc("Force the alignment of all blocks in the function in log2 format " "(e.g 4 means align on 16B boundaries)."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > PredecessorLimit("block-placement-predecessor-limit", cl::desc("For blocks with more predecessors, certain layout optimizations" "will be disabled to prevent quadratic compile time."), cl::init(1000), cl::Hidden)
static BranchProbability getLayoutSuccessorProbThreshold(const MachineBasicBlock *BB)
static cl::opt< bool > ForceLoopColdBlock("force-loop-cold-block", cl::desc("Force outlining cold blocks from loops."), cl::init(false), cl::Hidden)
static cl::opt< unsigned > ExitBlockBias("block-placement-exit-block-bias", cl::desc("Block frequency percentage a loop exit block needs " "over the original exit to be considered the new exit."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > AlignAllNonFallThruBlocks("align-all-nofallthru-blocks", cl::desc("Force the alignment of all blocks that have no fall-through " "predecessors (i.e. don't add nops that are executed). In log2 " "format (e.g 4 means align on 16B boundaries)."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementThreshold("tail-dup-placement-threshold", cl::desc("Instruction cutoff for tail duplication during layout. " "Tail merging during layout is forced to have a threshold " "that won't conflict."), cl::init(2), cl::Hidden)
static cl::opt< unsigned > JumpInstCost("jump-inst-cost", cl::desc("Cost of jump instructions."), cl::init(1), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementPenalty("tail-dup-placement-penalty", cl::desc("Cost penalty for blocks that can avoid breaking CFG by copying. " "Copying can increase fallthrough, but it also increases icache " "pressure. This parameter controls the penalty to account for that. " "Percent as integer."), cl::init(2), cl::Hidden)
static bool greaterWithBias(BlockFrequency A, BlockFrequency B, BlockFrequency EntryFreq)
Compare 2 BlockFrequency's with a small penalty for A.
static cl::opt< unsigned > MisfetchCost("misfetch-cost", cl::desc("Cost that models the probabilistic risk of an instruction " "misfetch due to a jump comparing to falling through, whose cost " "is zero."), cl::init(1), cl::Hidden)
static cl::opt< unsigned > MaxBytesForAlignmentOverride("max-bytes-for-alignment", cl::desc("Forces the maximum bytes allowed to be emitted when padding for " "alignment"), cl::init(0), cl::Hidden)
static cl::opt< bool > BranchFoldPlacement("branch-fold-placement", cl::desc("Perform branch folding during placement. " "Reduces code size."), cl::init(true), cl::Hidden)
static cl::opt< unsigned > TailDupProfilePercentThreshold("tail-dup-profile-percent-threshold", cl::desc("If profile count information is used in tail duplication cost " "model, the gained fall through number from tail duplication " "should be at least this percent of hot count."), cl::init(50), cl::Hidden)
static cl::opt< unsigned > TriangleChainCount("triangle-chain-count", cl::desc("Number of triangle-shaped-CFG's that need to be in a row for the " "triangle tail duplication heuristic to kick in. 0 to disable."), cl::init(2), cl::Hidden)
Branch Probability Basic Block static false std::string getBlockName(const MachineBasicBlock *BB)
Helper to print the name of a MBB.
static cl::opt< bool > ApplyExtTspForSize("apply-ext-tsp-for-size", cl::init(false), cl::Hidden, cl::desc("Use ext-tsp for size-aware block placement."))
static bool hasSameSuccessors(MachineBasicBlock &BB, SmallPtrSetImpl< const MachineBasicBlock * > &Successors)
Check if BB has exactly the successors in Successors.
static cl::opt< bool > TailDupPlacement("tail-dup-placement", cl::desc("Perform tail duplication during placement. " "Creates more fallthrough opportunities in " "outline branches."), cl::init(true), cl::Hidden)
static uint64_t countMBBInstruction(MachineBasicBlock *MBB)
static cl::opt< unsigned > LoopToColdBlockRatio("loop-to-cold-block-ratio", cl::desc("Outline loop blocks from loop chain if (frequency of loop) / " "(frequency of block) is greater than this ratio"), cl::init(5), cl::Hidden)
static cl::opt< bool > RenumberBlocksBeforeView("renumber-blocks-before-view", cl::desc("If true, basic blocks are re-numbered before MBP layout is printed " "into a dot graph. Only used when a function is being printed."), cl::init(false), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementAggressiveThreshold("tail-dup-placement-aggressive-threshold", cl::desc("Instruction cutoff for aggressive tail duplication during " "layout. Used at -O3. Tail merging during layout is forced to " "have a threshold that won't conflict."), cl::init(4), cl::Hidden)
static cl::opt< bool > ForcePreciseRotationCost("force-precise-rotation-cost", cl::desc("Force the use of precise cost " "loop rotation strategy."), cl::init(false), cl::Hidden)
static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI)
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
static BlockFrequency max()
Returns the maximum possible frequency, the saturation value.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
static BranchProbability getOne()
uint32_t getNumerator() const
BranchProbability getCompl() const
static BranchProbability getZero()
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool erase(const KeyT &Val)
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
Module * getParent()
Get the module that this global value is contained inside of...
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
Remove the branching code at the end of the specific MBB.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
Reverses the branch condition of the specified condition list, returning false on success and true if...
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
Insert branch code into the end of the specified MachineBasicBlock.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
LLVM_ABI StringRef getString() const
unsigned pred_size() const
bool isEHPad() const
Returns true if the block is a landing pad.
instr_iterator instr_begin()
LLVM_ABI MachineBasicBlock * getFallThrough(bool JumpToFallThrough=true)
Return the fallthrough block if the block can implicitly transfer control to the block after it by fa...
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
LLVM_ABI void updateTerminator(MachineBasicBlock *PreviousLayoutSuccessor)
Update the terminator instructions in block to account for changes to block layout which may have bee...
LLVM_ABI bool canFallThrough()
Return true if the block can implicitly transfer control to the block after it by falling off the end...
succ_iterator succ_begin()
unsigned succ_size() const
void setAlignment(Align A)
Set alignment of the basic block.
LLVM_ABI bool isEntryBlock() const
Returns true if this is the entry block of the function.
pred_iterator pred_begin()
succ_reverse_iterator succ_rbegin()
LLVM_ABI bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
LLVM_ABI DebugLoc findBranchDebugLoc()
Find and return the merged DebugLoc of the branch instructions of the block.
iterator_range< succ_iterator > successors()
LLVM_ABI bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
iterator_range< pred_iterator > predecessors()
LLVM_ABI StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
LLVM_ABI Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName) const
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
void RenumberBlocks(MachineBasicBlock *MBBFrom=nullptr)
RenumberBlocks - This discards all of the MachineBasicBlock numbers and recomputes them.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Representation of each machine instruction.
Analysis pass that exposes the MachineLoopInfo for a machine function.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
LLVM_ABI uint64_t getOrCompHotCountThreshold() const
Returns HotCountThreshold if set.
typename vector_type::const_iterator iterator
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
reference emplace_back(ArgTypes &&... Args)
iterator erase(const_iterator CI)
typename SuperClass::const_iterator const_iterator
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
T * Allocate(size_t num=1)
Allocate space for an array of objects without constructing them.
void DestroyAll()
Call the destructor of each allocated object and deallocate all but the current slab and reset the cu...
Represent a constant reference to a string, i.e.
void initMF(MachineFunction &MF, bool PreRegAlloc, const MachineBranchProbabilityInfo *MBPI, MBFIWrapper *MBFI, ProfileSummaryInfo *PSI, bool LayoutMode, unsigned TailDupSize=0)
Prepare to run on a specific machine function.
bool tailDuplicateAndUpdate(bool IsSimple, MachineBasicBlock *MBB, MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl< MachineBasicBlock * > *DuplicatedPreds=nullptr, function_ref< void(MachineBasicBlock *)> *RemovalCallback=nullptr, SmallVectorImpl< MachineBasicBlock * > *CandidatePtr=nullptr)
Tail duplicate a single basic block into its predecessors, and then clean up.
static bool isSimpleBB(MachineBasicBlock *TailBB)
True if this BB has only one unconditional jump.
bool canTailDuplicate(MachineBasicBlock *TailBB, MachineBasicBlock *PredBB)
Returns true if TailBB can successfully be duplicated into PredBB.
bool shouldTailDuplicate(bool IsSimple, MachineBasicBlock &TailBB)
Determine if it is profitable to duplicate this block.
virtual unsigned getMaxPermittedBytesForAlignment(MachineBasicBlock *MBB) const
Return the maximum amount of bytes allowed to be emitted when padding for alignment.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
virtual bool alignLoopsWithOptSize() const
Should loops be aligned even when the function is marked OptSize (but not MinSize).
bool requiresStructuredCFG() const
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an std::string.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
initializer< Ty > init(const Ty &Val)
LLVM_ABI double calcExtTspScore(ArrayRef< uint64_t > Order, ArrayRef< uint64_t > NodeSizes, ArrayRef< EdgeCount > EdgeCounts)
Estimate the "quality" of a given node order in CFG.
LLVM_ABI std::vector< uint64_t > computeExtTspLayout(ArrayRef< uint64_t > NodeSizes, ArrayRef< uint64_t > NodeCounts, ArrayRef< EdgeCount > EdgeCounts)
Find a layout of nodes (basic blocks) of a given CFG optimizing jump locality and thus processor I-ca...
DXILDebugInfoMap run(Module &M)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
LLVM_ABI void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
void stable_sort(R &&Range)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
OuterAnalysisManagerProxy< ModuleAnalysisManager, MachineFunction > ModuleAnalysisManagerMachineFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
cl::opt< bool > ApplyExtTspWithoutProfile
constexpr from_range_t from_range
LLVM_ABI bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
cl::opt< unsigned > ProfileLikelyProb
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
cl::opt< std::string > ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden, cl::desc("The option to specify " "the name of the function " "whose CFG will be displayed."))
auto reverse(ContainerTy &&C)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
cl::opt< GVDAGType > ViewBlockLayoutWithBFI("view-block-layout-with-bfi", cl::Hidden, cl::desc("Pop up a window to show a dag displaying MBP layout and associated " "block frequencies of the CFG."), cl::values(clEnumValN(GVDT_None, "none", "do not display graphs."), clEnumValN(GVDT_Fraction, "fraction", "display a graph using the " "fractional block frequency representation."), clEnumValN(GVDT_Integer, "integer", "display a graph using the raw " "integer fractional block frequency representation."), clEnumValN(GVDT_Count, "count", "display a graph using the real " "profile count if available.")))
bool isFunctionInPrintList(StringRef FunctionName)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
FunctionAddr VTableAddr Count
CodeGenOptLevel
Code generation optimization level.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
cl::opt< unsigned > StaticLikelyProb
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
LLVM_ABI Printable printBlockFreq(const BlockFrequencyInfo &BFI, BlockFrequency Freq)
Print the block frequency Freq relative to the current functions entry frequency.
LLVM_ABI char & MachineBlockPlacementID
MachineBlockPlacement - This pass places basic blocks based on branch probabilities.
cl::opt< bool > EnableExtTspBlockPlacement
LLVM_ABI char & MachineBlockPlacementStatsID
MachineBlockPlacementStats - This pass collects statistics about the basic block placement using bran...
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.