54#define DEBUG_TYPE "loop-interchange"
56STATISTIC(LoopsInterchanged,
"Number of loops interchanged");
60 cl::desc(
"Interchange if you gain more than this number"));
66 "Maximum number of load-store instructions that should be handled "
67 "in the dependency matrix. Higher value may lead to more interchanges "
68 "at the cost of compile-time"));
82using CharMatrix = std::vector<std::vector<char>>;
97 cl::desc(
"Minimum depth of loop nest considered for the transform"));
102 cl::desc(
"Maximum depth of loop nest considered for the transform"));
108 cl::desc(
"List of profitability heuristics to be used. They are applied in "
111 RuleTy::PerInstrOrderCost,
112 RuleTy::ForVectorization}),
114 "Prioritize loop cache cost"),
115 clEnumValN(RuleTy::PerInstrOrderCost,
"instorder",
116 "Prioritize the IVs order of each instruction"),
117 clEnumValN(RuleTy::ForVectorization,
"vectorize",
118 "Prioritize vectorization"),
120 "Ignore profitability, force interchange (does not "
121 "work with other options)")));
126 for (RuleTy Rule : Rules) {
127 if (!Set.insert(Rule).second)
129 if (Rule == RuleTy::Ignore)
136 for (
auto &Row : DepMatrix) {
149 assert(Src->getParent() == Dst->getParent() && Src != Dst &&
150 "Expected Src and Dst to be different instructions in the same BB");
152 bool FoundSrc =
false;
193 <<
" Loads and Stores to analyze\n");
199 L->getStartLoc(), L->getHeader())
200 <<
"Number of loads/stores exceeded, the supported maximum "
201 "can be increased with option "
202 "-loop-interchange-maxmeminstr-count.";
212 for (
I = MemInstr.
begin(), IE = MemInstr.
end();
I != IE; ++
I) {
213 for (J =
I, JE = MemInstr.
end(); J != JE; ++J) {
214 std::vector<char> Dep;
221 if (
auto D = DI->
depends(Src, Dst)) {
222 assert(
D->isOrdered() &&
"Expected an output, flow or anti dep.");
225 if (
D->normalize(SE))
228 D->isFlow() ?
"flow" :
D->isAnti() ?
"anti" :
"output";
229 dbgs() <<
"Found " << DepType
230 <<
" dependency between Src and Dst\n"
231 <<
" Src:" << *Src <<
"\n Dst:" << *Dst <<
'\n');
232 unsigned Levels =
D->getLevels();
234 for (
unsigned II = 1;
II <= Levels; ++
II) {
241 unsigned Dir =
D->getDirection(
II);
255 if (
D->isConfused()) {
256 assert(Dep.empty() &&
"Expected empty dependency vector");
257 Dep.assign(Level,
'*');
260 while (Dep.size() != Level) {
266 if (
all_of(Dep, [](
char C) {
return C ==
'*'; })) {
269 L->getStartLoc(), L->getHeader())
270 <<
"All loops have dependencies in all directions.";
276 bool IsKnownForward =
true;
277 if (Src->getParent() != Dst->getParent()) {
281 IsKnownForward =
false;
287 "Unexpected instructions");
292 bool IsReversed =
D->getSrc() != Src;
294 IsKnownForward =
false;
310 DepMatrix.push_back(Dep);
317 DepMatrix[Ite->second].back() =
'*';
329 for (
auto &Row : DepMatrix)
338static std::optional<bool>
351 unsigned InnerLoopId,
352 unsigned OuterLoopId) {
353 unsigned NumRows = DepMatrix.size();
354 std::vector<char> Cur;
356 for (
unsigned Row = 0; Row < NumRows; ++Row) {
359 Cur = DepMatrix[Row];
372 std::swap(Cur[InnerLoopId], Cur[OuterLoopId]);
381 << L.getHeader()->getParent()->getName() <<
" Loop: %"
382 << L.getHeader()->getName() <<
'\n');
383 assert(LoopList.
empty() &&
"LoopList should initially be empty!");
384 Loop *CurrentLoop = &L;
385 const std::vector<Loop *> *Vec = &CurrentLoop->
getSubLoops();
386 while (!Vec->empty()) {
390 if (Vec->size() != 1) {
396 CurrentLoop = Vec->front();
404 unsigned LoopNestDepth = LoopList.
size();
406 LLVM_DEBUG(
dbgs() <<
"Unsupported depth of loop nest " << LoopNestDepth
414 <<
"Unsupported depth of loop nest, the supported range is ["
425 for (
Loop *L : LoopList) {
431 if (L->getNumBackEdges() != 1) {
435 if (!L->getExitingBlock()) {
446class LoopInterchangeLegality {
448 LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
449 OptimizationRemarkEmitter *ORE)
450 : OuterLoop(
Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
453 bool canInterchangeLoops(
unsigned InnerLoopId,
unsigned OuterLoopId,
454 CharMatrix &DepMatrix);
458 bool findInductions(Loop *L, SmallVectorImpl<PHINode *> &Inductions);
462 bool isLoopStructureUnderstood();
464 bool currentLimitations();
466 const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions()
const {
467 return OuterInnerReductions;
471 return InnerLoopInductions;
475 return HasNoWrapReductions;
479 bool tightlyNested(Loop *Outer, Loop *Inner);
480 bool containsUnsafeInstructions(BasicBlock *BB);
486 bool findInductionAndReductions(Loop *L,
496 OptimizationRemarkEmitter *ORE;
500 SmallPtrSet<PHINode *, 4> OuterInnerReductions;
508 SmallVector<Instruction *, 4> HasNoWrapReductions;
514class CacheCostManager {
516 LoopStandardAnalysisResults *AR;
521 std::optional<std::unique_ptr<CacheCost>> CC;
525 DenseMap<const Loop *, unsigned> CostMap;
527 void computeIfUnitinialized();
530 CacheCostManager(Loop *OutermostLoop, LoopStandardAnalysisResults *AR,
532 : OutermostLoop(OutermostLoop), AR(AR), DI(DI) {}
533 CacheCost *getCacheCost();
534 const DenseMap<const Loop *, unsigned> &getCostMap();
539class LoopInterchangeProfitability {
541 LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
542 OptimizationRemarkEmitter *ORE)
543 : OuterLoop(
Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
546 bool isProfitable(
const Loop *InnerLoop,
const Loop *OuterLoop,
547 unsigned InnerLoopId,
unsigned OuterLoopId,
548 CharMatrix &DepMatrix, CacheCostManager &CCM);
551 int getInstrOrderCost();
552 std::optional<bool> isProfitablePerLoopCacheAnalysis(
553 const DenseMap<const Loop *, unsigned> &CostMap, CacheCost *CC);
554 std::optional<bool> isProfitablePerInstrOrderCost();
555 std::optional<bool> isProfitableForVectorization(
unsigned InnerLoopId,
556 unsigned OuterLoopId,
557 CharMatrix &DepMatrix);
565 OptimizationRemarkEmitter *ORE;
569class LoopInterchangeTransform {
571 LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
572 LoopInfo *LI, DominatorTree *DT,
573 const LoopInterchangeLegality &LIL)
574 : OuterLoop(
Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {}
578 void restructureLoops(Loop *NewInner, Loop *NewOuter,
579 BasicBlock *OrigInnerPreHeader,
580 BasicBlock *OrigOuterPreHeader);
581 void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
584 bool adjustLoopLinks();
585 bool adjustLoopBranches();
596 const LoopInterchangeLegality &LIL;
599struct LoopInterchange {
600 ScalarEvolution *SE =
nullptr;
601 LoopInfo *LI =
nullptr;
602 DependenceInfo *DI =
nullptr;
603 DominatorTree *DT =
nullptr;
604 LoopStandardAnalysisResults *AR =
nullptr;
607 OptimizationRemarkEmitter *ORE;
609 LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
610 DominatorTree *DT, LoopStandardAnalysisResults *AR,
611 OptimizationRemarkEmitter *ORE)
612 : SE(SE), LI(LI), DI(DI), DT(DT), AR(AR), ORE(ORE) {}
615 if (
L->getParentLoop())
617 SmallVector<Loop *, 8> LoopList;
619 return processLoopList(LoopList);
622 bool run(LoopNest &LN) {
623 SmallVector<Loop *, 8> LoopList(LN.
getLoops());
624 for (
unsigned I = 1;
I < LoopList.size(); ++
I)
625 if (LoopList[
I]->getParentLoop() != LoopList[
I - 1])
627 return processLoopList(LoopList);
633 return LoopList.
size() - 1;
636 bool processLoopList(SmallVectorImpl<Loop *> &LoopList) {
641 "Unsupported depth of loop nest.");
643 unsigned LoopNestDepth = LoopList.
size();
646 dbgs() <<
"Processing LoopList of size = " << LoopNestDepth
647 <<
" containing the following loops:\n";
648 for (
auto *L : LoopList) {
654 CharMatrix DependencyMatrix;
655 Loop *OuterMostLoop = *(LoopList.begin());
657 OuterMostLoop, DI, SE, ORE)) {
669 <<
"' needs an unique exit block");
673 unsigned SelecLoopId = selectLoopForInterchange(LoopList);
674 CacheCostManager CCM(LoopList[0], AR, DI);
679 for (
unsigned j = SelecLoopId;
j > 0;
j--) {
680 bool ChangedPerIter =
false;
681 for (
unsigned i = SelecLoopId; i > SelecLoopId -
j; i--) {
683 processLoop(LoopList, i, i - 1, DependencyMatrix, CCM);
684 ChangedPerIter |= Interchanged;
695 bool processLoop(SmallVectorImpl<Loop *> &LoopList,
unsigned InnerLoopId,
696 unsigned OuterLoopId,
697 std::vector<std::vector<char>> &DependencyMatrix,
698 CacheCostManager &CCM) {
699 Loop *OuterLoop = LoopList[OuterLoopId];
700 Loop *InnerLoop = LoopList[InnerLoopId];
702 <<
" and OuterLoopId = " << OuterLoopId <<
"\n");
703 LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
704 if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
705 LLVM_DEBUG(
dbgs() <<
"Cannot prove legality, not interchanging loops '"
706 << OuterLoop->
getName() <<
"' and '"
707 << InnerLoop->
getName() <<
"'\n");
712 <<
"' are legal to interchange\n");
713 LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
714 if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
715 DependencyMatrix, CCM)) {
717 <<
"' and '" << InnerLoop->
getName()
718 <<
"' not profitable.\n");
723 return OptimizationRemark(
DEBUG_TYPE,
"Interchanged",
726 <<
"Loop interchanged with enclosing loop.";
729 LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL);
730 LIT.transform(LIL.getHasNoWrapReductions());
732 << OuterLoop->
getName() <<
"' and inner loop '"
733 << InnerLoop->
getName() <<
"'\n");
739 std::swap(LoopList[OuterLoopId], LoopList[InnerLoopId]);
752bool LoopInterchangeLegality::containsUnsafeInstructions(
BasicBlock *BB) {
753 return any_of(*BB, [](
const Instruction &
I) {
754 return I.mayHaveSideEffects() ||
I.mayReadFromMemory();
758bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
764 <<
"' and '" << InnerLoop->
getName()
765 <<
"' are tightly nested\n");
770 BranchInst *OuterLoopHeaderBI =
772 if (!OuterLoopHeaderBI)
775 for (BasicBlock *Succ :
successors(OuterLoopHeaderBI))
776 if (Succ != InnerLoopPreHeader && Succ != InnerLoop->
getHeader() &&
777 Succ != OuterLoopLatch)
780 LLVM_DEBUG(
dbgs() <<
"Checking instructions in Loop header and Loop latch\n");
783 if (containsUnsafeInstructions(OuterLoopHeader) ||
784 containsUnsafeInstructions(OuterLoopLatch))
790 if (InnerLoopPreHeader != OuterLoopHeader &&
791 containsUnsafeInstructions(InnerLoopPreHeader))
799 if (&SuccInner != OuterLoopLatch) {
801 <<
" does not lead to the outer loop latch.\n";);
807 if (containsUnsafeInstructions(InnerLoopExit))
815bool LoopInterchangeLegality::isLoopStructureUnderstood() {
817 for (PHINode *InnerInduction : InnerLoopInductions) {
818 unsigned Num = InnerInduction->getNumOperands();
819 for (
unsigned i = 0; i < Num; ++i) {
820 Value *Val = InnerInduction->getOperand(i);
830 if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
831 InnerLoopPreheader &&
845 BranchInst *InnerLoopLatchBI =
849 if (CmpInst *InnerLoopCmp =
851 Value *Op0 = InnerLoopCmp->getOperand(0);
852 Value *Op1 = InnerLoopCmp->getOperand(1);
863 std::function<bool(
Value *)> IsPathToInnerIndVar;
864 IsPathToInnerIndVar = [
this, &IsPathToInnerIndVar](
const Value *
V) ->
bool {
873 return IsPathToInnerIndVar(
I->getOperand(0));
875 return IsPathToInnerIndVar(
I->getOperand(0)) &&
876 IsPathToInnerIndVar(
I->getOperand(1));
882 if (IsPathToInnerIndVar(Op0) && IsPathToInnerIndVar(Op1))
890 }
else if (IsPathToInnerIndVar(Op1) && !
isa<Constant>(Op1)) {
913 if (
PHI->getNumIncomingValues() != 1)
928 if (
PHI->getNumIncomingValues() == 1)
980 assert(
I->getOpcode() == OpCode &&
981 "Expected the instruction to be the reduction operation");
986 if (
I->hasNoSignedWrap() ||
I->hasNoUnsignedWrap())
1003bool LoopInterchangeLegality::findInductionAndReductions(
1005 if (!
L->getLoopLatch() || !
L->getLoopPredecessor())
1007 for (PHINode &
PHI :
L->getHeader()->phis()) {
1008 InductionDescriptor
ID;
1015 if (!OuterInnerReductions.count(&
PHI)) {
1017 "across the outer loop.\n");
1021 assert(
PHI.getNumIncomingValues() == 2 &&
1022 "Phis in loop header should have exactly 2 incoming values");
1026 PHINode *InnerRedPhi =
1032 <<
"Failed to recognize PHI as an induction or reduction.\n");
1035 OuterInnerReductions.insert(&
PHI);
1036 OuterInnerReductions.insert(InnerRedPhi);
1045bool LoopInterchangeLegality::currentLimitations() {
1055 dbgs() <<
"Loops where the latch is not the exiting block are not"
1056 <<
" supported currently.\n");
1058 return OptimizationRemarkMissed(
DEBUG_TYPE,
"ExitingNotLatch",
1061 <<
"Loops where the latch is not the exiting block cannot be"
1062 " interchange currently.";
1068 if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
1070 dbgs() <<
"Only outer loops with induction or reduction PHI nodes "
1071 <<
"are supported currently.\n");
1073 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedPHIOuter",
1076 <<
"Only outer loops with induction or reduction PHI nodes can be"
1077 " interchanged currently.";
1086 Loop *CurLevelLoop = OuterLoop;
1089 CurLevelLoop = CurLevelLoop->
getSubLoops().front();
1090 if (!findInductionAndReductions(CurLevelLoop, Inductions,
nullptr)) {
1092 dbgs() <<
"Only inner loops with induction or reduction PHI nodes "
1093 <<
"are supported currently.\n");
1095 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedPHIInner",
1098 <<
"Only inner loops with induction or reduction PHI nodes can be"
1099 " interchange currently.";
1106 if (!isLoopStructureUnderstood()) {
1109 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedStructureInner",
1112 <<
"Inner loop structure not understood currently.";
1120bool LoopInterchangeLegality::findInductions(
1121 Loop *L, SmallVectorImpl<PHINode *> &Inductions) {
1122 for (PHINode &
PHI :
L->getHeader()->phis()) {
1123 InductionDescriptor
ID;
1127 return !Inductions.
empty();
1139 if (
PHI.getNumIncomingValues() > 1)
1142 PHINode *PN = dyn_cast<PHINode>(U);
1144 (!Reductions.count(PN) && OuterL->contains(PN->getParent()));
1209 for (
auto *U :
PHI.users()) {
1218bool LoopInterchangeLegality::canInterchangeLoops(
unsigned InnerLoopId,
1219 unsigned OuterLoopId,
1220 CharMatrix &DepMatrix) {
1222 LLVM_DEBUG(
dbgs() <<
"Failed interchange InnerLoopId = " << InnerLoopId
1223 <<
" and OuterLoopId = " << OuterLoopId
1224 <<
" due to dependence\n");
1226 return OptimizationRemarkMissed(
DEBUG_TYPE,
"Dependence",
1229 <<
"Cannot interchange loops due to dependences.";
1234 for (
auto *BB : OuterLoop->
blocks())
1238 if (CI->onlyWritesMemory())
1241 dbgs() <<
"Loops with call instructions cannot be interchanged "
1244 return OptimizationRemarkMissed(
DEBUG_TYPE,
"CallInst",
1247 <<
"Cannot interchange loops due to call instruction.";
1253 if (!findInductions(InnerLoop, InnerLoopInductions)) {
1254 LLVM_DEBUG(
dbgs() <<
"Could not find inner loop induction variables.\n");
1259 LLVM_DEBUG(
dbgs() <<
"Found unsupported PHI nodes in inner loop latch.\n");
1261 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedInnerLatchPHI",
1264 <<
"Cannot interchange loops because unsupported PHI nodes found "
1265 "in inner loop latch.";
1272 if (currentLimitations()) {
1273 LLVM_DEBUG(
dbgs() <<
"Not legal because of current transform limitation\n");
1278 if (!tightlyNested(OuterLoop, InnerLoop)) {
1281 return OptimizationRemarkMissed(
DEBUG_TYPE,
"NotTightlyNested",
1284 <<
"Cannot interchange loops because they are not tightly "
1291 OuterInnerReductions)) {
1292 LLVM_DEBUG(
dbgs() <<
"Found unsupported PHI nodes in inner loop exit.\n");
1294 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedExitPHI",
1297 <<
"Found unsupported PHI node in loop exit.";
1303 LLVM_DEBUG(
dbgs() <<
"Found unsupported PHI nodes in outer loop exit.\n");
1305 return OptimizationRemarkMissed(
DEBUG_TYPE,
"UnsupportedExitPHI",
1308 <<
"Found unsupported PHI node in loop exit.";
1316void CacheCostManager::computeIfUnitinialized() {
1331 for (
const auto &[Idx,
Cost] :
enumerate((*CC)->getLoopCosts()))
1332 CostMap[
Cost.first] = Idx;
1335CacheCost *CacheCostManager::getCacheCost() {
1336 computeIfUnitinialized();
1340const DenseMap<const Loop *, unsigned> &CacheCostManager::getCostMap() {
1341 computeIfUnitinialized();
1345int LoopInterchangeProfitability::getInstrOrderCost() {
1346 unsigned GoodOrder, BadOrder;
1347 BadOrder = GoodOrder = 0;
1348 for (BasicBlock *BB : InnerLoop->
blocks()) {
1349 for (Instruction &Ins : *BB) {
1351 bool FoundInnerInduction =
false;
1352 bool FoundOuterInduction =
false;
1358 const SCEV *OperandVal = SE->
getSCEV(
Op);
1368 if (AR->
getLoop() == InnerLoop) {
1371 FoundInnerInduction =
true;
1372 if (FoundOuterInduction) {
1382 if (AR->
getLoop() == OuterLoop) {
1385 FoundOuterInduction =
true;
1386 if (FoundInnerInduction) {
1395 return GoodOrder - BadOrder;
1399LoopInterchangeProfitability::isProfitablePerLoopCacheAnalysis(
1400 const DenseMap<const Loop *, unsigned> &CostMap, CacheCost *CC) {
1404 auto InnerLoopIt = CostMap.
find(InnerLoop);
1405 if (InnerLoopIt == CostMap.
end())
1406 return std::nullopt;
1407 auto OuterLoopIt = CostMap.
find(OuterLoop);
1408 if (OuterLoopIt == CostMap.
end())
1409 return std::nullopt;
1412 return std::nullopt;
1413 unsigned InnerIndex = InnerLoopIt->second;
1414 unsigned OuterIndex = OuterLoopIt->second;
1416 <<
", OuterIndex = " << OuterIndex <<
"\n");
1417 assert(InnerIndex != OuterIndex &&
"CostMap should assign unique "
1418 "numbers to each loop");
1419 return std::optional<bool>(InnerIndex < OuterIndex);
1423LoopInterchangeProfitability::isProfitablePerInstrOrderCost() {
1427 int Cost = getInstrOrderCost();
1430 return std::optional<bool>(
true);
1432 return std::nullopt;
1437 for (
const auto &Dep : DepMatrix) {
1438 char Dir = Dep[LoopId];
1439 char DepType = Dep.back();
1440 assert((DepType ==
'<' || DepType ==
'*') &&
1441 "Unexpected element in dependency vector");
1444 if (Dir ==
'=' || Dir ==
'I')
1450 if (Dir ==
'<' && DepType ==
'<')
1459std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
1460 unsigned InnerLoopId,
unsigned OuterLoopId, CharMatrix &DepMatrix) {
1476 return std::nullopt;
1479bool LoopInterchangeProfitability::isProfitable(
1480 const Loop *InnerLoop,
const Loop *OuterLoop,
unsigned InnerLoopId,
1481 unsigned OuterLoopId, CharMatrix &DepMatrix, CacheCostManager &CCM) {
1490 if (InnerBTC && InnerBTC->
isZero()) {
1491 LLVM_DEBUG(
dbgs() <<
"Inner loop back-edge isn't taken, rejecting "
1492 "single iteration loop\n");
1495 if (OuterBTC && OuterBTC->
isZero()) {
1496 LLVM_DEBUG(
dbgs() <<
"Outer loop back-edge isn't taken, rejecting "
1497 "single iteration loop\n");
1505 "Duplicate rules and option 'ignore' are not allowed");
1515 std::optional<bool> shouldInterchange;
1518 case RuleTy::PerLoopCacheAnalysis: {
1519 CacheCost *CC = CCM.getCacheCost();
1520 const DenseMap<const Loop *, unsigned> &CostMap = CCM.getCostMap();
1521 shouldInterchange = isProfitablePerLoopCacheAnalysis(CostMap, CC);
1524 case RuleTy::PerInstrOrderCost:
1525 shouldInterchange = isProfitablePerInstrOrderCost();
1527 case RuleTy::ForVectorization:
1529 isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
1531 case RuleTy::Ignore:
1538 if (shouldInterchange.has_value())
1542 if (!shouldInterchange.has_value()) {
1544 return OptimizationRemarkMissed(
DEBUG_TYPE,
"InterchangeNotProfitable",
1547 <<
"Insufficient information to calculate the cost of loop for "
1551 }
else if (!shouldInterchange.value()) {
1553 return OptimizationRemarkMissed(
DEBUG_TYPE,
"InterchangeNotProfitable",
1556 <<
"Interchanging loops is not considered to improve cache "
1557 "locality nor vectorization.";
1564void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
1566 for (Loop *L : *OuterLoop)
1567 if (L == InnerLoop) {
1568 OuterLoop->removeChildLoop(L);
1597void LoopInterchangeTransform::restructureLoops(
1598 Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader,
1599 BasicBlock *OrigOuterPreHeader) {
1600 Loop *OuterLoopParent = OuterLoop->getParentLoop();
1607 if (OuterLoopParent) {
1609 removeChildLoop(OuterLoopParent, NewInner);
1610 removeChildLoop(NewInner, NewOuter);
1613 removeChildLoop(NewInner, NewOuter);
1621 SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->
blocks());
1625 for (BasicBlock *BB : NewInner->
blocks())
1633 for (BasicBlock *BB : OrigInnerBBs) {
1638 if (BB == OuterHeader || BB == OuterLatch)
1653bool LoopInterchangeTransform::transform(
1655 bool Transformed =
false;
1660 auto &InductionPHIs = LIL.getInnerLoopInductions();
1661 if (InductionPHIs.empty()) {
1662 LLVM_DEBUG(
dbgs() <<
"Failed to find the point to split loop latch \n");
1666 SmallVector<Instruction *, 8> InnerIndexVarList;
1667 for (PHINode *CurInductionPHI : InductionPHIs) {
1668 if (CurInductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
1684 SmallSetVector<Instruction *, 4> WorkList;
1686 auto MoveInstructions = [&i, &WorkList,
this, &InductionPHIs, NewLatch]() {
1687 for (; i < WorkList.
size(); i++) {
1693 "Moving instructions with side-effects may change behavior of "
1704 for (
Value *
Op : WorkList[i]->operands()) {
1722 for (Instruction *InnerIndexVar : InnerIndexVarList)
1741 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1742 if (InnerLoopPreHeader != OuterLoopHeader) {
1743 for (Instruction &
I :
1745 std::prev(InnerLoopPreHeader->
end()))))
1749 Transformed |= adjustLoopLinks();
1757 for (Instruction *
Reduction : DropNoWrapInsts) {
1781 I->removeFromParent();
1796 std::vector<DominatorTree::UpdateType> &DTUpdates,
1797 bool MustUpdateOnce =
true) {
1799 "BI must jump to OldBB exactly once.");
1808 DTUpdates.push_back(
1809 {DominatorTree::UpdateKind::Insert, BI->
getParent(), NewBB});
1810 DTUpdates.push_back(
1811 {DominatorTree::UpdateKind::Delete, BI->
getParent(), OldBB});
1830 assert(
P.getNumIncomingValues() == 1 &&
1831 "Only loops with a single exit are supported!");
1840 if (IncIInnerMost->getParent() != InnerLatch &&
1841 IncIInnerMost->
getParent() != InnerHeader)
1845 [OuterHeader, OuterExit, IncI, InnerHeader](
User *U) {
1846 return (cast<PHINode>(U)->getParent() == OuterHeader &&
1847 IncI->getParent() == InnerHeader) ||
1848 cast<PHINode>(U)->getParent() == OuterExit;
1850 "Can only replace phis iff the uses are in the loop nest exit or "
1851 "the incoming value is defined in the inner header (it will "
1852 "dominate all loop blocks after interchanging)");
1853 P.replaceAllUsesWith(IncI);
1854 P.eraseFromParent();
1882 if (
P.getNumIncomingValues() != 1)
1896 if (Pred == OuterLatch)
1901 P.setIncomingValue(0, NewPhi);
1941 if (OuterLoopLatch == InnerLoopExit)
1948 assert(Phi->getNumIncomingValues() == 1 &&
"Single input phi expected");
1949 LLVM_DEBUG(
dbgs() <<
"Removing 1-input phi in non-exit block: " << *Phi
1951 Phi->replaceAllUsesWith(Phi->getIncomingValue(0));
1952 Phi->eraseFromParent();
1956bool LoopInterchangeTransform::adjustLoopBranches() {
1958 std::vector<DominatorTree::UpdateType> DTUpdates;
1960 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1963 assert(OuterLoopPreHeader != OuterLoop->getHeader() &&
1964 InnerLoopPreHeader != InnerLoop->
getHeader() && OuterLoopPreHeader &&
1965 InnerLoopPreHeader &&
"Guaranteed by loop-simplify form");
1975 OuterLoopPreHeader =
1977 if (InnerLoopPreHeader == OuterLoop->getHeader())
1978 InnerLoopPreHeader =
1983 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1985 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1992 BranchInst *OuterLoopLatchBI =
1994 BranchInst *InnerLoopLatchBI =
1996 BranchInst *OuterLoopHeaderBI =
1998 BranchInst *InnerLoopHeaderBI =
2001 if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
2002 !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
2006 BranchInst *InnerLoopLatchPredecessorBI =
2008 BranchInst *OuterLoopPredecessorBI =
2011 if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
2014 if (!InnerLoopHeaderSuccessor)
2022 InnerLoopPreHeader, DTUpdates,
false);
2032 InnerLoopHeaderSuccessor, DTUpdates,
2040 OuterLoopPreHeader, DTUpdates);
2043 if (InnerLoopLatchBI->
getSuccessor(0) == InnerLoopHeader)
2044 InnerLoopLatchSuccessor = InnerLoopLatchBI->
getSuccessor(1);
2046 InnerLoopLatchSuccessor = InnerLoopLatchBI->
getSuccessor(0);
2049 InnerLoopLatchSuccessor, DTUpdates);
2051 if (OuterLoopLatchBI->
getSuccessor(0) == OuterLoopHeader)
2052 OuterLoopLatchSuccessor = OuterLoopLatchBI->
getSuccessor(1);
2054 OuterLoopLatchSuccessor = OuterLoopLatchBI->
getSuccessor(0);
2057 OuterLoopLatchSuccessor, DTUpdates);
2058 updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
2062 restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
2063 OuterLoopPreHeader);
2065 moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
2066 OuterLoopHeader, OuterLoopLatch, InnerLoop->
getExitBlock(),
2072 auto &OuterInnerReductions = LIL.getOuterInnerReductions();
2075 for (PHINode &
PHI : InnerLoopHeader->
phis())
2076 if (OuterInnerReductions.contains(&
PHI))
2079 for (PHINode &
PHI : OuterLoopHeader->
phis())
2080 if (OuterInnerReductions.contains(&
PHI))
2086 for (PHINode *
PHI : OuterLoopPHIs) {
2089 assert(OuterInnerReductions.count(
PHI) &&
"Expected a reduction PHI node");
2091 for (PHINode *
PHI : InnerLoopPHIs) {
2094 assert(OuterInnerReductions.count(
PHI) &&
"Expected a reduction PHI node");
2107 SmallVector<Instruction *, 4> MayNeedLCSSAPhis;
2108 for (Instruction &
I :
2116bool LoopInterchangeTransform::adjustLoopLinks() {
2118 bool Changed = adjustLoopBranches();
2123 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
2148 LLVM_DEBUG(
dbgs() <<
"Not valid loop candidate for interchange\n");
2156 <<
"Computed dependence info, invoking the transform.";
2160 if (!LoopInterchange(&AR.
SE, &AR.
LI, &DI, &AR.
DT, &AR, &ORE).run(LN))
2162 U.markLoopNestChanged(
true);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file defines the StringMap class.
ReachingDefInfo InstSet InstSet & Ignore
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB)
Move the contents of SourceBB to before the last instruction of TargetBB.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
This file defines the interface for the loop cache analysis.
SmallVector< Loop *, 4 > LoopVector
Loop::LoopBounds::Direction Direction
static cl::opt< unsigned int > MaxMemInstrCount("loop-interchange-max-meminstr-count", cl::init(64), cl::Hidden, cl::desc("Maximum number of load-store instructions that should be handled " "in the dependency matrix. Higher value may lead to more interchanges " "at the cost of compile-time"))
static cl::opt< int > LoopInterchangeCostThreshold("loop-interchange-threshold", cl::init(0), cl::Hidden, cl::desc("Interchange if you gain more than this number"))
static PHINode * findInnerReductionPhi(Loop *L, Value *V, SmallVectorImpl< Instruction * > &HasNoWrapInsts)
static cl::opt< unsigned int > MinLoopNestDepth("loop-interchange-min-loop-nest-depth", cl::init(2), cl::Hidden, cl::desc("Minimum depth of loop nest considered for the transform"))
static bool isComputableLoopNest(ScalarEvolution *SE, ArrayRef< Loop * > LoopList)
static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
static void simplifyLCSSAPhis(Loop *OuterLoop, Loop *InnerLoop)
This deals with a corner case when a LCSSA phi node appears in a non-exit block: the outer loop latch...
static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx, unsigned ToIndx)
static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader, BasicBlock *InnerLatch, BasicBlock *OuterHeader, BasicBlock *OuterLatch, BasicBlock *OuterExit, Loop *InnerLoop, LoopInfo *LI)
static void printDepMatrix(CharMatrix &DepMatrix)
static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2)
Swap instructions between BB1 and BB2 but keep terminators intact.
static cl::opt< unsigned int > MaxLoopNestDepth("loop-interchange-max-loop-nest-depth", cl::init(10), cl::Hidden, cl::desc("Maximum depth of loop nest considered for the transform"))
static bool hasSupportedLoopDepth(ArrayRef< Loop * > LoopList, OptimizationRemarkEmitter &ORE)
static bool inThisOrder(const Instruction *Src, const Instruction *Dst)
Return true if Src appears before Dst in the same basic block.
static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
static bool canVectorize(const CharMatrix &DepMatrix, unsigned LoopId)
Return true if we can vectorize the loop specified by LoopId.
static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, unsigned InnerLoopId, unsigned OuterLoopId)
static bool areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL, SmallPtrSetImpl< PHINode * > &Reductions)
static Value * followLCSSA(Value *SV)
static void populateWorklist(Loop &L, LoopVector &LoopList)
static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, Loop *L, DependenceInfo *DI, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE)
static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB, BasicBlock *NewBB, std::vector< DominatorTree::UpdateType > &DTUpdates, bool MustUpdateOnce=true)
static std::optional< bool > isLexicographicallyPositive(ArrayRef< char > DV, unsigned Begin, unsigned End)
static cl::list< RuleTy > Profitabilities("loop-interchange-profitabilities", cl::ZeroOrMore, cl::MiscFlags::CommaSeparated, cl::Hidden, cl::desc("List of profitability heuristics to be used. They are applied in " "the given order"), cl::list_init< RuleTy >({RuleTy::PerLoopCacheAnalysis, RuleTy::PerInstrOrderCost, RuleTy::ForVectorization}), cl::values(clEnumValN(RuleTy::PerLoopCacheAnalysis, "cache", "Prioritize loop cache cost"), clEnumValN(RuleTy::PerInstrOrderCost, "instorder", "Prioritize the IVs order of each instruction"), clEnumValN(RuleTy::ForVectorization, "vectorize", "Prioritize vectorization"), clEnumValN(RuleTy::Ignore, "ignore", "Ignore profitability, force interchange (does not " "work with other options)")))
static bool noDuplicateRulesAndIgnore(ArrayRef< RuleTy > Rules)
This file defines the interface for the loop nest analysis.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
loop Loop Strength Reduction
uint64_t IntrinsicInst * II
SmallVector< Value *, 8 > ValueVector
This file defines the SmallSet class.
This file defines the SmallVector class.
static bool isProfitable(const StableFunctionMap::StableFunctionEntries &SFS)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
const Function * getParent() const
Return the enclosing method, or null if none.
LLVM_ABI iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI void replacePhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
static std::unique_ptr< CacheCost > getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, std::optional< unsigned > TRT=std::nullopt)
Create a CacheCost for the loop nest rooted by Root.
CacheCostTy getLoopCost(const Loop &L) const
Return the estimated cost of loop L if the given loop is part of the loop nest associated with this o...
iterator find(const_arg_type_t< KeyT > Val)
DependenceInfo - This class is the main dependence-analysis driver.
LLVM_ABI std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool UnderRuntimeAssumptions=false)
depends - Tests for a dependence between the Src and Dst instructions.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
static LLVM_ABI bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
void removeBlockFromLoop(BlockT *BB)
This removes the specified basic block from the current loop, updating the Blocks as appropriate.
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
BlockT * getHeader() const
iterator_range< block_iterator > blocks() const
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
void addBlockEntry(BlockT *BB)
This adds a basic block directly to the basic block list.
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
BlockT * getUniqueExitBlock() const
If getUniqueExitBlocks would return exactly one block, return that block.
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
void changeTopLevelLoop(LoopT *OldLoop, LoopT *NewLoop)
Replace the specified loop in the top-level loops list with the indicated loop.
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
static const BasicBlock & skipEmptyBlockUntil(const BasicBlock *From, const BasicBlock *End, bool CheckUniquePred=false)
Recursivelly traverse all empty 'single successor' basic blocks of From (if there are any).
ArrayRef< Loop * > getLoops() const
Get the loops in the nest.
Function * getParent() const
Return the function to which the loop-nest belongs.
Loop & getOutermostLoop() const
Return the outermost loop in the loop nest.
Represents a single loop in the control flow graph.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
StringRef getName() const
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
void setIncomingBlock(unsigned i, BasicBlock *BB)
void setIncomingValue(unsigned i, Value *V)
static unsigned getIncomingValueNumForOperand(unsigned i)
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
unsigned getOpcode() const
static LLVM_ABI bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
LLVM_ABI SmallVector< Instruction *, 4 > getReductionOpChain(PHINode *Phi, Loop *L) const
Attempts to find a chain of operations from Phi to LoopExitInst that can be treated as a set of reduc...
RecurKind getRecurrenceKind() const
const Loop * getLoop() const
This class represents an analyzed expression in the program.
LLVM_ABI bool isZero() const
Return true if the expression is a constant zero.
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
std::pair< iterator, bool > try_emplace(StringRef Key, ArgsTy &&...Args)
Emplace a new element for the specified key into the map if the key isn't already in the map.
StringRef - Represent a constant reference to a string, i.e.
constexpr size_t size() const
size - Get the string size.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
iterator_range< user_iterator > users()
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
list_initializer< Ty > list_init(ArrayRef< Ty > Vals)
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
LLVM_ABI BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
auto successors(const MachineBasicBlock *BB)
LLVM_ABI bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto map_range(ContainerTy &&C, FuncTy F)
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FMinimumNum
FP min with llvm.minimumnum semantics.
@ Or
Bitwise or logical OR of integers.
@ FMinimum
FP min with llvm.minimum semantics.
@ Mul
Product of integers.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ Xor
Bitwise or logical XOR of integers.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMaximum
FP max with llvm.maximum semantics.
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ FMaximumNum
FP max with llvm.maximumnum semantics.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
auto predecessors(const MachineBasicBlock *BB)
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...