52 GetIntOrFpInductionDescriptor,
59 if (!VPBB->getParent())
62 auto EndIter = Term ? Term->getIterator() : VPBB->end();
67 VPValue *VPV = Ingredient.getVPSingleValue();
76 const auto *II = GetIntOrFpInductionDescriptor(Phi);
90 Phi, Start, Step, &Plan.getVF(), *II, Flags,
91 Ingredient.getDebugLoc());
99 *Load, Ingredient.getOperand(0), nullptr,
100 false, false, *VPI,
101 Ingredient.getDebugLoc());
104 *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
105 nullptr, false, false, *VPI,
106 Ingredient.getDebugLoc());
109 Ingredient.getDebugLoc());
117 *VPI, CI->getDebugLoc());
120 *VPI, Ingredient.getDebugLoc());
123 CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI,
127 *VPI, Ingredient.getDebugLoc());
136 "Only recpies with zero or one defined values expected");
137 Ingredient.eraseFromParent();
154 if (A->getOpcode() != Instruction::Store ||
155 B->getOpcode() != Instruction::Store)
165 const APInt *Distance;
171 Type *TyA = TypeInfo.inferScalarType(A->getOperand(0));
173 Type *TyB = TypeInfo.inferScalarType(B->getOperand(0));
179 uint64_t MaxStoreSize = std::max(SizeA, SizeB);
181 auto VFs = B->getParent()->getPlan()->vectorFactors();
183 return Distance->abs().uge(
191 : ExcludeRecipes(ExcludeRecipes), GroupLeader(GroupLeader), PSE(PSE),
192 L(L), TypeInfo(TypeInfo) {}
199 return ExcludeRecipes.contains(&R) ||
200 (Store && isNoAliasViaDistance(Store, &GroupLeader));
213 std::optional<SinkStoreInfo> SinkInfo = {}) {
214 bool CheckReads = SinkInfo.has_value();
223 "Expected at most one successor in block chain");
226 if (SinkInfo && SinkInfo->shouldSkip(R))
230 if (!R.mayWriteToMemory() && !(CheckReads && R.mayReadFromMemory()))
241 if (CheckReads && R.mayReadFromMemory() &&
248 Loc->AATags.NoAlias))
268 if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())
273 return RepR && RepR->getOpcode() == Instruction::Alloca;
282 auto InsertIfValidSinkCandidate = [ScalarVFOnly, &WorkList](
298 if (!ScalarVFOnly && RepR->isSingleScalar())
301 WorkList.insert({SinkTo, Candidate});
313 for (auto &Recipe : *VPBB)
315 InsertIfValidSinkCandidate(VPBB, Op);
319 for (unsigned I = 0; I != WorkList.size(); ++I) {
322 std::tie(SinkTo, SinkCandidate) = WorkList[I];
327 auto UsersOutsideSinkTo =
329 return cast<VPRecipeBase>(U)->getParent() != SinkTo;
331 if (any_of(UsersOutsideSinkTo, [SinkCandidate](VPUser *U) {
332 return !U->usesFirstLaneOnly(SinkCandidate);
335 bool NeedsDuplicating = !UsersOutsideSinkTo.empty();
337 if (NeedsDuplicating) {
341 if (auto *SinkCandidateRepR =
347 nullptr, *SinkCandidateRepR,
351 Clone = SinkCandidate->clone();
361 InsertIfValidSinkCandidate(SinkTo, Op);
371 if (!EntryBB || EntryBB->size() != 1 ||
381 if (EntryBB->getNumSuccessors() != 2)
386 if (!Succ0 || !Succ1)
389 if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
391 if (Succ0->getSingleSuccessor() == Succ1)
393 if (Succ1->getSingleSuccessor() == Succ0)
410 if (!Region1->isReplicator())
412 auto *MiddleBasicBlock =
414 if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
419 if (!Region2 || !Region2->isReplicator())
424 if (!Mask1 || Mask1 != Mask2)
427 assert(Mask1 && Mask2 && "both regions must have conditions");
433 if (TransformedRegions.contains(Region1))
440 if (!Then1 || !Then2)
460 VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
466 if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {
467 Phi1ToMove.eraseFromParent();
470 Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
484 TransformedRegions.insert(Region1);
487 return !TransformedRegions.empty();
494 std::string RegionName = (Twine("pred.") + Instr->getOpcodeName()).str();
495 assert(Instr->getParent() && "Predicated instruction not in any basic block");
496 auto *BlockInMask = PredRecipe->getMask();
515 RecipeWithoutMask->getDebugLoc());
539 if (RepR->isPredicated())
558 if (ParentRegion && ParentRegion->getExiting() == CurrentBlock)
572 if (!VPBB->getParent())
576 if (!PredVPBB || PredVPBB->getNumSuccessors() != 1 ||
585 R.moveBefore(*PredVPBB, PredVPBB->end());
587 auto *ParentRegion = VPBB->getParent();
588 if (ParentRegion && ParentRegion->getExiting() == VPBB)
589 ParentRegion->setExiting(PredVPBB);
590 for (auto *Succ : to_vector(VPBB->successors())) {
596 return !WorkList.empty();
603 bool ShouldSimplify = true;
604 while (ShouldSimplify) {
620 if (!IV || IV->getTruncInst())
635 for (auto *U : FindMyCast->users()) {
637 if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
638 FoundUserCast = UserCast;
642 FindMyCast = FoundUserCast;
667 if (!WidenOriginalIV || !WidenOriginalIV->isCanonical())
680 WidenOriginalIV->dropPoisonGeneratingFlags();
693 bool IsConditionalAssume = RepR && RepR->isPredicated() &&
695 if (IsConditionalAssume)
698 if (R.mayHaveSideEffects())
702 return all_of(R.definedValues(),
703 [](VPValue *V) { return V->getNumUsers() == 0; });
719 if (!PhiR || PhiR->getNumOperands() != 2)
721 VPUser *PhiUser = PhiR->getSingleUser();
725 if (PhiUser != Incoming->getDefiningRecipe() ||
728 PhiR->replaceAllUsesWith(PhiR->getOperand(0));
729 PhiR->eraseFromParent();
730 Incoming->getDefiningRecipe()->eraseFromParent();
745 Kind, FPBinOp, StartV, CanonicalIV, Step, "offset.idx");
755 BaseIV = Builder.createScalarCast(Instruction::Trunc, BaseIV, TruncTy, DL);
761 if (ResultTy != StepTy) {
768 Builder.setInsertPoint(VecPreheader);
769 Step = Builder.createScalarCast(Instruction::Trunc, Step, ResultTy, DL);
771 return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step,
777 for (unsigned I = 0; I != Users.size(); ++I) {
782 Users.insert_range(V->users());
784 return Users.takeVector();
798 nullptr, StartV, StepV, PtrIV->getDebugLoc(), Builder);
835 Def->getNumUsers() == 0 || !Def->getUnderlyingValue() ||
836 (RepR && (RepR->isSingleScalar() || RepR->isPredicated())))
844 Def->operands(), true,
846 Clone->insertAfter(Def);
847 Def->replaceAllUsesWith(Clone);
858 PtrIV->replaceAllUsesWith(PtrAdd);
865 if (HasOnlyVectorVFs && none_of(WideIV->users(), [WideIV](VPUser *U) {
866 return U->usesScalars(WideIV);
872 Plan, ID.getKind(), ID.getInductionOpcode(),
874 WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
875 WideIV->getDebugLoc(), Builder);
878 if (!HasOnlyVectorVFs) {
880 "plans containing a scalar VF cannot also include scalable VFs");
881 WideIV->replaceAllUsesWith(Steps);
884 WideIV->replaceUsesWithIf(Steps,
885 [WideIV, HasScalableVF](VPUser &U, unsigned) {
887 return U.usesFirstLaneOnly(WideIV);
888 return U.usesScalars(WideIV);
904 return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV;
909 if (!Def || Def->getNumOperands() != 2)
917 auto IsWideIVInc = [&]() {
918 auto &ID = WideIV->getInductionDescriptor();
921 VPValue *IVStep = WideIV->getStepValue();
922 switch (ID.getInductionOpcode()) {
923 case Instruction::Add:
925 case Instruction::FAdd:
928 case Instruction::FSub:
931 case Instruction::Sub: {
951 return IsWideIVInc() ? WideIV : nullptr;
971 if (WideIntOrFp && WideIntOrFp->getTruncInst())
984 FirstActiveLane = B.createScalarZExtOrTrunc(FirstActiveLane, CanonicalIVType,
985 FirstActiveLaneType, DL);
987 B.createNaryOp(Instruction::Add, {CanonicalIV, FirstActiveLane}, DL);
994 EndValue = B.createNaryOp(Instruction::Add, {EndValue, One}, DL);
997 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
999 VPValue *Start = WideIV->getStartValue();
1000 VPValue *Step = WideIV->getStepValue();
1001 EndValue = B.createDerivedIV(
1003 Start, EndValue, Step);
1023 assert(EndValue && "end value must have been pre-computed");
1033 VPValue *Step = WideIV->getStepValue();
1036 return B.createNaryOp(Instruction::Sub, {EndValue, Step},
1041 return B.createPtrAdd(EndValue,
1042 B.createNaryOp(Instruction::Sub, {Zero, Step}),
1046 const auto &ID = WideIV->getInductionDescriptor();
1047 return B.createNaryOp(
1048 ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
1050 : Instruction::FAdd,
1051 {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
1066 for (auto [Idx, PredVPBB] : enumerate(ExitVPBB->getPredecessors())) {
1068 if (PredVPBB == MiddleVPBB)
1070 ExitIRI->getOperand(Idx),
1074 Plan, TypeInfo, PredVPBB, ExitIRI->getOperand(Idx), PSE);
1076 ExitIRI->setOperand(Idx, Escape);
1093 const auto &[V, Inserted] = SCEV2VPV.try_emplace(ExpR->getSCEV(), ExpR);
1096 ExpR->replaceAllUsesWith(V->second);
1097 ExpR->eraseFromParent();
1106 while (!WorkList.empty()) {
1108 if (!Seen.insert(Cur).second)
1116 R->eraseFromParent();
1123static std::optional<std::pair<bool, unsigned>>
1126 std::optional<std::pair<bool, unsigned>>>(R)
1129 [](auto *I) { return std::make_pair(false, I->getOpcode()); })
1130 .Case<VPWidenIntrinsicRecipe>([](auto *I) {
1131 return std::make_pair(true, I->getVectorIntrinsicID());
1133 .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe>([](auto *I) {
1137 return std::make_pair(false,
1140 .Default([](auto *) { return std::nullopt; });
1156 if (!Op->isLiveIn() || !Op->getLiveInIRValue())
1158 Ops.push_back(Op->getLiveInIRValue());
1161 auto FoldToIRValue = [&]() -> Value * {
1163 if (OpcodeOrIID->first) {
1164 if (R.getNumOperands() != 2)
1166 unsigned ID = OpcodeOrIID->second;
1167 return Folder.FoldBinaryIntrinsic(ID, Ops[0], Ops[1],
1170 unsigned Opcode = OpcodeOrIID->second;
1179 return Folder.FoldSelect(Ops[0], Ops[1],
1182 return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0],
1184 case Instruction::Select:
1185 return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]);
1186 case Instruction::ICmp:
1187 case Instruction::FCmp:
1190 case Instruction::GetElementPtr: {
1193 return Folder.FoldGEP(GEP->getSourceElementType(), Ops[0],
1203 case Instruction::ExtractElement:
1210 if (Value *V = FoldToIRValue())
1211 return R.getParent()->getPlan()->getOrAddLiveIn(V);
1217 VPlan *Plan = Def->getParent()->getPlan();
1224 return Def->replaceAllUsesWith(V);
1230 PredPHI->replaceAllUsesWith(Op);
1238 if (TruncTy == ATy) {
1239 Def->replaceAllUsesWith(A);
1248 : Instruction::ZExt;
1251 if (auto *UnderlyingExt = Def->getOperand(0)->getUnderlyingValue()) {
1253 Ext->setUnderlyingValue(UnderlyingExt);
1255 Def->replaceAllUsesWith(Ext);
1257 auto *Trunc = Builder.createWidenCast(Instruction::Trunc, A, TruncTy);
1258 Def->replaceAllUsesWith(Trunc);
1266 for (VPUser *U : A->users()) {
1268 for (VPValue *VPV : R->definedValues())
1282 Def->replaceAllUsesWith(X);
1283 Def->eraseFromParent();
1289 return Def->replaceAllUsesWith(Def->getOperand(Def->getOperand(0) == X));
1293 return Def->replaceAllUsesWith(X);
1297 return Def->replaceAllUsesWith(Def->getOperand(Def->getOperand(0) == X));
1301 return Def->replaceAllUsesWith(Def->getOperand(1));
1308 (!Def->getOperand(0)->hasMoreThanOneUniqueUser() ||
1309 !Def->getOperand(1)->hasMoreThanOneUniqueUser()))
1310 return Def->replaceAllUsesWith(
1311 Builder.createLogicalAnd(X, Builder.createOr(Y, Z)));
1315 return Def->replaceAllUsesWith(Plan->getFalse());
1318 return Def->replaceAllUsesWith(X);
1323 Def->setOperand(0, C);
1324 Def->setOperand(1, Y);
1325 Def->setOperand(2, X);
1334 X->hasMoreThanOneUniqueUser())
1335 return Def->replaceAllUsesWith(
1336 Builder.createLogicalAnd(X, Builder.createLogicalAnd(Y, Z)));
1339 return Def->replaceAllUsesWith(A);
1342 return Def->replaceAllUsesWith(A);
1345 return Def->replaceAllUsesWith(
1346 Def->getOperand(0) == A ? Def->getOperand(1) : Def->getOperand(0));
1350 return Def->replaceAllUsesWith(A);
1365 R->setOperand(1, Y);
1366 R->setOperand(2, X);
1370 R->replaceAllUsesWith(Cmp);
1375 if (!Cmp->getDebugLoc() && Def->getDebugLoc())
1376 Cmp->setDebugLoc(Def->getDebugLoc());
1388 if (Op->getNumUsers() > 1 ||
1392 } else if (!UnpairedCmp) {
1393 UnpairedCmp = Op->getDefiningRecipe();
1397 UnpairedCmp = nullptr;
1404 if (NewOps.size() < Def->getNumOperands()) {
1406 return Def->replaceAllUsesWith(NewAnyOf);
1418 return Def->replaceAllUsesWith(NewCmp);
1426 return Def->replaceAllUsesWith(Def->getOperand(1));
1432 X = Builder.createWidenCast(Instruction::Trunc, X, WideStepTy);
1433 Def->replaceAllUsesWith(X);
1443 Def->setOperand(1, Def->getOperand(0));
1444 Def->setOperand(0, Y);
1449 if (Phi->getOperand(0) == Phi->getOperand(1))
1450 Phi->replaceAllUsesWith(Phi->getOperand(0));
1458 Def->replaceAllUsesWith(
1459 BuildVector->getOperand(BuildVector->getNumOperands() - 1));
1463 return Def->replaceAllUsesWith(A);
1469 Def->replaceAllUsesWith(
1470 BuildVector->getOperand(BuildVector->getNumOperands() - 2));
1477 Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
1482 Def->replaceAllUsesWith(
1492 "broadcast operand must be single-scalar");
1493 Def->setOperand(0, C);
1498 if (Phi->getNumOperands() == 1)
1499 Phi->replaceAllUsesWith(Phi->getOperand(0));
1512 if (Phi->getOperand(1) != Def && match(Phi->getOperand(0), m_ZeroInt()) &&
1513 Phi->getSingleUser() == Def) {
1514 Phi->setOperand(0, Y);
1515 Def->replaceAllUsesWith(Phi);
1524 return VPR->replaceAllUsesWith(VPR->getOperand(0));
1530 Steps->replaceAllUsesWith(Steps->getOperand(0));
1538 Def->replaceUsesWithIf(StartV, [](const VPUser &U, unsigned Idx) {
1540 return PhiR && PhiR->isInLoop();
1546 Def->replaceAllUsesWith(A);
1555 [Def, A](VPUser *U) { return U->usesScalars(A) || Def == U; })) {
1556 return Def->replaceAllUsesWith(A);
1560 return Def->replaceAllUsesWith(A);
1589 if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
1598 !WidenStoreR->isConsecutive()) {
1599 assert(!WidenStoreR->isReverse() &&
1600 "Non-consecutive memory recipes shouldn't be reversed");
1601 VPValue *Mask = WidenStoreR->getMask();
1610 {WidenStoreR->getOperand(1)});
1615 &WidenStoreR->getIngredient(), {Extract, WidenStoreR->getAddr()},
1616 true, nullptr, {},
1618 ScalarStore->insertBefore(WidenStoreR);
1619 WidenStoreR->eraseFromParent();
1627 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1628 true, nullptr, *RepR,
1629 *RepR, RepR->getDebugLoc());
1630 Clone->insertBefore(RepOrWidenR);
1632 VPValue *ExtractOp = Clone->getOperand(0);
1638 Clone->setOperand(0, ExtractOp);
1639 RepR->eraseFromParent();
1652 if (!all_of(RepOrWidenR->users(),
1653 [RepOrWidenR](const VPUser *U) {
1654 if (auto *VPI = dyn_cast<VPInstruction>(U)) {
1655 unsigned Opcode = VPI->getOpcode();
1656 if (Opcode == VPInstruction::ExtractLastLane ||
1657 Opcode == VPInstruction::ExtractLastPart ||
1658 Opcode == VPInstruction::ExtractPenultimateElement)
1662 return U->usesScalars(RepOrWidenR);
1665 if (Op->getSingleUser() != RepOrWidenR)
1669 bool LiveInNeedsBroadcast =
1670 Op->isLiveIn() && !isa<Constant>(Op->getLiveInIRValue());
1671 auto *OpR = dyn_cast<VPReplicateRecipe>(Op);
1672 return LiveInNeedsBroadcast || (OpR && OpR->isSingleScalar());
1677 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1678 true, nullptr, *RepOrWidenR);
1679 Clone->insertBefore(RepOrWidenR);
1680 RepOrWidenR->replaceAllUsesWith(Clone);
1682 RepOrWidenR->eraseFromParent();
1718 if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))
1719 UniqueValues.insert(Blend->getIncomingValue(0));
1720 for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
1722 UniqueValues.insert(Blend->getIncomingValue(I));
1724 if (UniqueValues.size() == 1) {
1725 Blend->replaceAllUsesWith(*UniqueValues.begin());
1726 Blend->eraseFromParent();
1730 if (Blend->isNormalized())
1736 unsigned StartIndex = 0;
1737 for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
1742 if (Mask->getNumUsers() == 1 && !match(Mask, m_False())) {
1749 OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));
1751 for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
1752 if (I == StartIndex)
1754 OperandsWithMask.push_back(Blend->getIncomingValue(I));
1755 OperandsWithMask.push_back(Blend->getMask(I));
1760 OperandsWithMask, Blend->getDebugLoc());
1761 NewBlend->insertBefore(&R);
1763 VPValue *DeadMask = Blend->getMask(StartIndex);
1765 Blend->eraseFromParent();
1770 if (NewBlend->getNumOperands() == 3 &&
1772 VPValue *Inc0 = NewBlend->getOperand(0);
1773 VPValue *Inc1 = NewBlend->getOperand(1);
1774 VPValue *OldMask = NewBlend->getOperand(2);
1775 NewBlend->setOperand(0, Inc1);
1776 NewBlend->setOperand(1, Inc0);
1777 NewBlend->setOperand(2, NewMask);
1804 APInt MaxVal = AlignedTC - 1;
1807 unsigned NewBitWidth =
1813 bool MadeChange = false;
1822 if (!WideIV || !WideIV->isCanonical() ||
1823 WideIV->hasMoreThanOneUniqueUser() ||
1824 NewIVTy == WideIV->getScalarType())
1829 VPUser *SingleUser = WideIV->getSingleUser();
1838 WideIV->setStartValue(NewStart);
1840 WideIV->setStepValue(NewStep);
1846 Cmp->setOperand(1, NewBTC);
1860 return any_of(Cond->getDefiningRecipe()->operands(), [&Plan, BestVF, BestUF,
1862 return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, PSE);
1875 const SCEV *VectorTripCount =
1880 "Trip count SCEV must be computable");
1901 auto *Term = &ExitingVPBB->back();
1914 for (unsigned Part = 0; Part < UF; ++Part) {
1922 Extracts[Part] = Ext;
1934 match(Phi->getBackedgeValue(),
1936 assert(Index && "Expected index from ActiveLaneMask instruction");
1949 "Expected one VPActiveLaneMaskPHIRecipe for each unroll part");
1956 "Expected incoming values of Phi to be ActiveLaneMasks");
1961 EntryALM->setOperand(2, ALMMultiplier);
1962 LoopALM->setOperand(2, ALMMultiplier);
1966 ExtractFromALM(EntryALM, EntryExtracts);
1971 ExtractFromALM(LoopALM, LoopExtracts);
1973 Not->setOperand(0, LoopExtracts[0]);
1976 for (unsigned Part = 0; Part < UF; ++Part) {
1977 Phis[Part]->setStartValue(EntryExtracts[Part]);
1978 Phis[Part]->setBackedgeValue(LoopExtracts[Part]);
1991 auto *Term = &ExitingVPBB->back();
1998 const SCEV *VectorTripCount =
2004 "Trip count SCEV must be computable");
2027 if (auto *R = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi))
2028 return R->isCanonical();
2029 return isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe,
2030 VPFirstOrderRecurrencePHIRecipe, VPPhi>(&Phi);
2036 R->getScalarType());
2038 HeaderR.eraseFromParent();
2042 HeaderR.getVPSingleValue()->replaceAllUsesWith(Phi->getIncomingValue(0));
2043 HeaderR.eraseFromParent();
2052 B->setParent(nullptr);
2061 {}, {}, Term->getDebugLoc());
2065 Term->eraseFromParent();
2092 R.getVPSingleValue()->replaceAllUsesWith(Trunc);
2102 assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
2103 assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
2112 assert(Plan.getUF() == BestUF && "BestUF must match the Plan's UF");
2127 auto TryToPushSinkCandidate = [&](VPRecipeBase *SinkCandidate) {
2130 if (SinkCandidate == Previous)
2134 !Seen.insert(SinkCandidate).second ||
2147 for (unsigned I = 0; I != WorkList.size(); ++I) {
2150 "only recipes with a single defined value expected");
2165 if (SinkCandidate == FOR)
2168 SinkCandidate->moveAfter(Previous);
2169 Previous = SinkCandidate;
2187 for (VPUser *U : FOR->users()) {
2193 [&VPDT, HoistPoint](VPUser *U) {
2194 auto *R = cast<VPRecipeBase>(U);
2195 return HoistPoint == R ||
2196 VPDT.properlyDominates(HoistPoint, R);
2198 "HoistPoint must dominate all users of FOR");
2200 auto NeedsHoisting = [HoistPoint, &VPDT,
2202 VPRecipeBase *HoistCandidate = HoistCandidateV->getDefiningRecipe();
2203 if (!HoistCandidate)
2208 HoistCandidate->getRegion() == EnclosingLoopRegion) &&
2209 "CFG in VPlan should still be flat, without replicate regions");
2211 if (!Visited.insert(HoistCandidate).second)
2223 return HoistCandidate;
2232 for (unsigned I = 0; I != HoistCandidates.size(); ++I) {
2235 "only recipes with a single defined value expected");
2247 if (auto *R = NeedsHoisting(Op)) {
2250 if (R->getNumDefinedValues() != 1)
2264 HoistCandidate->moveBefore(*HoistPoint->getParent(),
2283 VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();
2286 while (auto *PrevPhi =
2288 assert(PrevPhi->getParent() == FOR->getParent());
2290 Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
2308 {FOR, FOR->getBackedgeValue()});
2310 FOR->replaceAllUsesWith(RecurSplice);
2313 RecurSplice->setOperand(0, FOR);
2319 for (VPUser *U : RecurSplice->users()) {
2330 B.createNaryOp(Instruction::Sub, {LastActiveLane, One});
2331 VPValue *PenultimateLastIter =
2333 {PenultimateIndex, FOR->getBackedgeValue()});
2338 VPValue *Sel = B.createSelect(Cmp, LastPrevIter, PenultimateLastIter);
2351 RecurKind RK = PhiR->getRecurrenceKind();
2358 RecWithFlags->dropPoisonGeneratingFlags();
2364 struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
2366 return Def == getEmptyKey() || Def == getTombstoneKey();
2377 return GEP->getSourceElementType();
2380 .Case<VPVectorPointerRecipe, VPWidenGEPRecipe>(
2381 [](auto *I) { return I->getSourceElementType(); })
2382 .Default([](auto *) { return nullptr; });
2386 static bool canHandle(const VPSingleDefRecipe *Def) {
2395 if (!C || (!C->first && (C->second == Instruction::InsertValue ||
2396 C->second == Instruction::ExtractValue)))
2402 return !Def->mayReadFromMemory();
2406 static unsigned getHashValue(const VPSingleDefRecipe *Def) {
2407 const VPlan *Plan = Def->getParent()->getPlan();
2408 VPTypeAnalysis TypeInfo(*Plan);
2411 getGEPSourceElementType(Def), TypeInfo.inferScalarType(Def),
2414 if (RFlags->hasPredicate())
2420 static bool isEqual(const VPSingleDefRecipe *L, const VPSingleDefRecipe *R) {
2423 if (L->getVPDefID() != R->getVPDefID() ||
2425 getGEPSourceElementType(L) != getGEPSourceElementType(R) ||
2427 !equal(L->operands(), R->operands()))
2430 "must have valid opcode info for both recipes");
2432 if (LFlags->hasPredicate() &&
2433 LFlags->getPredicate() !=
2439 const VPRegionBlock *RegionL = L->getRegion();
2440 const VPRegionBlock *RegionR = R->getRegion();
2443 L->getParent() != R->getParent())
2445 const VPlan *Plan = L->getParent()->getPlan();
2446 VPTypeAnalysis TypeInfo(*Plan);
2447 return TypeInfo.inferScalarType(L) == TypeInfo.inferScalarType(R);
2462 if (!Def || !VPCSEDenseMapInfo::canHandle(Def))
2466 if (!VPDT.dominates(V->getParent(), VPBB))
2471 Def->replaceAllUsesWith(V);
2490 "Expected vector prehader's successor to be the vector loop region");
2497 return !Op->isDefinedOutsideLoopRegions();
2500 R.moveBefore(*Preheader, Preheader->end());
2524 VPValue *ResultVPV = R.getVPSingleValue();
2526 unsigned NewResSizeInBits = MinBWs.lookup(UI);
2527 if (!NewResSizeInBits)
2540 (void)OldResSizeInBits;
2548 VPW->dropPoisonGeneratingFlags();
2550 if (OldResSizeInBits != NewResSizeInBits &&
2555 Ext->insertAfter(&R);
2557 Ext->setOperand(0, ResultVPV);
2558 assert(OldResSizeInBits > NewResSizeInBits && "Nothing to shrink?");
2561 "Only ICmps should not need extending the result.");
2570 for (unsigned Idx = StartIdx; Idx != R.getNumOperands(); ++Idx) {
2571 auto *Op = R.getOperand(Idx);
2572 unsigned OpSizeInBits =
2574 if (OpSizeInBits == NewResSizeInBits)
2576 assert(OpSizeInBits > NewResSizeInBits && "nothing to truncate");
2577 auto [ProcessedIter, IterIsEmpty] = ProcessedTruncs.try_emplace(Op);
2579 R.setOperand(Idx, ProcessedIter->second);
2587 Builder.setInsertPoint(&R);
2589 Builder.createWidenCast(Instruction::Trunc, Op, NewResTy);
2590 ProcessedIter->second = NewOp;
2591 R.setOperand(Idx, NewOp);
2606 assert(VPBB->getNumSuccessors() == 2 &&
2607 "Two successors expected for BranchOnCond");
2608 unsigned RemovedIdx;
2619 "There must be a single edge between VPBB and its successor");
2628 VPBB->back().eraseFromParent();
2690 VPValue *StartV = CanonicalIVPHI->getStartValue();
2692 auto *CanonicalIVIncrement =
2696 CanonicalIVIncrement->dropPoisonGeneratingFlags();
2697 DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
2707 VPValue *TripCount, *IncrementValue;
2712 IncrementValue = CanonicalIVIncrement;
2718 IncrementValue = CanonicalIVPHI;
2722 auto *EntryIncrement = Builder.createOverflowingOp(
2730 {EntryIncrement, TC, ALMMultiplier}, DL,
2731 "active.lane.mask.entry");
2737 LaneMaskPhi->insertAfter(CanonicalIVPHI);
2742 Builder.setInsertPoint(OriginalTerminator);
2743 auto *InLoopIncrement =
2745 {IncrementValue}, {false, false}, DL);
2747 {InLoopIncrement, TripCount, ALMMultiplier},
2748 DL, "active.lane.mask.next");
2753 auto *NotMask = Builder.createNot(ALM, DL);
2766 auto *FoundWidenCanonicalIVUser = find_if(
2770 "Must have at most one VPWideCanonicalIVRecipe");
2771 if (FoundWidenCanonicalIVUser !=
2773 auto *WideCanonicalIV =
2775 WideCanonicalIVs.push_back(WideCanonicalIV);
2783 if (WidenOriginalIV && WidenOriginalIV->isCanonical())
2784 WideCanonicalIVs.push_back(WidenOriginalIV);
2790 for (auto *Wide : WideCanonicalIVs) {
2796 assert(VPI->getOperand(0) == Wide &&
2797 "WidenCanonicalIV must be the first operand of the compare");
2798 assert(!HeaderMask && "Multiple header masks found?");
2806 VPlan &Plan, bool UseActiveLaneMaskForControlFlow,
2809 UseActiveLaneMaskForControlFlow) &&
2810 "DataAndControlFlowWithoutRuntimeCheck implies "
2811 "UseActiveLaneMaskForControlFlow");
2814 auto *FoundWidenCanonicalIVUser = find_if(
2816 assert(FoundWidenCanonicalIVUser &&
2817 "Must have widened canonical IV when tail folding!");
2819 auto *WideCanonicalIV =
2822 if (UseActiveLaneMaskForControlFlow) {
2832 nullptr, "active.lane.mask");
2848 template <typename OpTy> bool match(OpTy *V) const {
2859 template <typename Op0_t, typename Op1_t>
2878 VPValue *Addr, *Mask, *EndPtr;
2881 auto AdjustEndPtr = [&CurRecipe, &EVL](VPValue *EndPtr) {
2883 EVLEndPtr->insertBefore(&CurRecipe);
2884 EVLEndPtr->setOperand(1, &EVL);
2888 if (match(&CurRecipe,
2902 LoadR->insertBefore(&CurRecipe);
2904 Intrinsic::experimental_vp_reverse, {LoadR, Plan->getTrue(), &EVL},
2913 StoredVal, EVL, Mask);
2915 if (match(&CurRecipe,
2921 Intrinsic::experimental_vp_reverse,
2922 {ReversedVal, Plan->getTrue(), &EVL},
2926 AdjustEndPtr(EndPtr), NewReverse, EVL,
2931 if (Rdx->isConditional() &&
2936 if (Interleave->getMask() &&
2941 if (match(&CurRecipe,
2950 Intrinsic::vp_merge, {Mask, LHS, RHS, &EVL},
2973 "User of VF that we can't transform to EVL.");
2979 [&LoopRegion, &Plan](VPUser *U) {
2981 m_c_Add(m_Specific(LoopRegion->getCanonicalIV()),
2982 m_Specific(&Plan.getVFxUF()))) ||
2983 isa<VPWidenPointerInductionRecipe>(U);
2985 "Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
2986 "increment of the canonical induction.");
3006 MaxEVL = Builder.createScalarZExtOrTrunc(
3010 Builder.setInsertPoint(Header, Header->getFirstNonPhi());
3011 VPValue *PrevEVL = Builder.createScalarPhi(
3025 Intrinsic::experimental_vp_splice,
3026 {V1, V2, Imm, Plan.getTrue(), PrevEVL, &EVL},
3030 R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
3048 VPValue *EVLMask = Builder.createICmp(
3066 assert(NumDefVal == CurRecipe->getNumDefinedValues() &&
3067 "New recipe must define the same number of values as the "
3072 for (unsigned I = 0; I < NumDefVal; ++I) {
3073 VPValue *CurVPV = CurRecipe->getVPValue(I);
3085 R->eraseFromParent();
3135 VPlan &Plan, const std::optional<unsigned> &MaxSafeElements) {
3143 VPValue *StartV = CanonicalIVPHI->getStartValue();
3147 EVLPhi->insertAfter(CanonicalIVPHI);
3148 VPBuilder Builder(Header, Header->getFirstNonPhi());
3151 VPPhi *AVLPhi = Builder.createScalarPhi(
3155 if (MaxSafeElements) {
3165 auto *CanonicalIVIncrement =
3167 Builder.setInsertPoint(CanonicalIVIncrement);
3171 OpVPEVL = Builder.createScalarZExtOrTrunc(
3172 OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
3174 auto *NextEVLIV = Builder.createOverflowingOp(
3175 Instruction::Add, {OpVPEVL, EVLPhi},
3176 {CanonicalIVIncrement->hasNoUnsignedWrap(),
3177 CanonicalIVIncrement->hasNoSignedWrap()},
3178 CanonicalIVIncrement->getDebugLoc(),
"index.evl.next");
3179 EVLPhi->addOperand(NextEVLIV);
3181 VPValue *NextAVL = Builder.createOverflowingOp(
3182 Instruction::Sub, {AVLPhi, OpVPEVL}, {true, false},
3190 CanonicalIVPHI->replaceAllUsesWith(EVLPhi);
3191 CanonicalIVIncrement->setOperand(0, CanonicalIVPHI);
3205 assert(!EVLPhi && "Found multiple EVL PHIs. Only one expected");
3216 [[maybe_unused]] bool FoundAVL =
3219 assert(FoundAVL && "Didn't find AVL?");
3227 [[maybe_unused]] bool FoundAVLNext =
3230 assert(FoundAVLNext && "Didn't find AVL backedge?");
3241 VPValue *Backedge = CanonicalIV->getIncomingValue(1);
3244 "Unexpected canonical iv");
3250 CanonicalIV->eraseFromParent();
3264 "Expected BranchOnCond with ICmp comparing EVL increment with vector "
3269 LatchExitingBr->setOperand(0,
3281 return R->getRegion() ||
3285 for (const SCEV *Stride : StridesMap.values()) {
3288 const APInt *StrideConst;
3305 unsigned BW = U->getType()->getScalarSizeInBits();
3311 RewriteMap[StrideV] = PSE.getSCEV(StrideV);
3318 const SCEV *ScevExpr = ExpSCEV->getSCEV();
3321 if (NewSCEV != ScevExpr) {
3323 ExpSCEV->replaceAllUsesWith(NewExp);
3332 const std::function<bool(BasicBlock *)> &BlockNeedsPredication) {
3336 auto CollectPoisonGeneratingInstrsInBackwardSlice([&](VPRecipeBase *Root) {
3341 while (!Worklist.empty()) {
3344 if (!Visited.insert(CurRec).second)
3366 RecWithFlags->isDisjoint()) {
3369 Instruction::Add, {A, B}, {false, false},
3370 RecWithFlags->getDebugLoc());
3371 New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
3372 RecWithFlags->replaceAllUsesWith(New);
3373 RecWithFlags->eraseFromParent();
3376 RecWithFlags->dropPoisonGeneratingFlags();
3381 assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
3382 "found instruction with poison generating flags not covered by "
3383 "VPRecipeWithIRFlags");
3388 if (VPRecipeBase *OpDef = Operand->getDefiningRecipe())
3400 Instruction &UnderlyingInstr = WidenRec->getIngredient();
3401 VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
3402 if (AddrDef && WidenRec->isConsecutive() &&
3403 BlockNeedsPredication(UnderlyingInstr.getParent()))
3404 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3406 VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();
3410 InterleaveRec->getInterleaveGroup();
3411 bool NeedPredication = false;
3413 I < NumMembers; ++I) {
3416 NeedPredication |= BlockNeedsPredication(Member->getParent());
3419 if (NeedPredication)
3420 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3432 if (InterleaveGroups.empty())
3439 for (const auto *IG : InterleaveGroups) {
3445 StoredValues.push_back(StoreR->getStoredValue());
3446 for (unsigned I = 1; I < IG->getFactor(); ++I) {
3453 StoredValues.push_back(StoreR->getStoredValue());
3457 bool NeedsMaskForGaps =
3458 (IG->requiresScalarEpilogue() && !ScalarEpilogueAllowed) ||
3459 (!StoredValues.empty() && !IG->isFull());
3471 VPValue *Addr = Start->getAddr();
3480 assert(IG->getIndex(IRInsertPos) != 0 &&
3481 "index of insert position shouldn't be zero");
3485 IG->getIndex(IRInsertPos),
3489 Addr = B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);
3495 if (IG->isReverse()) {
3498 -(int64_t)IG->getFactor(), NW, InsertPos->getDebugLoc());
3499 ReversePtr->insertBefore(InsertPos);
3503 InsertPos->getMask(), NeedsMaskForGaps,
3504 InterleaveMD, InsertPos->getDebugLoc());
3505 VPIG->insertBefore(InsertPos);
3508 for (unsigned i = 0; i < IG->getFactor(); ++i)
3511 if (!Member->getType()->isVoidTy()) {
3570 AddOp = Instruction::Add;
3571 MulOp = Instruction::Mul;
3573 AddOp = ID.getInductionOpcode();
3574 MulOp = Instruction::FMul;
3582 Step = Builder.createScalarCast(Instruction::Trunc, Step, Ty, DL);
3583 Start = Builder.createScalarCast(Instruction::Trunc, Start, Ty, DL);
3585 Flags.dropPoisonGeneratingFlags();
3594 Init = Builder.createWidenCast(Instruction::UIToFP, Init, StepTy);
3599 Init = Builder.createNaryOp(MulOp, {Init, SplatStep}, Flags);
3600 Init = Builder.createNaryOp(AddOp, {SplatStart, Init}, Flags,
3606 WidePHI->insertBefore(WidenIVR);
3617 Builder.setInsertPoint(R->getParent(), std::next(R->getIterator()));
3621 VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, StepTy,
3624 VF = Builder.createScalarZExtOrTrunc(VF, StepTy,
3627 Inc = Builder.createNaryOp(MulOp, {Step, VF}, Flags);
3634 auto *Next = Builder.createNaryOp(AddOp, {Prev, Inc}, Flags,
3637 WidePHI->addOperand(Next);
3665 VPlan *Plan = R->getParent()->getPlan();
3666 VPValue *Start = R->getStartValue();
3667 VPValue *Step = R->getStepValue();
3668 VPValue *VF = R->getVFValue();
3670 assert(R->getInductionDescriptor().getKind() ==
3672 "Not a pointer induction according to InductionDescriptor!");
3675 "Recipe should have been replaced");
3681 VPPhi *ScalarPtrPhi = Builder.createScalarPhi(Start, DL, "pointer.phi");
3685 Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
3688 Offset = Builder.createOverflowingOp(Instruction::Mul, {Offset, Step});
3689 VPValue *PtrAdd = Builder.createNaryOp(
3691 R->replaceAllUsesWith(PtrAdd);
3696 VF = Builder.createScalarZExtOrTrunc(VF, StepTy, TypeInfo.inferScalarType(VF),
3698 VPValue *Inc = Builder.createOverflowingOp(Instruction::Mul, {Step, VF});
3701 Builder.createPtrAdd(ScalarPtrPhi, Inc, DL, "ptr.ind");
3710 if (!R->isReplicator())
3714 R->dissolveToCFGLoop();
3736 WidenIVR->replaceAllUsesWith(PtrAdd);
3749 for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
3750 Select = Builder.createSelect(Blend->getMask(I),
3751 Blend->getIncomingValue(I), Select,
3752 R.getDebugLoc(), "predphi");
3753 Blend->replaceAllUsesWith(Select);
3768 for (VPValue *Op : LastActiveL->operands()) {
3769 VPValue *NotMask = Builder.createNot(Op, LastActiveL->getDebugLoc());
3774 VPValue *FirstInactiveLane = Builder.createNaryOp(
3776 LastActiveL->getDebugLoc(), "first.inactive.lane");
3781 VPValue *LastLane = Builder.createNaryOp(
3782 Instruction::Sub, {FirstInactiveLane, One},
3783 LastActiveL->getDebugLoc(), "last.active.lane");
3794 DebugLoc DL = BranchOnCountInst->getDebugLoc();
3797 ToRemove.push_back(BranchOnCountInst);
3812 ? Instruction::UIToFP
3813 : Instruction::Trunc;
3814 VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
3820 Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
3825 Flags = {VPI->getFastMathFlags()};
3830 MulOpc, {VectorStep, ScalarStep}, Flags, R.getDebugLoc());
3832 VPI->replaceAllUsesWith(VectorStep);
3838 R->eraseFromParent();
3851 "unsupported early exit VPBB");
3862 "Terminator must be be BranchOnCond");
3863 VPValue *CondOfEarlyExitingVPBB =
3865 auto *CondToEarlyExit = TrueSucc == EarlyExitVPBB
3866 ? CondOfEarlyExitingVPBB
3867 : Builder.createNot(CondOfEarlyExitingVPBB);
3884 VPBuilder EarlyExitB(VectorEarlyExitVPBB);
3889 unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1;
3890 if (ExitIRI->getNumOperands() != 1) {
3893 ExitIRI->extractLastLaneOfLastPartOfFirstOperand(MiddleBuilder);
3896 VPValue *IncomingFromEarlyExit = ExitIRI->getOperand(EarlyExitIdx);
3897 if (!IncomingFromEarlyExit->isLiveIn()) {
3905 ExitIRI->setOperand(EarlyExitIdx, IncomingFromEarlyExit);
3916 "Unexpected terminator");
3917 auto *IsLatchExitTaken =
3919 LatchExitingBranch->getOperand(1));
3920 auto *AnyExitTaken = Builder.createNaryOp(
3921 Instruction::Or, {IsEarlyExitTaken, IsLatchExitTaken});
3923 LatchExitingBranch->eraseFromParent();
3933 Type *RedTy = Ctx.Types.inferScalarType(Red);
3934 VPValue *VecOp = Red->getVecOp();
3937 auto IsExtendedRedValidAndClampRange =
3949 if (Red->isPartialReduction()) {
3954 ExtRedCost = Ctx.TTI.getPartialReductionCost(
3955 Opcode, SrcTy, nullptr, RedTy, VF, ExtKind,
3958 ExtRedCost = Ctx.TTI.getExtendedReductionCost(
3959 Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
3960 Red->getFastMathFlags(), CostKind);
3962 return ExtRedCost.isValid() && ExtRedCost < ExtCost + RedCost;
3970 IsExtendedRedValidAndClampRange(
3973 Ctx.Types.inferScalarType(A)))
3991 if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
3994 Type *RedTy = Ctx.Types.inferScalarType(Red);
3997 auto IsMulAccValidAndClampRange =
4004 Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
4007 if (Red->isPartialReduction()) {
4009 Ext1 ? Ctx.Types.inferScalarType(Ext1->getOperand(0)) : nullptr;
4012 MulAccCost = Ctx.TTI.getPartialReductionCost(
4013 Opcode, SrcTy, SrcTy2, RedTy, VF,
4023 if (Ext0 && Ext1 && Ext0->getOpcode() != Ext1->getOpcode())
4027 !Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;
4029 MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,
4037 ExtCost += Ext0->computeCost(VF, Ctx);
4039 ExtCost += Ext1->computeCost(VF, Ctx);
4041 ExtCost += OuterExt->computeCost(VF, Ctx);
4043 return MulAccCost.isValid() &&
4044 MulAccCost < ExtCost + MulCost + RedCost;
4049 VPValue *VecOp = Red->getVecOp();
4067 if (!ExtA || ExtB || !ValB->isLiveIn())
4083 Builder.createWidenCast(Instruction::CastOps::Trunc, ValB, NarrowTy);
4084 Type *WideTy = Ctx.Types.inferScalarType(ExtA);
4085 ValB = ExtB = Builder.createWidenCast(ExtOpc, Trunc, WideTy);
4086 Mul->setOperand(1, ExtB);
4096 ExtendAndReplaceConstantOp(RecipeA, RecipeB, B, Mul);
4101 IsMulAccValidAndClampRange(Mul, RecipeA, RecipeB, nullptr)) {
4108 if (!Sub && IsMulAccValidAndClampRange(Mul, nullptr, nullptr, nullptr))
4125 ExtendAndReplaceConstantOp(Ext0, Ext1, B, Mul);
4134 (Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
4135 Ext0->getOpcode() == Ext1->getOpcode() &&
4136 IsMulAccValidAndClampRange(Mul, Ext0, Ext1, Ext) && Mul->hasOneUse()) {
4138 Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(), nullptr,
4139 *Ext0, *Ext0, Ext0->getDebugLoc());
4140 NewExt0->insertBefore(Ext0);
4145 Ext->getResultType(), nullptr, *Ext1,
4146 *Ext1, Ext1->getDebugLoc());
4149 Mul->setOperand(0, NewExt0);
4150 Mul->setOperand(1, NewExt1);
4151 Red->setOperand(1, Mul);
4164 auto IP = std::next(Red->getIterator());
4165 auto *VPBB = Red->getParent();
4175 Red->replaceAllUsesWith(AbstractR);
4205 for (VPValue *VPV : VPValues) {
4207 (VPV->isLiveIn() && VPV->getLiveInIRValue() &&
4215 if (User->usesScalars(VPV))
4218 HoistPoint = HoistBlock->begin();
4222 "All users must be in the vector preheader or dominated by it");
4227 VPV->replaceUsesWithIf(Broadcast,
4228 [VPV, Broadcast](VPUser &U, unsigned Idx) {
4229 return Broadcast != &U && !U.usesScalars(VPV);
4246 if (RepR->isPredicated() || !RepR->isSingleScalar() ||
4247 RepR->getOpcode() != Instruction::Load)
4250 VPValue *Addr = RepR->getOperand(0);
4253 if (!Loc.AATags.Scope)
4258 if (R.mayWriteToMemory()) {
4260 if (!Loc || !Loc->AATags.Scope || !Loc->AATags.NoAlias)
4268 for (auto &[LoadRecipe, LoadLoc] : CandidateLoads) {
4272 const AAMDNodes &LoadAA = LoadLoc.AATags;
4288 return CommonMetadata;
4291 template <unsigned Opcode>
4296 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
4297 "Only Load and Store opcodes supported");
4298 constexpr bool IsLoad = (Opcode == Instruction::Load);
4308 if (!RepR || RepR->getOpcode() != Opcode || !RepR->isPredicated())
4312 VPValue *Addr = RepR->getOperand(IsLoad ? 0 : 1);
4315 RecipesByAddress[AddrSCEV].push_back(RepR);
4322 return TypeInfo.inferScalarType(IsLoad ? Recipe : Recipe->getOperand(0));
4324 for (auto &[Addr, Recipes] : RecipesByAddress) {
4325 if (Recipes.size() < 2)
4333 VPValue *MaskI = RecipeI->getMask();
4334 Type *TypeI = GetLoadStoreValueType(RecipeI);
4340 bool HasComplementaryMask = false;
4345 VPValue *MaskJ = RecipeJ->getMask();
4346 Type *TypeJ = GetLoadStoreValueType(RecipeJ);
4347 if (TypeI == TypeJ) {
4357 if (HasComplementaryMask) {
4358 assert(Group.size() >= 2 && "must have at least 2 entries");
4368 template <typename InstType>
4388 for (auto &Group : Groups) {
4413 LoadWithMinAlign->getUnderlyingInstr(), {EarliestLoad->getOperand(0)},
4414 false, nullptr, *EarliestLoad,
4417 UnpredicatedLoad->insertBefore(EarliestLoad);
4421 Load->replaceAllUsesWith(UnpredicatedLoad);
4422 Load->eraseFromParent();
4432 if (!StoreLoc || !StoreLoc->AATags.Scope)
4438 StoresToSink.end());
4442 SinkStoreInfo SinkInfo(StoresToSinkSet, *StoresToSink[0], PSE, L, TypeInfo);
4457 for (auto &Group : Groups) {
4474 VPValue *SelectedValue = Group[0]->getOperand(0);
4477 for (unsigned I = 1; I < Group.size(); ++I) {
4478 VPValue *Mask = Group[I]->getMask();
4480 SelectedValue = Builder.createSelect(Mask, Value, SelectedValue,
4488 auto *UnpredicatedStore =
4490 {SelectedValue, LastStore->getOperand(1)},
4492 nullptr, *LastStore, CommonMetadata);
4493 UnpredicatedStore->insertBefore(*InsertBB, LastStore->getIterator());
4497 Store->eraseFromParent();
4504 assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
4505 assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
4539 auto *TCMO = Builder.createNaryOp(
4567 auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](VPUser *U) {
4569 return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
4576 none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))
4586 DefR->replaceUsesWithIf(
4587 BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
4589 return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
4603 for (VPValue *Def : R.definedValues()) {
4616 auto IsCandidateUnpackUser = [Def](VPUser *U) {
4618 return U->usesScalars(Def) &&
4621 if (none_of(Def->users(), IsCandidateUnpackUser))
4628 Unpack->insertAfter(&R);
4629 Def->replaceUsesWithIf(Unpack,
4630 [&IsCandidateUnpackUser](VPUser &U, unsigned) {
4631 return IsCandidateUnpackUser(&U);
4641 bool RequiresScalarEpilogue) {
4643 assert(VectorTC.isLiveIn() && "vector-trip-count must be a live-in");
4662 if (TailByMasking) {
4663 TC = Builder.createNaryOp(
4665 {TC, Builder.createNaryOp(Instruction::Sub,
4676 Builder.createNaryOp(Instruction::URem, {TC, Step},
4685 if (RequiresScalarEpilogue) {
4687 "requiring scalar epilogue is not supported with fail folding");
4690 R = Builder.createSelect(IsZero, Step, R);
4693 VPValue *Res = Builder.createNaryOp(
4712 Builder.createElementCount(TCTy, VFEC * Plan.getUF());
4719 VPValue *RuntimeVF = Builder.createElementCount(TCTy, VFEC);
4723 BC, [&VF](VPUser &U, unsigned) { return !U.usesScalars(&VF); });
4728 VPValue *MulByUF = Builder.createOverflowingOp(
4729 Instruction::Mul, {RuntimeVF, UF}, {true, false});
4738 BasicBlock *EntryBB = Entry->getIRBasicBlock();
4746 const SCEV *Expr = ExpSCEV->getSCEV();
4749 ExpandedSCEVs[ExpSCEV->getSCEV()] = Res;
4754 ExpSCEV->eraseFromParent();
4757 "VPExpandSCEVRecipes must be at the beginning of the entry block, "
4758 "after any VPIRInstructions");
4761 auto EI = Entry->begin();
4771 return ExpandedSCEVs;
4787 return Member0Op == OpV;
4789 return !W->getMask() && Member0Op == OpV;
4791 return IR->getInterleaveGroup()->isFull() && IR->getVPValue(Idx) == OpV;
4802 if (!InterleaveR || InterleaveR->getMask())
4805 Type *GroupElementTy = nullptr;
4809 [&TypeInfo, GroupElementTy](VPValue *Op) {
4810 return TypeInfo.inferScalarType(Op) == GroupElementTy;
4817 [&TypeInfo, GroupElementTy](VPValue *Op) {
4818 return TypeInfo.inferScalarType(Op) == GroupElementTy;
4827 return IG->getFactor() == VFMin && IG->getNumMembers() == VFMin &&
4828 GroupSize == VectorRegWidth;
4836 return RepR && RepR->isSingleScalar();
4843 auto *R = V->getDefiningRecipe();
4851 for (unsigned Idx = 0, E = WideMember0->getNumOperands(); Idx != E; ++Idx)
4852 WideMember0->setOperand(
4861 auto *LI = cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos());
4863 *LI, LoadGroup->getAddr(), LoadGroup->getMask(), true,
4864 false, {}, LoadGroup->getDebugLoc());
4865 L->insertBefore(LoadGroup);
4871 assert(RepR->isSingleScalar() &&
4873 "must be a single scalar load");
4874 NarrowedOps.insert(RepR);
4879 VPValue *PtrOp = WideLoad->getAddr();
4881 PtrOp = VecPtr->getOperand(0);
4886 nullptr, {}, *WideLoad);
4887 N->insertBefore(WideLoad);
4917 if (R.mayWriteToMemory() && !InterleaveR)
4939 if (InterleaveR->getStoredValues().empty())
4944 auto *Member0 = InterleaveR->getStoredValues()[0];
4946 all_of(InterleaveR->getStoredValues(),
4947 [Member0](VPValue *VPV) { return Member0 == VPV; })) {
4955 VPRecipeBase *DefR = Op.value()->getDefiningRecipe();
4958 auto *IR = dyn_cast<VPInterleaveRecipe>(DefR);
4959 return IR && IR->getInterleaveGroup()->isFull() &&
4960 IR->getVPValue(Op.index()) == Op.value();
4972 for (const auto &[I, V] : enumerate(InterleaveR->getStoredValues())) {
4974 if (!R || R->getOpcode() != WideMember0->getOpcode() ||
4975 R->getNumOperands() > 2)
4978 [WideMember0, Idx = I](const auto &P) {
4979 const auto &[OpIdx, OpV] = P;
4980 return !canNarrowLoad(WideMember0, OpIdx, OpV, Idx);
4987 if (StoreGroups.empty())
4993 for (auto *StoreGroup : StoreGroups) {
4999 *SI, StoreGroup->getAddr(), Res, nullptr, true,
5000 false, {}, StoreGroup->getDebugLoc());
5001 S->insertBefore(StoreGroup);
5002 StoreGroup->eraseFromParent();
5017 Instruction::Mul, {VScale, UF}, {true, false});
5021 Inc->setOperand(1, UF);
5040 "must have a BranchOnCond");
5043 if (VF.isScalable() && VScaleForTuning.has_value())
5044 VectorStep *= *VScaleForTuning;
5045 assert(VectorStep > 0 && "trip count should not be zero");
5049 MiddleTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
5062 if (WideIntOrFp && WideIntOrFp->getTruncInst())
5069 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
5072 Start, VectorTC, Step);
5095 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
5105 IVEndValues[WideIVR] = EndValue;
5106 ResumePhiR->setOperand(0, EndValue);
5107 ResumePhiR->setName("bc.resume.val");
5114 "should only skip truncated wide inductions");
5122 auto *ResumeFromVectorLoop = VectorPhiR->getBackedgeValue();
5124 "Cannot handle loops with uncountable early exits");
5130 "vector.recur.extract");
5132 ResumePhiR->setName(IsFOR ? "scalar.recur.init" : "bc.merge.rdx");
5133 ResumePhiR->setOperand(0, ResumeFromVectorLoop);
5142 VPBuilder ScalarPHBuilder(ScalarPHVPBB);
5143 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
5155 "Cannot handle loops with uncountable early exits");
5228 make_range(MiddleVPBB->getFirstNonPhi(), MiddleVPBB->end()))) {
5242 "vector.recur.extract.for.phi");
This file implements a class to represent arbitrary precision integral constant values and operations...
This file provides utility analysis objects describing memory locations.
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
This is the interface for a metadata-based scoped no-alias analysis.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file implements the TypeSwitch template, which mimics a switch() statement whose cases are type ...
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
This file declares the class VPlanVerifier, which contains utility functions to check the consistency...
This file contains the declarations of the Vectorization Plan base classes:
Helper for extra no-alias checks via known-safe recipe and SCEV.
SinkStoreInfo(const SmallPtrSetImpl< VPRecipeBase * > &ExcludeRecipes, VPReplicateRecipe &GroupLeader, PredicatedScalarEvolution &PSE, const Loop &L, VPTypeAnalysis &TypeInfo)
bool shouldSkip(VPRecipeBase &R) const
Return true if R should be skipped during alias checking, either because it's in the exclude set or b...
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
unsigned getActiveBits() const
Compute the number of active bits in the value.
APInt abs() const
Get the absolute value.
unsigned getBitWidth() const
Return the number of bits in the APInt.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
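A minimal sketch (not from this file) of how the APInt members listed above compose; the concrete values are made up for illustration:
  #include "llvm/ADT/APInt.h"
  using namespace llvm;
  void apintDemo() {
    APInt Distance(32, -8, /*isSigned=*/true); // 32-bit APInt holding -8
    APInt Abs = Distance.abs();                // absolute value: 8
    APInt Wide = Abs.zext(64);                 // zero-extend to 64 bits
    unsigned Bits = Wide.getBitWidth();        // 64
    unsigned Active = Abs.getActiveBits();     // 4 bits needed to represent 8
    bool AtLeast4 = Abs.uge(APInt(32, 4));     // unsigned >= comparison: true
    (void)Bits; (void)Active; (void)AtLeast4;
  }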
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & back() const
back - Get the last element.
const T & front() const
front - Get the first element.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
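For illustration only, a tiny sketch of inverting one of the predicates above (this uses the static overload of getInversePredicate):
  #include "llvm/IR/InstrTypes.h"
  using namespace llvm;
  CmpInst::Predicate invertULT() {
    // ICMP_ULT (unsigned less than) inverts to ICMP_UGE.
    return CmpInst::getInversePredicate(CmpInst::ICMP_ULT);
  }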
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=true)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
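A small sketch, assuming an existing LLVMContext, of the constant factories listed above:
  #include "llvm/IR/Constants.h"
  #include "llvm/IR/DerivedTypes.h"
  using namespace llvm;
  void constantDemo(LLVMContext &Ctx) {
    IntegerType *I32 = Type::getInt32Ty(Ctx);
    Constant *Zero = Constant::getNullValue(I32);            // i32 0
    Constant *AllOnes = Constant::getAllOnesValue(I32);      // i32 -1
    ConstantInt *MinusOne = ConstantInt::getSigned(I32, -1); // same bit pattern as -1
    (void)Zero; (void)AllOnes; (void)MinusOne;
  }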
A parsed version of the target data layout string in and methods for querying it.
static DebugLoc getCompilerGenerated()
static DebugLoc getUnknown()
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
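These two members belong to DenseMap; a sketch of the cache pattern they enable (keys and values here are arbitrary):
  #include "llvm/ADT/DenseMap.h"
  using namespace llvm;
  void denseMapDemo() {
    DenseMap<int, const char *> Cache;
    auto [It, Inserted] = Cache.try_emplace(42, "first"); // insert only if missing
    if (!Inserted)
      (void)It->second;               // existing entry is reused
    const char *V = Cache.lookup(7);  // default value (nullptr) if absent
    (void)V;
  }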
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
constexpr bool isVector() const
One or more elements.
static constexpr ElementCount getScalable(ScalarTy MinVal)
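A brief sketch contrasting scalable and fixed element counts (getFixed is assumed alongside the getScalable member listed above):
  #include "llvm/Support/TypeSize.h"
  using namespace llvm;
  void elementCountDemo() {
    ElementCount Scalable = ElementCount::getScalable(4); // <vscale x 4 x ...>
    ElementCount Fixed = ElementCount::getFixed(4);       // <4 x ...>
    bool IsVec = Scalable.isVector();                     // true: more than one element
    (void)Fixed; (void)IsVec;
  }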
Utility class for floating point operations which can have information about relaxed accuracy require...
Represents flags for the getelementptr instruction/expression.
GEPNoWrapFlags withoutNoUnsignedWrap() const
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
The group of interleaved loads/stores sharing the same stride and close to each other.
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
uint32_t getNumMembers() const
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
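A hedged sketch of attaching branch weights with the member above, mirroring how the middle-block terminator is annotated in the listing; the 1:7 ratio is illustrative only:
  #include "llvm/IR/Instruction.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/MDBuilder.h"
  using namespace llvm;
  void annotateBranch(Instruction *MiddleTerm) {
    MDBuilder MDB(MiddleTerm->getContext());
    MDNode *BranchWeights = MDB.createBranchWeights(/*TrueWeight=*/1, /*FalseWeight=*/7);
    MiddleTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
  }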
This class implements a map that also provides access to all stored values in a deterministic order.
ValueT lookup(const KeyT &Key) const
Representation for a specific memory location.
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
static LLVM_ABI unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
RegionT * getParent() const
Get the parent of the Region.
This class uses information about analyze scalars to rewrite expressions in canonical form.
LLVM_ABI Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
static const SCEV * rewrite(const SCEV *Scev, ScalarEvolution &SE, ValueToSCEVMapTy &Map)
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
const DataLayout & getDataLayout() const
Return the DataLayout associated with the module this SCEV instance is operating on.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getUDivExpr(const SCEV *LHS, const SCEV *RHS)
Get a canonical unsigned division expression, or something simpler if possible.
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
LLVM_ABI const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
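A sketch, assuming SE and the IR values come from the usual analysis passes, of combining the ScalarEvolution queries listed above:
  #include "llvm/Analysis/ScalarEvolution.h"
  #include "llvm/IR/InstrTypes.h"
  using namespace llvm;
  void scevDemo(ScalarEvolution &SE, Value *TripCount, Value *Step) {
    const SCEV *TC = SE.getSCEV(TripCount);
    const SCEV *St = SE.getSCEV(Step);
    const SCEV *Diff = SE.getMinusSCEV(TC, St);               // TC - Step
    bool Gt = SE.isKnownPredicate(CmpInst::ICMP_UGT, TC, St); // is TC > Step known?
    (void)Diff; (void)Gt;
  }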
static LLVM_ABI bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias)
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
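A sketch of the visited-set idiom that the insert/contains members above support throughout the worklist loops in this file:
  #include "llvm/ADT/SmallPtrSet.h"
  using namespace llvm;
  void visitOnce(int *Item) {
    SmallPtrSet<int *, 8> Seen;
    if (!Seen.insert(Item).second)
      return;                         // second == false: already visited
    bool Known = Seen.contains(Item); // true after the insertion
    (void)Known;
  }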
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
static constexpr TypeSize get(ScalarTy Quantity, bool Scalable)
This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionalit...
TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)
Add a case on the given type.
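A short example of TypeSwitch replacing a chain of dyn_casts; the classify helper is illustrative only.

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Dispatch on the dynamic type of an Instruction and produce a label.
StringRef classify(Instruction *I) {
  return TypeSwitch<Instruction *, StringRef>(I)
      .Case<LoadInst>([](LoadInst *) { return "load"; })
      .Case<StoreInst>([](StoreInst *) { return "store"; })
      .Default([](Instruction *) { return "other"; });
}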
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
RecipeListTy::iterator iterator
Instruction iterators...
iterator begin()
Recipe iterator methods.
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
VPRegionBlock * getEnclosingLoopRegion()
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
const VPRecipeBase & back() const
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
VPRegionBlock * getParent()
const VPBasicBlock * getExitingBasicBlock() const
size_t getNumSuccessors() const
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
size_t getNumPredecessors() const
const VPBlocksTy & getPredecessors() const
VPBlockBase * getSinglePredecessor() const
const VPBasicBlock * getEntryBasicBlock() const
VPBlockBase * getSingleHierarchicalPredecessor()
VPBlockBase * getSingleSuccessor() const
const VPBlocksTy & getSuccessors() const
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
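A minimal sketch, meant to be read in the context of this file where the VPlan classes are in scope, of rewiring the hierarchical CFG with the static VPBlockUtils helpers; the block names are assumptions.

// Route the existing Pred -> Succ edge through a freshly created block.
static void routeThroughNewBlock(VPlan &Plan, VPBlockBase *Pred,
                                 VPBlockBase *Succ) {
  VPBasicBlock *NewBB = Plan.createVPBasicBlock("routed");
  // insertOnEdge replaces Pred -> Succ with Pred -> NewBB -> Succ and keeps
  // predecessor/successor lists consistent on all three blocks.
  VPBlockUtils::insertOnEdge(Pred, Succ, NewBB);
}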
A recipe for generating conditional branches on the bits of a mask.
RAII object that stores the current insertion point and restores it when the object is destroyed.
VPlan-based builder utility analogous to IRBuilder.
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPValue * createElementCount(Type *Ty, ElementCount EC)
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={})
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPValue *Start, VPValue *Current, VPValue *Step, const Twine &Name="")
Convert the input value Current to the corresponding value of an induction with Start and Step values...
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPInstruction * createOverflowingOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL, const Twine &Name="")
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
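A minimal sketch (in the context of this file) of the VPBuilder entry points listed above: position the builder after an existing recipe R and materialize a new binary VPInstruction. A and B are assumed VPValues.

VPBuilder Builder = VPBuilder::getToInsertAfter(R);
// Build "A & B" as a VPInstruction inserted right after R; flags, metadata
// and name use their defaults.
VPInstruction *AndVPI =
    Builder.createNaryOp(Instruction::And, {A, B});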
Canonical scalar induction phi of the vector loop.
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
ArrayRef< VPValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)
A recipe for generating the phi node for the current index of elements, adjusted in accordance with E...
A recipe to combine multiple recipes into a single 'expression' recipe, which should be considered a ...
A special type of VPBasicBlock that wraps an existing IR basic block.
BasicBlock * getIRBasicBlock() const
Class to record and manage LLVM IR flags.
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.
This is a concrete Recipe that models a single VPlan-level instruction.
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
@ ExtractPenultimateElement
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
@ FirstOrderRecurrenceSplice
@ BuildVector
Creates a fixed-width vector containing all operands.
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
@ CanonicalIVIncrementForPart
@ CalculateTripCountMinusVF
const InterleaveGroup< Instruction > * getInterleaveGroup() const
VPValue * getMask() const
Return the mask used by this recipe.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
A recipe for interleaved memory operations with vector-predication intrinsics.
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
VPRegionBlock * getRegion()
VPBasicBlock * getParent()
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
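A minimal sketch (in the context of this file) of the recipe-placement API: either drop a recipe that has become dead or hoist it to the start of the non-phi section of a block. It assumes R defines exactly one value, as getVPSingleValue requires.

if (R->getVPSingleValue()->getNumUsers() == 0) {
  // No remaining users: unlink and delete the recipe.
  R->eraseFromParent();
} else {
  // Otherwise move it to just after the phi recipes of VPBB.
  R->moveBefore(*VPBB, VPBB->getFirstNonPhi());
}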
Helper class to create VPRecipes from IR instructions.
VPRecipeBase * getRecipe(Instruction *I)
Return the recipe created for given ingredient.
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
A recipe to represent inloop, ordered or partial reduction operations.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
const VPBlockBase * getEntry() const
Type * getCanonicalIVType()
Return the type of the canonical IV for loop regions.
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
const VPBlockBase * getExiting() const
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
bool isSingleScalar() const
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
An analysis for type-inference for VPValues.
LLVMContext & getContext()
Return the LLVMContext used by the analysis.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
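A minimal sketch (in the context of this file) of combining VPTypeAnalysis with the DataLayout, mirroring the store-size computation in the distance check above. It assumes a VPTypeAnalysis instance TypeInfo, a ScalarEvolution reference SE, and a store-like recipe StoreR whose operand 0 is the stored value; the stored type is assumed to have a fixed size.

Type *StoredTy = TypeInfo.inferScalarType(StoreR->getOperand(0));
const DataLayout &DL = SE.getDataLayout();
// Number of bytes one scalar store of StoredTy writes.
uint64_t StoreSize = DL.getTypeStoreSize(StoredTy).getFixedValue();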
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
void setOperand(unsigned I, VPValue *New)
VPValue * getOperand(unsigned N) const
void addOperand(VPValue *Operand)
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Value * getLiveInIRValue() const
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
void setUnderlyingValue(Value *Val)
void replaceAllUsesWith(VPValue *New)
unsigned getNumUsers() const
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
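A minimal sketch (in the context of this file) of the selective-RAUW pattern used when a recipe is duplicated: only uses outside block SinkTo are redirected to the clone, while uses inside SinkTo keep the original value.

Old->replaceUsesWithIf(New, [SinkTo](VPUser &U, unsigned /*Idx*/) {
  // Replace the use only if the using recipe lives outside SinkTo.
  return cast<VPRecipeBase>(&U)->getParent() != SinkTo;
});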
A recipe to compute a pointer to the last element of each part of a widened memory access for widened...
A Recipe for widening the canonical induction variable of the vector loop.
VPWidenCastRecipe is a recipe to create vector cast instructions.
Instruction::CastOps getOpcode() const
A recipe for handling GEP instructions.
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
PHINode * getPHINode() const
VPValue * getStepValue()
Returns the step value of the induction.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
VPValue * getSplatVFValue()
A recipe for widening vector intrinsics.
A common base class for widening memory operations.
A recipe for widened phis.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
bool hasVF(ElementCount VF) const
LLVMContext & getContext() const
VPBasicBlock * getEntry()
VPValue & getVectorTripCount()
The vector trip count.
bool hasScalableVF() const
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
VPValue & getVF()
Returns the VF of the vector loop region.
VPValue * getTripCount() const
The trip count of the original loop.
VPValue * getTrue()
Return a VPValue wrapping i1 true.
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
bool hasUF(unsigned UF) const
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
VPValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPValue wrapping a ConstantInt with the given type and value.
void setVF(ElementCount VF)
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
VPValue * getFalse()
Return a VPValue wrapping i1 false.
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
bool hasScalarVFOnly() const
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
VPValue * getLiveIn(Value *V) const
Return the live-in VPValue for V, if there is one or nullptr otherwise.
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
constexpr ScalarTy getFixedValue() const
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
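A short, self-contained example of ElementCount arithmetic as used when bounding memory footprints per VF: for scalable VFs only the known minimum lane count is available at compile time, so the result is a lower bound. The helper name is illustrative.

#include "llvm/Support/TypeSize.h"
using namespace llvm;

// Bytes touched by one store of StoreSize bytes per lane under VF,
// counting only the statically known minimum number of lanes.
uint64_t minBytesPerVF(ElementCount VF, uint64_t StoreSize) {
  uint64_t Bytes = StoreSize * VF.getKnownMinValue();
  // For VF.isScalable(), the true value is Bytes * vscale at runtime.
  return Bytes;
}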
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM)
Return A unsign-divided by B, rounded by the given rounding mode.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedStore(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedStore Intrinsic.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ?
match_combine_or< CastInst_match< OpTy, ZExtInst >, OpTy > m_ZExtOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedLoad(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedLoad Intrinsic.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
SpecificCmpClass_match< LHS, RHS, CmpInst > m_SpecificCmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
class_match< CmpInst > m_Cmp()
Match any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
MatchFunctor< Val, Pattern > match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
bool match(const SCEV *S, const Pattern &P)
VPInstruction_match< VPInstruction::ExtractLastLane, VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > > m_ExtractLastLaneOfLastPart(const Op0_t &Op0)
AllRecipe_commutative_match< Instruction::And, Op0_t, Op1_t > m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1)
Match a binary AND operation.
AllRecipe_match< Instruction::Or, Op0_t, Op1_t > m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
Match a binary OR operation.
VPInstruction_match< VPInstruction::AnyOf > m_AnyOf()
AllRecipe_commutative_match< Opcode, Op0_t, Op1_t > m_c_Binary(const Op0_t &Op0, const Op1_t &Op1)
AllRecipe_commutative_match< Instruction::Or, Op0_t, Op1_t > m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
GEPLikeRecipe_match< Op0_t, Op1_t > m_GetElementPtr(const Op0_t &Op0, const Op1_t &Op1)
AllRecipe_match< Opcode, Op0_t, Op1_t > m_Binary(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::LastActiveLane, Op0_t > m_LastActiveLane(const Op0_t &Op0)
VPInstruction_match< Instruction::ExtractElement, Op0_t, Op1_t > m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_False()
VPDerivedIV_match< Op0_t, Op1_t, Op2_t > m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::ExtractLastLane, Op0_t > m_ExtractLastLane(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t > m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
specific_intval< 1 > m_True()
VectorEndPointerRecipe_match< Op0_t, Op1_t > m_VecEndPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > m_ExtractLastPart(const Op0_t &Op0)
VPInstruction_match< VPInstruction::Broadcast, Op0_t > m_Broadcast(const Op0_t &Op0)
class_match< VPValue > m_VPValue()
Match an arbitrary VPValue and ignore it.
VPInstruction_match< VPInstruction::ExplicitVectorLength, Op0_t > m_EVL(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector matches only its opcode, without matching its operands, as the number of operands is not fi...
VPInstruction_match< VPInstruction::ExtractPenultimateElement, Op0_t > m_ExtractPenultimateElement(const Op0_t &Op0)
VPInstruction_match< VPInstruction::FirstActiveLane, Op0_t > m_FirstActiveLane(const Op0_t &Op0)
bind_ty< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
VPInstruction_match< VPInstruction::ExtractLane, Op0_t, Op1_t > m_ExtractLane(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::Reverse, Op0_t > m_Reverse(const Op0_t &Op0)
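A minimal sketch (in the context of this file) of the VPlan pattern matchers listed above. It assumes the operand-capturing overload of m_VPValue(VPValue *&) in addition to the wildcard form shown here.

using namespace VPlanPatternMatch;
VPValue *Src;
// Does recipe R broadcast a single scalar Src across all lanes?
if (match(&R, m_Broadcast(m_VPValue(Src)))) {
  // ... Src is the broadcast operand.
}
// Does the block end in the canonical branch-on-count terminator?
if (match(VPBB->getTerminator(), m_BranchOnCount())) {
  // ... this is the latch exit branch.
}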
NodeAddr< DefNode * > Def
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
bool isUniformAcrossVFsAndUFs(VPValue *V)
Checks if V is uniform across all VF lanes and UF parts.
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
std::optional< MemoryLocation > getMemoryLocation(const VPRecipeBase &R)
Return a MemoryLocation for R with noalias metadata populated from R, if the recipe is supported and ...
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
VPIRFlags getFlagsFromIndDesc(const InductionDescriptor &ID)
Extracts and returns NoWrap and FastMath flags from the induction binop in ID.
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
const SCEV * getSCEVExprForVPValue(const VPValue *V, PredicatedScalarEvolution &PSE, const Loop *L=nullptr)
Return the SCEV expression for V.
bool isHeaderMask(const VPValue *V, const VPlan &Plan)
Return true if V is a header mask in Plan.
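A minimal sketch (in the context of this file) combining a few of the helpers above into a typical legality-style query on a VPValue Def; these functions live in the vputils namespace in the VPlan sources.

// Def can be kept as a single scalar if it already produces one value per
// part and all users only consume lane 0.
bool KeepScalar =
    vputils::isSingleScalar(Def) && vputils::onlyFirstLaneUsed(Def);
// A stronger property: the value is identical across all lanes and parts.
bool FullyUniform = vputils::isUniformAcrossVFsAndUFs(Def);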
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
auto min_element(R &&Range)
Provide wrappers to std::min_element which take ranges instead of having to pass begin/end explicitly...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
DenseMap< const Value *, const SCEV * > ValueToSCEVMapTy
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto cast_or_null(const Y &Val)
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
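A minimal sketch (in the context of this file) of the standard traversal-and-erase pattern: walk every VPBasicBlock of a plan, including blocks nested in regions, and delete recipes while iterating. The isDeadRecipe predicate is hypothetical.

for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
         vp_depth_first_deep(Plan.getEntry()))) {
  // make_early_inc_range advances before the body runs, so erasing the
  // current recipe does not invalidate the loop.
  for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
    if (isDeadRecipe(R)) // hypothetical predicate
      R.eraseFromParent();
  }
}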
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
iterator_range< po_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_post_order_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in post order while traversing through ...
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI_FOR_TEST cl::opt< bool > EnableWideActiveLaneMask
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant CI can be safely treated as having been extended from a narrower type with the gi...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
FunctionAddr VTableAddr Next
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal or the list is empty.
@ DataAndControlFlowWithoutRuntimeCheck
Use predicate to control both data and control flow, but modify the trip count so that a runtime over...
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
@ Default
The result values are uniform if and only if all operands are uniform.
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
RemoveMask_match(const Op0_t &In, Op1_t &Out)
bool match(OpTy *V) const
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
MDNode * Scope
The tag for alias scope specification (used with noalias).
MDNode * NoAlias
The tag specifying the noalias scope.
This struct is a compact representation of a valid (non-zero power of two) alignment.
An information struct used to provide DenseMap with the various necessary components for a given valu...
Incoming for lane mask phi as machine instruction; incoming register Reg and incoming block Block are ...
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
A recipe for handling first-order recurrence phis.
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
A recipe for widening load operations, using the address to load from and an optional mask.
A recipe for widening select instructions.
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
A recipe for widening store operations, using the stored value, the address to store to and an option...