cl::desc("Enable use of wide get active lane mask instructions"));
GetIntOrFpInductionDescriptor,
if (!VPBB->getParent())
auto EndIter = Term ? Term->getIterator() : VPBB->end();
VPValue *VPV = Ingredient.getVPSingleValue();
const auto *II = GetIntOrFpInductionDescriptor(Phi);
Phi, Start, Step, &Plan.getVF(), *II, Ingredient.getDebugLoc());
"only VPInstructions expected here");
*Load, Ingredient.getOperand(0), nullptr,
Ingredient.getDebugLoc());
*Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
nullptr, false, false,
drop_end(Ingredient.operands()), CI->getType(),
CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), *CI);
"Only recipes with zero or one defined values expected");
Ingredient.eraseFromParent();
if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())
return RepR && RepR->getOpcode() == Instruction::Alloca;
auto InsertIfValidSinkCandidate = [ScalarVFOnly, &WorkList](
if (!ScalarVFOnly && RepR->isSingleScalar())
WorkList.insert({SinkTo, Candidate});
for (auto &Recipe : *VPBB)
InsertIfValidSinkCandidate(VPBB, Op);
for (unsigned I = 0; I != WorkList.size(); ++I) {
std::tie(SinkTo, SinkCandidate) = WorkList[I];
auto UsersOutsideSinkTo =
return cast<VPRecipeBase>(U)->getParent() != SinkTo;
if (any_of(UsersOutsideSinkTo, [SinkCandidate](VPUser *U) {
return !U->usesFirstLaneOnly(SinkCandidate);
bool NeedsDuplicating = !UsersOutsideSinkTo.empty();
if (NeedsDuplicating) {
if (auto *SinkCandidateRepR =
nullptr, *SinkCandidateRepR);
Clone = SinkCandidate->clone();
InsertIfValidSinkCandidate(SinkTo, Op);
if (!EntryBB || EntryBB->size() != 1 ||
if (EntryBB->getNumSuccessors() != 2)
if (!Succ0 || !Succ1)
if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
if (Succ0->getSingleSuccessor() == Succ1)
if (Succ1->getSingleSuccessor() == Succ0)
if (!Region1->isReplicator())
auto *MiddleBasicBlock =
if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
if (!Region2 || !Region2->isReplicator())
if (!Mask1 || Mask1 != Mask2)
assert(Mask1 && Mask2 && "both regions must have conditions");
if (TransformedRegions.contains(Region1))
if (!Then1 || !Then2)
VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {
Phi1ToMove.eraseFromParent();
Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
TransformedRegions.insert(Region1);
return !TransformedRegions.empty();
std::string RegionName = (Twine("pred.") + Instr->getOpcodeName()).str();
assert(Instr->getParent() && "Predicated instruction not in any basic block");
auto *BlockInMask = PredRecipe->getMask();
RecipeWithoutMask->getDebugLoc());
if (RepR->isPredicated())
if (ParentRegion && ParentRegion->getExiting() == CurrentBlock)
if (!VPBB->getParent())
if (!PredVPBB || PredVPBB->getNumSuccessors() != 1 ||
R.moveBefore(*PredVPBB, PredVPBB->end());
auto *ParentRegion = VPBB->getParent();
if (ParentRegion && ParentRegion->getExiting() == VPBB)
ParentRegion->setExiting(PredVPBB);
for (auto *Succ : to_vector(VPBB->successors())) {
return !WorkList.empty();
bool ShouldSimplify = true;
while (ShouldSimplify) {
if (!IV || IV->getTruncInst())
auto &Casts = IV->getInductionDescriptor().getCastInsts();
for (auto *U : FindMyCast->users()) {
if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
FoundUserCast = UserCast;
FindMyCast = FoundUserCast;
if (!WidenOriginalIV || !WidenOriginalIV->isCanonical())
bool IsConditionalAssume = RepR && RepR->isPredicated() &&
if (IsConditionalAssume)
if (R.mayHaveSideEffects())
return all_of(R.definedValues(),
              [](VPValue *V) { return V->getNumUsers() == 0; });
if (!PhiR || PhiR->getNumOperands() != 2 || PhiR->getNumUsers() != 1)
if (*PhiR->user_begin() != Incoming->getDefiningRecipe() ||
PhiR->replaceAllUsesWith(PhiR->getOperand(0));
PhiR->eraseFromParent();
Incoming->getDefiningRecipe()->eraseFromParent();
Kind, FPBinOp, StartV, CanonicalIV, Step, "offset.idx");
BaseIV = Builder.createScalarCast(Instruction::Trunc, BaseIV, TruncTy, DL);
if (ResultTy != StepTy) {
Builder.setInsertPoint(VecPreheader);
Step = Builder.createScalarCast(Instruction::Trunc, Step, ResultTy, DL);
return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step,
for (unsigned I = 0; I != Users.size(); ++I) {
Users.insert_range(V->users());
return Users.takeVector();
Def->getNumUsers() == 0 || !Def->getUnderlyingValue() ||
(RepR && (RepR->isSingleScalar() || RepR->isPredicated())))
Def->operands(), true);
Clone->insertAfter(Def);
Def->replaceAllUsesWith(Clone);
VPValue *StepV = PtrIV->getOperand(1);
nullptr, StartV, StepV, PtrIV->getDebugLoc(), Builder);
VPValue *PtrAdd = Builder.createPtrAdd(PtrIV->getStartValue(), Steps,
if (HasOnlyVectorVFs && none_of(WideIV->users(), [WideIV](VPUser *U) {
return U->usesScalars(WideIV);
Plan, ID.getKind(), ID.getInductionOpcode(),
WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
WideIV->getDebugLoc(), Builder);
if (!HasOnlyVectorVFs)
WideIV->replaceAllUsesWith(Steps);
WideIV->replaceUsesWithIf(Steps, [WideIV](VPUser &U, unsigned) {
return U.usesScalars(WideIV);
return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV;
if (!Def || Def->getNumOperands() != 2)
auto IsWideIVInc = [&]() {
auto &ID = WideIV->getInductionDescriptor();
VPValue *IVStep = WideIV->getStepValue();
switch (ID.getInductionOpcode()) {
case Instruction::Add:
case Instruction::FAdd:
case Instruction::FSub:
case Instruction::Sub: {
return IsWideIVInc() ? WideIV : nullptr;
if (WideIntOrFp && WideIntOrFp->getTruncInst())
FirstActiveLane = B.createScalarZExtOrTrunc(FirstActiveLane, CanonicalIVType,
                                            FirstActiveLaneType, DL);
B.createNaryOp(Instruction::Add, {CanonicalIV, FirstActiveLane}, DL);
EndValue = B.createNaryOp(Instruction::Add, {EndValue, One}, DL);
if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
VPValue *Start = WideIV->getStartValue();
VPValue *Step = WideIV->getStepValue();
EndValue = B.createDerivedIV(
Start, EndValue, Step);
assert(EndValue && "end value must have been pre-computed");
VPValue *Step = WideIV->getStepValue();
return B.createNaryOp(Instruction::Sub, {EndValue, Step}, {}, "ind.escape");
return B.createPtrAdd(EndValue,
B.createNaryOp(Instruction::Sub, {Zero, Step}),
const auto &ID = WideIV->getInductionDescriptor();
return B.createNaryOp(
ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
{EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
for (auto [Idx, PredVPBB] : enumerate(ExitVPBB->getPredecessors())) {
if (PredVPBB == MiddleVPBB)
ExitIRI->getOperand(Idx),
ExitIRI->getOperand(Idx), SE);
ExitIRI->setOperand(Idx, Escape);
const auto &[V, Inserted] = SCEV2VPV.try_emplace(ExpR->getSCEV(), ExpR);
ExpR->replaceAllUsesWith(V->second);
ExpR->eraseFromParent();
while (!WorkList.empty()) {
if (!Seen.insert(Cur).second)
R->eraseFromParent();
static std::optional<std::pair<bool, unsigned>>
std::optional<std::pair<bool, unsigned>>>(R)
[](auto *I) { return std::make_pair(false, I->getOpcode()); })
.Case<VPWidenIntrinsicRecipe>([](auto *I) {
return std::make_pair(true, I->getVectorIntrinsicID());
.Case<VPVectorPointerRecipe, VPPredInstPHIRecipe>([](auto *I) {
return std::make_pair(false,
.Default([](auto *) { return std::nullopt; });
if (!Op->isLiveIn() || !Op->getLiveInIRValue())
Ops.push_back(Op->getLiveInIRValue());
auto FoldToIRValue = [&]() -> Value * {
if (OpcodeOrIID->first) {
if (R.getNumOperands() != 2)
unsigned ID = OpcodeOrIID->second;
return Folder.FoldBinaryIntrinsic(ID, Ops[0], Ops[1],
unsigned Opcode = OpcodeOrIID->second;
return Folder.FoldSelect(Ops[0], Ops[1],
return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0],
case Instruction::Select:
return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]);
case Instruction::ICmp:
case Instruction::FCmp:
case Instruction::GetElementPtr: {
return Folder.FoldGEP(GEP->getSourceElementType(), Ops[0],
case Instruction::ExtractElement:
if (Value *V = FoldToIRValue())
return R.getParent()->getPlan()->getOrAddLiveIn(V);
VPlan *Plan = Def->getParent()->getPlan();
return Def->replaceAllUsesWith(V);
PredPHI->replaceAllUsesWith(Op);
if (TruncTy == ATy) {
Def->replaceAllUsesWith(A);
: Instruction::ZExt;
if (auto *UnderlyingExt = Def->getOperand(0)->getUnderlyingValue()) {
Ext->setUnderlyingValue(UnderlyingExt);
Def->replaceAllUsesWith(Ext);
auto *Trunc = Builder.createWidenCast(Instruction::Trunc, A, TruncTy);
Def->replaceAllUsesWith(Trunc);
for (VPUser *U : A->users()) {
for (VPValue *VPV : R->definedValues())
Def->replaceAllUsesWith(X);
Def->eraseFromParent();
return Def->replaceAllUsesWith(Def->getOperand(Def->getOperand(0) == X));
return Def->replaceAllUsesWith(X);
return Def->replaceAllUsesWith(Def->getOperand(Def->getOperand(0) == X));
return Def->replaceAllUsesWith(Def->getOperand(1));
(!Def->getOperand(0)->hasMoreThanOneUniqueUser() ||
!Def->getOperand(1)->hasMoreThanOneUniqueUser()))
return Def->replaceAllUsesWith(
    Builder.createLogicalAnd(X, Builder.createOr(Y, Z)));
return Def->replaceAllUsesWith(Plan->getFalse());
return Def->replaceAllUsesWith(X);
Def->setOperand(0, C);
Def->setOperand(1, Y);
Def->setOperand(2, X);
X->hasMoreThanOneUniqueUser())
return Def->replaceAllUsesWith(
    Builder.createLogicalAnd(X, Builder.createLogicalAnd(Y, Z)));
return Def->replaceAllUsesWith(A);
return Def->replaceAllUsesWith(
    Def->getOperand(0) == A ? Def->getOperand(1) : Def->getOperand(0));
return Def->replaceAllUsesWith(A);
R->setOperand(1, Y);
R->setOperand(2, X);
R->replaceAllUsesWith(Cmp);
if (!Cmp->getDebugLoc() && Def->getDebugLoc())
Cmp->setDebugLoc(Def->getDebugLoc());
return Def->replaceAllUsesWith(Def->getOperand(1));
X = Builder.createWidenCast(Instruction::Trunc, X, WideStepTy);
Def->replaceAllUsesWith(X);
Def->setOperand(1, Def->getOperand(0));
Def->setOperand(0, Y);
if (Phi->getOperand(0) == Phi->getOperand(1))
Phi->replaceAllUsesWith(Phi->getOperand(0));
Def->replaceAllUsesWith(
    BuildVector->getOperand(BuildVector->getNumOperands() - 1));
Def->replaceAllUsesWith(
    BuildVector->getOperand(BuildVector->getNumOperands() - 2));
Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
Def->replaceAllUsesWith(
"broadcast operand must be single-scalar");
Def->setOperand(0, C);
if (Phi->getNumOperands() == 1)
Phi->replaceAllUsesWith(Phi->getOperand(0));
if (Phi->getOperand(1) != Def && match(Phi->getOperand(0), m_ZeroInt()) &&
Phi->getNumUsers() == 1 && (*Phi->user_begin() == Def)) {
Phi->setOperand(0, Y);
Def->replaceAllUsesWith(Phi);
if (VecPtr->isFirstPart()) {
VecPtr->replaceAllUsesWith(VecPtr->getOperand(0));
Steps->replaceAllUsesWith(Steps->getOperand(0));
Def->replaceUsesWithIf(StartV, [](const VPUser &U, unsigned Idx) {
return PhiR && PhiR->isInLoop();
Def->replaceAllUsesWith(A);
[Def, A](VPUser *U) { return U->usesScalars(A) || Def == U; })) {
return Def->replaceAllUsesWith(A);
if (Plan->getUF() == 1 &&
return Def->replaceAllUsesWith(
if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
true, nullptr, *RepR);
Clone->insertBefore(RepOrWidenR);
unsigned ExtractOpc =
auto *Ext = new VPInstruction(ExtractOpc, {Clone->getOperand(0)});
Ext->insertBefore(Clone);
Clone->setOperand(0, Ext);
RepR->eraseFromParent();
!all_of(RepOrWidenR->users(), [RepOrWidenR](const VPUser *U) {
return U->usesScalars(RepOrWidenR) ||
match(cast<VPRecipeBase>(U),
m_CombineOr(m_ExtractLastElement(m_VPValue()),
m_ExtractLastLanePerPart(m_VPValue())));
RepOrWidenR->operands(),
Clone->insertBefore(RepOrWidenR);
RepOrWidenR->replaceAllUsesWith(Clone);
RepOrWidenR->eraseFromParent();
if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))
UniqueValues.insert(Blend->getIncomingValue(0));
for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
UniqueValues.insert(Blend->getIncomingValue(I));
if (UniqueValues.size() == 1) {
Blend->replaceAllUsesWith(*UniqueValues.begin());
Blend->eraseFromParent();
if (Blend->isNormalized())
unsigned StartIndex = 0;
for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
if (Mask->getNumUsers() == 1 && !match(Mask, m_False())) {
OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));
for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
if (I == StartIndex)
OperandsWithMask.push_back(Blend->getIncomingValue(I));
OperandsWithMask.push_back(Blend->getMask(I));
OperandsWithMask, Blend->getDebugLoc());
NewBlend->insertBefore(&R);
VPValue *DeadMask = Blend->getMask(StartIndex);
Blend->eraseFromParent();
if (NewBlend->getNumOperands() == 3 &&
VPValue *Inc0 = NewBlend->getOperand(0);
VPValue *Inc1 = NewBlend->getOperand(1);
VPValue *OldMask = NewBlend->getOperand(2);
NewBlend->setOperand(0, Inc1);
NewBlend->setOperand(1, Inc0);
NewBlend->setOperand(2, NewMask);
APInt MaxVal = AlignedTC - 1;
unsigned NewBitWidth =
bool MadeChange = false;
if (!WideIV || !WideIV->isCanonical() ||
WideIV->hasMoreThanOneUniqueUser() ||
NewIVTy == WideIV->getScalarType())
if (!match(*WideIV->user_begin(),
WideIV->setStartValue(NewStart);
WideIV->setStepValue(NewStep);
Cmp->setOperand(1, NewBTC);
return any_of(Cond->getDefiningRecipe()->operands(), [&Plan, BestVF, BestUF,
return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, SE);
const SCEV *VectorTripCount =
"Trip count SCEV must be computable");
auto *Term = &ExitingVPBB->back();
for (unsigned Part = 0; Part < UF; ++Part) {
Extracts[Part] = Ext;
Ext->insertAfter(ALM);
match(Phi->getBackedgeValue(),
assert(Index && "Expected index from ActiveLaneMask instruction");
"Expected one VPActiveLaneMaskPHIRecipe for each unroll part");
"Expected incoming values of Phi to be ActiveLaneMasks");
EntryALM->setOperand(2, ALMMultiplier);
LoopALM->setOperand(2, ALMMultiplier);
ExtractFromALM(EntryALM, EntryExtracts);
ExtractFromALM(LoopALM, LoopExtracts);
Not->setOperand(0, LoopExtracts[0]);
for (unsigned Part = 0; Part < UF; ++Part) {
Phis[Part]->setStartValue(EntryExtracts[Part]);
Phis[Part]->setBackedgeValue(LoopExtracts[Part]);
auto *Term = &ExitingVPBB->back();
const SCEV *TripCount =
"Trip count SCEV must be computable");
if (TripCount->isZero() ||
if (auto *R = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi))
return R->isCanonical();
return isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe,
VPFirstOrderRecurrencePHIRecipe, VPPhi>(&Phi);
R->getScalarType());
HeaderR.eraseFromParent();
HeaderR.getVPSingleValue()->replaceAllUsesWith(Phi->getIncomingValue(0));
HeaderR.eraseFromParent();
B->setParent(nullptr);
Term->getDebugLoc());
Term->eraseFromParent();
assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
assert(Plan.getUF() == BestUF && "BestUF must match the Plan's UF");
auto TryToPushSinkCandidate = [&](VPRecipeBase *SinkCandidate) {
if (SinkCandidate == Previous)
!Seen.insert(SinkCandidate).second ||
for (unsigned I = 0; I != WorkList.size(); ++I) {
"only recipes with a single defined value expected");
if (SinkCandidate == FOR)
SinkCandidate->moveAfter(Previous);
Previous = SinkCandidate;
for (VPUser *U : FOR->users()) {
[&VPDT, HoistPoint](VPUser *U) {
auto *R = cast<VPRecipeBase>(U);
return HoistPoint == R ||
VPDT.properlyDominates(HoistPoint, R);
"HoistPoint must dominate all users of FOR");
auto NeedsHoisting = [HoistPoint, &VPDT,
VPRecipeBase *HoistCandidate = HoistCandidateV->getDefiningRecipe();
if (!HoistCandidate)
HoistCandidate->getRegion() == EnclosingLoopRegion) &&
"CFG in VPlan should still be flat, without replicate regions");
if (!Visited.insert(HoistCandidate).second)
return HoistCandidate;
for (unsigned I = 0; I != HoistCandidates.size(); ++I) {
"only recipes with a single defined value expected");
if (auto *R = NeedsHoisting(Op))
HoistCandidate->moveBefore(*HoistPoint->getParent(),
VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();
while (auto *PrevPhi =
assert(PrevPhi->getParent() == FOR->getParent());
Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
{FOR, FOR->getBackedgeValue()});
FOR->replaceAllUsesWith(RecurSplice);
RecurSplice->setOperand(0, FOR);
RecurKind RK = PhiR->getRecurrenceKind();
RecWithFlags->dropPoisonGeneratingFlags();
struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
return Def == getEmptyKey() || Def == getTombstoneKey();
return GEP->getSourceElementType();
.Case<VPVectorPointerRecipe, VPWidenGEPRecipe>(
    [](auto *I) { return I->getSourceElementType(); })
.Default([](auto *) { return nullptr; });
static bool canHandle(const VPSingleDefRecipe *Def) {
if (!C || (!C->first && (C->second == Instruction::InsertValue ||
C->second == Instruction::ExtractValue)))
return !Def->mayReadFromMemory();
static unsigned getHashValue(const VPSingleDefRecipe *Def) {
const VPlan *Plan = Def->getParent()->getPlan();
VPTypeAnalysis TypeInfo(*Plan);
getGEPSourceElementType(Def), TypeInfo.inferScalarType(Def),
if (RFlags->hasPredicate())
static bool isEqual(const VPSingleDefRecipe *L, const VPSingleDefRecipe *R) {
if (L->getVPDefID() != R->getVPDefID() ||
getGEPSourceElementType(L) != getGEPSourceElementType(R) ||
!equal(L->operands(), R->operands()))
"must have valid opcode info for both recipes");
if (LFlags->hasPredicate() &&
LFlags->getPredicate() !=
const VPRegionBlock *RegionL = L->getRegion();
const VPRegionBlock *RegionR = R->getRegion();
L->getParent() != R->getParent())
const VPlan *Plan = L->getParent()->getPlan();
VPTypeAnalysis TypeInfo(*Plan);
return TypeInfo.inferScalarType(L) == TypeInfo.inferScalarType(R);
if (!Def || !VPCSEDenseMapInfo::canHandle(Def))
if (!VPDT.dominates(V->getParent(), VPBB))
Def->replaceAllUsesWith(V);
"Expected vector preheader's successor to be the vector loop region");
return !Op->isDefinedOutsideLoopRegions();
R.moveBefore(*Preheader, Preheader->end());
VPValue *ResultVPV = R.getVPSingleValue();
unsigned NewResSizeInBits = MinBWs.lookup(UI);
if (!NewResSizeInBits)
(void)OldResSizeInBits;
VPW->dropPoisonGeneratingFlags();
if (OldResSizeInBits != NewResSizeInBits &&
Ext->insertAfter(&R);
Ext->setOperand(0, ResultVPV);
assert(OldResSizeInBits > NewResSizeInBits && "Nothing to shrink?");
"Only ICmps should not need extending the result.");
for (unsigned Idx = StartIdx; Idx != R.getNumOperands(); ++Idx) {
auto *Op = R.getOperand(Idx);
unsigned OpSizeInBits =
if (OpSizeInBits == NewResSizeInBits)
assert(OpSizeInBits > NewResSizeInBits && "nothing to truncate");
auto [ProcessedIter, IterIsEmpty] = ProcessedTruncs.try_emplace(Op);
R.setOperand(Idx, ProcessedIter->second);
Builder.setInsertPoint(&R);
Builder.createWidenCast(Instruction::Trunc, Op, NewResTy);
ProcessedIter->second = NewOp;
R.setOperand(Idx, NewOp);
assert(VPBB->getNumSuccessors() == 2 &&
"Two successors expected for BranchOnCond");
unsigned RemovedIdx;
"There must be a single edge between VPBB and its successor");
VPBB->back().eraseFromParent();
VPValue *StartV = CanonicalIVPHI->getStartValue();
auto *CanonicalIVIncrement =
CanonicalIVIncrement->dropPoisonGeneratingFlags();
DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
VPValue *TripCount, *IncrementValue;
IncrementValue = CanonicalIVIncrement;
IncrementValue = CanonicalIVPHI;
auto *EntryIncrement = Builder.createOverflowingOp(
{EntryIncrement, TC, ALMMultiplier}, DL,
"active.lane.mask.entry");
LaneMaskPhi->insertAfter(CanonicalIVPHI);
Builder.setInsertPoint(OriginalTerminator);
auto *InLoopIncrement =
{IncrementValue}, {false, false}, DL);
{InLoopIncrement, TripCount, ALMMultiplier},
DL, "active.lane.mask.next");
auto *NotMask = Builder.createNot(ALM, DL);
auto *FoundWidenCanonicalIVUser = find_if(
"Must have at most one VPWideCanonicalIVRecipe");
if (FoundWidenCanonicalIVUser !=
auto *WideCanonicalIV =
WideCanonicalIVs.push_back(WideCanonicalIV);
if (WidenOriginalIV && WidenOriginalIV->isCanonical())
WideCanonicalIVs.push_back(WidenOriginalIV);
for (auto *Wide : WideCanonicalIVs) {
assert(VPI->getOperand(0) == Wide &&
"WidenCanonicalIV must be the first operand of the compare");
assert(!HeaderMask && "Multiple header masks found?");
VPlan &Plan, bool UseActiveLaneMaskForControlFlow,
UseActiveLaneMaskForControlFlow) &&
"DataAndControlFlowWithoutRuntimeCheck implies "
"UseActiveLaneMaskForControlFlow");
auto *FoundWidenCanonicalIVUser = find_if(
assert(FoundWidenCanonicalIVUser &&
"Must have widened canonical IV when tail folding!");
auto *WideCanonicalIV =
if (UseActiveLaneMaskForControlFlow) {
nullptr, "active.lane.mask");
template <typename OpTy> bool match(OpTy *V) const {
template <typename Op0_t, typename Op1_t>
VPValue *Addr, *Mask, *EndPtr;
auto AdjustEndPtr = [&CurRecipe, &EVL](VPValue *EndPtr) {
EVLEndPtr->insertBefore(&CurRecipe);
EVLEndPtr->setOperand(1, &EVL);
if (match(&CurRecipe,
if (match(&CurRecipe,
AdjustEndPtr(EndPtr), EVL, Mask);
AdjustEndPtr(EndPtr), EVL, Mask);
if (Rdx->isConditional() &&
if (Interleave->getMask() &&
if (match(&CurRecipe,
"User of VF that we can't transform to EVL.");
[&LoopRegion, &Plan](VPUser *U) {
m_c_Add(m_Specific(LoopRegion->getCanonicalIV()),
m_Specific(&Plan.getVFxUF()))) ||
isa<VPWidenPointerInductionRecipe>(U);
"Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
"increment of the canonical induction.");
MaxEVL = Builder.createScalarZExtOrTrunc(
Builder.setInsertPoint(Header, Header->getFirstNonPhi());
VPValue *PrevEVL = Builder.createScalarPhi(
Intrinsic::experimental_vp_splice,
{V1, V2, Imm, Plan.getTrue(), PrevEVL, &EVL},
R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
VPValue *EVLMask = Builder.createICmp(
assert(NumDefVal == CurRecipe->getNumDefinedValues() &&
"New recipe must define the same number of values as the "
for (unsigned I = 0; I < NumDefVal; ++I) {
VPValue *CurVPV = CurRecipe->getVPValue(I);
R->eraseFromParent();
VPlan &Plan, const std::optional<unsigned> &MaxSafeElements) {
VPValue *StartV = CanonicalIVPHI->getStartValue();
EVLPhi->insertAfter(CanonicalIVPHI);
VPBuilder Builder(Header, Header->getFirstNonPhi());
VPPhi *AVLPhi = Builder.createScalarPhi(
if (MaxSafeElements) {
auto *CanonicalIVIncrement =
Builder.setInsertPoint(CanonicalIVIncrement);
OpVPEVL = Builder.createScalarZExtOrTrunc(
OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
auto *NextEVLIV = Builder.createOverflowingOp(
Instruction::Add, {OpVPEVL, EVLPhi},
{CanonicalIVIncrement->hasNoUnsignedWrap(),
CanonicalIVIncrement->hasNoSignedWrap()},
CanonicalIVIncrement->getDebugLoc(), "index.evl.next");
EVLPhi->addOperand(NextEVLIV);
VPValue *NextAVL = Builder.createOverflowingOp(
Instruction::Sub, {AVLPhi, OpVPEVL}, {true, false},
CanonicalIVPHI->replaceAllUsesWith(EVLPhi);
CanonicalIVIncrement->setOperand(0, CanonicalIVPHI);
assert(!EVLPhi && "Found multiple EVL PHIs. Only one expected");
[[maybe_unused]] bool FoundAVL =
assert(FoundAVL && "Didn't find AVL?");
[[maybe_unused]] bool FoundAVLNext =
assert(FoundAVLNext && "Didn't find AVL backedge?");
VPValue *Backedge = CanonicalIV->getIncomingValue(1);
"Unexpected canonical iv");
CanonicalIV->eraseFromParent();
match(LatchExitingBr,
"Unexpected terminator in EVL loop");
LatchExitingBr->eraseFromParent();
return R->getRegion() ||
for (const SCEV *Stride : StridesMap.values()) {
const APInt *StrideConst;
if (!match(PSE.getSCEV(StrideV), m_scev_APInt(StrideConst)))
unsigned BW = U->getType()->getScalarSizeInBits();
RewriteMap[StrideV] = PSE.getSCEV(StrideV);
const SCEV *ScevExpr = ExpSCEV->getSCEV();
if (NewSCEV != ScevExpr) {
ExpSCEV->replaceAllUsesWith(NewExp);
const std::function<bool(BasicBlock *)> &BlockNeedsPredication) {
auto CollectPoisonGeneratingInstrsInBackwardSlice([&](VPRecipeBase *Root) {
while (!Worklist.empty()) {
if (!Visited.insert(CurRec).second)
RecWithFlags->isDisjoint()) {
Instruction::Add, {A, B}, {false, false},
RecWithFlags->getDebugLoc());
New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
RecWithFlags->replaceAllUsesWith(New);
RecWithFlags->eraseFromParent();
RecWithFlags->dropPoisonGeneratingFlags();
assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
"found instruction with poison generating flags not covered by "
"VPRecipeWithIRFlags");
if (VPRecipeBase *OpDef = Operand->getDefiningRecipe())
Instruction &UnderlyingInstr = WidenRec->getIngredient();
VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
if (AddrDef && WidenRec->isConsecutive() &&
BlockNeedsPredication(UnderlyingInstr.getParent()))
CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();
InterleaveRec->getInterleaveGroup();
bool NeedPredication = false;
I < NumMembers; ++I) {
NeedPredication |= BlockNeedsPredication(Member->getParent());
if (NeedPredication)
CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
if (InterleaveGroups.empty())
for (const auto *IG : InterleaveGroups) {
StoredValues.push_back(StoreR->getStoredValue());
for (unsigned I = 1; I < IG->getFactor(); ++I) {
StoredValues.push_back(StoreR->getStoredValue());
bool NeedsMaskForGaps =
(IG->requiresScalarEpilogue() && !ScalarEpilogueAllowed) ||
(!StoredValues.empty() && !IG->isFull());
VPValue *Addr = Start->getAddr();
assert(IG->getIndex(IRInsertPos) != 0 &&
"index of insert position shouldn't be zero");
IG->getIndex(IRInsertPos),
Addr = B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);
if (IG->isReverse()) {
-(int64_t)IG->getFactor(), NW, InsertPos->getDebugLoc());
ReversePtr->insertBefore(InsertPos);
InsertPos->getMask(), NeedsMaskForGaps,
InterleaveMD, InsertPos->getDebugLoc());
VPIG->insertBefore(InsertPos);
for (unsigned i = 0; i < IG->getFactor(); ++i)
if (!Member->getType()->isVoidTy()) {
AddOp = Instruction::Add;
MulOp = Instruction::Mul;
AddOp = ID.getInductionOpcode();
MulOp = Instruction::FMul;
Flags = ID.getInductionBinOp()->getFastMathFlags();
Step = Builder.createScalarCast(Instruction::Trunc, Step, Ty, DL);
Start = Builder.createScalarCast(Instruction::Trunc, Start, Ty, DL);
Init = Builder.createWidenCast(Instruction::UIToFP, Init, StepTy);
Init = Builder.createNaryOp(MulOp, {Init, SplatStep}, Flags);
Init = Builder.createNaryOp(AddOp, {SplatStart, Init}, Flags,
WidePHI->insertBefore(WidenIVR);
Builder.setInsertPoint(R->getParent(), std::next(R->getIterator()));
VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, StepTy,
VF = Builder.createScalarZExtOrTrunc(VF, StepTy,
Inc = Builder.createNaryOp(MulOp, {Step, VF}, Flags);
auto *Next = Builder.createNaryOp(AddOp, {Prev, Inc}, Flags,
WidePHI->addOperand(Next);
VPlan *Plan = R->getParent()->getPlan();
VPValue *Start = R->getStartValue();
VPValue *Step = R->getStepValue();
VPValue *VF = R->getVFValue();
assert(R->getInductionDescriptor().getKind() ==
"Not a pointer induction according to InductionDescriptor!");
"Recipe should have been replaced");
VPPhi *ScalarPtrPhi = Builder.createScalarPhi(Start, DL, "pointer.phi");
Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
Offset = Builder.createNaryOp(Instruction::Mul, {Offset, Step});
VPValue *PtrAdd = Builder.createNaryOp(
R->replaceAllUsesWith(PtrAdd);
VF = Builder.createScalarZExtOrTrunc(VF, StepTy, TypeInfo.inferScalarType(VF),
VPValue *Inc = Builder.createNaryOp(Instruction::Mul, {Step, VF});
Builder.createPtrAdd(ScalarPtrPhi, Inc, DL, "ptr.ind");
if (!R->isReplicator())
R->dissolveToCFGLoop();
for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
Select = Builder.createSelect(Blend->getMask(I),
                              Blend->getIncomingValue(I), Select,
                              R.getDebugLoc(), "predphi");
Blend->replaceAllUsesWith(Select);
? Instruction::UIToFP
: Instruction::Trunc;
VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
Flags = {VPI->getFastMathFlags()};
MulOpc, {VectorStep, ScalarStep}, Flags, R.getDebugLoc());
VPI->replaceAllUsesWith(VectorStep);
R->eraseFromParent();
3465 "unsupported early exit VPBB");
3476 "Terminator must be be BranchOnCond");
3477 VPValue *CondOfEarlyExitingVPBB =
3479 auto *CondToEarlyExit = TrueSucc == EarlyExitVPBB
3480 ? CondOfEarlyExitingVPBB
3481 : Builder.createNot(CondOfEarlyExitingVPBB);
3498 VPBuilder EarlyExitB(VectorEarlyExitVPBB);
3503 unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1;
3504 if (ExitIRI->getNumOperands() != 1) {
3507 ExitIRI->extractLastLaneOfFirstOperand(MiddleBuilder);
3510 VPValue *IncomingFromEarlyExit = ExitIRI->getOperand(EarlyExitIdx);
3511 if (!IncomingFromEarlyExit->
isLiveIn()) {
3515 "first.active.lane");
3518 nullptr,
"early.exit.value");
3519 ExitIRI->
setOperand(EarlyExitIdx, IncomingFromEarlyExit);
3529 "Unexpected terminator");
3530 auto *IsLatchExitTaken =
3532 LatchExitingBranch->getOperand(1));
3533 auto *AnyExitTaken = Builder.createNaryOp(
3534 Instruction::Or, {IsEarlyExitTaken, IsLatchExitTaken});
3536 LatchExitingBranch->eraseFromParent();
Type *RedTy = Ctx.Types.inferScalarType(Red);
VPValue *VecOp = Red->getVecOp();
auto IsExtendedRedValidAndClampRange =
ExtRedCost = Ctx.TTI.getPartialReductionCost(
Opcode, SrcTy, nullptr, RedTy, VF, ExtKind,
ExtRedCost = Ctx.TTI.getExtendedReductionCost(
Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
Red->getFastMathFlags(), CostKind);
return ExtRedCost.isValid() && ExtRedCost < ExtCost + RedCost;
IsExtendedRedValidAndClampRange(
Ctx.Types.inferScalarType(A)))
if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
Type *RedTy = Ctx.Types.inferScalarType(Red);
auto IsMulAccValidAndClampRange =
Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
if (IsPartialReduction) {
Ext1 ? Ctx.Types.inferScalarType(Ext1->getOperand(0)) : nullptr;
MulAccCost = Ctx.TTI.getPartialReductionCost(
Opcode, SrcTy, SrcTy2, RedTy, VF,
if (Ext0 && Ext1 && Ext0->getOpcode() != Ext1->getOpcode())
!Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;
MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,
ExtCost += Ext0->computeCost(VF, Ctx);
ExtCost += Ext1->computeCost(VF, Ctx);
ExtCost += OuterExt->computeCost(VF, Ctx);
return MulAccCost.isValid() &&
       MulAccCost < ExtCost + MulCost + RedCost;
VPValue *VecOp = Red->getVecOp();
if (!ExtA || ExtB || !ValB->isLiveIn())
Builder.createWidenCast(Instruction::CastOps::Trunc, ValB, NarrowTy);
Type *WideTy = Ctx.Types.inferScalarType(ExtA);
ValB = ExtB = Builder.createWidenCast(ExtOpc, Trunc, WideTy);
Mul->setOperand(1, ExtB);
ExtendAndReplaceConstantOp(RecipeA, RecipeB, B, Mul);
IsMulAccValidAndClampRange(Mul, RecipeA, RecipeB, nullptr)) {
if (!Sub && IsMulAccValidAndClampRange(Mul, nullptr, nullptr, nullptr))
ExtendAndReplaceConstantOp(Ext0, Ext1, B, Mul);
(Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
Ext0->getOpcode() == Ext1->getOpcode() &&
IsMulAccValidAndClampRange(Mul, Ext0, Ext1, Ext) && Mul->hasOneUse()) {
Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(), *Ext0,
*Ext0, Ext0->getDebugLoc());
NewExt0->insertBefore(Ext0);
Ext->getResultType(), *Ext1, *Ext1,
Ext1->getDebugLoc());
Mul->setOperand(0, NewExt0);
Mul->setOperand(1, NewExt1);
Red->setOperand(1, Mul);
auto IP = std::next(Red->getIterator());
auto *VPBB = Red->getParent();
Red->replaceAllUsesWith(AbstractR);
for (VPValue *VPV : VPValues) {
(VPV->isLiveIn() && VPV->getLiveInIRValue() &&
if (User->usesScalars(VPV))
HoistPoint = HoistBlock->begin();
"All users must be in the vector preheader or dominated by it");
VPV->replaceUsesWithIf(Broadcast,
                       [VPV, Broadcast](VPUser &U, unsigned Idx) {
return Broadcast != &U && !U.usesScalars(VPV);
assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
auto *TCMO = Builder.createNaryOp(
auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](VPUser *U) {
return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))
DefR->replaceUsesWithIf(
BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
for (VPValue *Def : R.definedValues()) {
auto IsCandidateUnpackUser = [Def](VPUser *U) {
return U->usesScalars(Def) &&
if (none_of(Def->users(), IsCandidateUnpackUser))
Unpack->insertAfter(&R);
Def->replaceUsesWithIf(Unpack,
                       [&IsCandidateUnpackUser](VPUser &U, unsigned) {
return IsCandidateUnpackUser(&U);
bool RequiresScalarEpilogue) {
assert(VectorTC.isLiveIn() && "vector-trip-count must be a live-in");
if (TailByMasking) {
TC = Builder.createNaryOp(
{TC, Builder.createNaryOp(Instruction::Sub,
Builder.createNaryOp(Instruction::URem, {TC, Step},
if (RequiresScalarEpilogue) {
"requiring scalar epilogue is not supported with tail folding");
R = Builder.createSelect(IsZero, Step, R);
VPValue *Res = Builder.createNaryOp(
Builder.createElementCount(TCTy, VFEC * Plan.getUF());
VPValue *RuntimeVF = Builder.createElementCount(TCTy, VFEC);
BC, [&VF](VPUser &U, unsigned) { return !U.usesScalars(&VF); });
VPValue *MulByUF = Builder.createNaryOp(Instruction::Mul, {RuntimeVF, UF});
BasicBlock *EntryBB = Entry->getIRBasicBlock();
const SCEV *Expr = ExpSCEV->getSCEV();
ExpandedSCEVs[ExpSCEV->getSCEV()] = Res;
ExpSCEV->eraseFromParent();
"VPExpandSCEVRecipes must be at the beginning of the entry block, "
"after any VPIRInstructions");
auto EI = Entry->begin();
return ExpandedSCEVs;
return IR->getInterleaveGroup()->isFull() && IR->getVPValue(Idx) == OpV;
unsigned VectorRegWidth) {
if (!InterleaveR || InterleaveR->getMask())
Type *GroupElementTy = nullptr;
[&TypeInfo, GroupElementTy](VPValue *Op) {
return TypeInfo.inferScalarType(Op) == GroupElementTy;
[&TypeInfo, GroupElementTy](VPValue *Op) {
return TypeInfo.inferScalarType(Op) == GroupElementTy;
return IG->getFactor() == VF && IG->getNumMembers() == VF &&
GroupSize == VectorRegWidth;
return RepR && RepR->isSingleScalar();
auto *R = V->getDefiningRecipe();
for (unsigned Idx = 0, E = WideMember0->getNumOperands(); Idx != E; ++Idx)
WideMember0->setOperand(
auto *LI = cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos());
*LI, LoadGroup->getAddr(), LoadGroup->getMask(), true,
false, {}, LoadGroup->getDebugLoc());
L->insertBefore(LoadGroup);
assert(RepR->isSingleScalar() &&
"must be a single scalar load");
NarrowedOps.insert(RepR);
VPValue *PtrOp = WideLoad->getAddr();
PtrOp = VecPtr->getOperand(0);
nullptr, *WideLoad);
N->insertBefore(WideLoad);
unsigned VectorRegWidth) {
if (R.mayWriteToMemory() && !InterleaveR)
if (InterleaveR->getStoredValues().empty())
auto *Member0 = InterleaveR->getStoredValues()[0];
all_of(InterleaveR->getStoredValues(),
       [Member0](VPValue *VPV) { return Member0 == VPV; })) {
VPRecipeBase *DefR = Op.value()->getDefiningRecipe();
auto *IR = dyn_cast<VPInterleaveRecipe>(DefR);
return IR && IR->getInterleaveGroup()->isFull() &&
IR->getVPValue(Op.index()) == Op.value();
InterleaveR->getStoredValues()[0]->getDefiningRecipe());
for (const auto &[I, V] : enumerate(InterleaveR->getStoredValues())) {
if (!R || R->getOpcode() != WideMember0->getOpcode() ||
R->getNumOperands() > 2)
[WideMember0, Idx = I](const auto &P) {
const auto &[OpIdx, OpV] = P;
return !canNarrowLoad(WideMember0, OpIdx, OpV, Idx);
if (StoreGroups.empty())
for (auto *StoreGroup : StoreGroups) {
*SI, StoreGroup->getAddr(), Res, nullptr, true,
false, {}, StoreGroup->getDebugLoc());
S->insertBefore(StoreGroup);
StoreGroup->eraseFromParent();
Inc->setOperand(1, UF);
"must have a BranchOnCond");
if (VF.isScalable() && VScaleForTuning.has_value())
VectorStep *= *VScaleForTuning;
assert(VectorStep > 0 && "trip count should not be zero");
MiddleTerm->addMetadata(LLVMContext::MD_prof, BranchWeights);
if (WideIntOrFp && WideIntOrFp->getTruncInst())
if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
Start, VectorTC, Step);
{EndValue, Start}, WideIV->getDebugLoc(), "bc.resume.val");
return ResumePhiRecipe;
VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
IVEndValues[WideIVR] = ResumePhi->getOperand(0);
ScalarPhiIRI->addOperand(ResumePhi);
"should only skip truncated wide inductions");
auto *ResumeFromVectorLoop = VectorPhiR->getBackedgeValue();
"Cannot handle loops with uncountable early exits");
"vector.recur.extract");
StringRef Name = IsFOR ? "scalar.recur.init" : "bc.merge.rdx";
{ResumeFromVectorLoop, VectorPhiR->getStartValue()}, {}, Name);
VPBuilder ScalarPHBuilder(ScalarPHVPBB);
VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
"Cannot handle loops with uncountable early exits");
for (VPUser *U : FOR->users()) {
{}, "vector.recur.extract.for.phi");
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
unsigned getActiveBits() const
Compute the number of active bits in the value.
unsigned getBitWidth() const
Return the number of bits in the APInt.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
static DebugLoc getCompilerGenerated()
static DebugLoc getUnknown()
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
constexpr bool isVector() const
One or more elements.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Utility class for floating point operations which can have information about relaxed accuracy require...
Represents flags for the getelementptr instruction/expression.
GEPNoWrapFlags withoutNoUnsignedWrap() const
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
The group of interleaved loads/stores sharing the same stride and close to each other.
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
uint32_t getNumMembers() const
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
This class implements a map that also provides access to all stored values in a deterministic order.
ValueT lookup(const KeyT &Key) const
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
static LLVM_ABI unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
RegionT * getParent() const
Get the parent of the Region.
This class uses information about analyze scalars to rewrite expressions in canonical form.
LLVM_ABI Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
static const SCEV * rewrite(const SCEV *Scev, ScalarEvolution &SE, ValueToSCEVMapTy &Map)
This class represents an analyzed expression in the program.
LLVM_ABI bool isZero() const
Return true if the expression is a constant zero.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
const DataLayout & getDataLayout() const
Return the DataLayout associated with the module this SCEV instance is operating on.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getUDivExpr(const SCEV *LHS, const SCEV *RHS)
Get a canonical unsigned division expression, or something simpler if possible.
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionalit...
TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)
Add a case on the given type.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
RecipeListTy::iterator iterator
Instruction iterators...
iterator begin()
Recipe iterator methods.
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
VPRegionBlock * getEnclosingLoopRegion()
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
const VPRecipeBase & back() const
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
VPRegionBlock * getParent()
const VPBasicBlock * getExitingBasicBlock() const
size_t getNumSuccessors() const
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
size_t getNumPredecessors() const
const VPBlocksTy & getPredecessors() const
VPBlockBase * getSinglePredecessor() const
const VPBasicBlock * getEntryBasicBlock() const
VPBlockBase * getSingleHierarchicalPredecessor()
VPBlockBase * getSingleSuccessor() const
const VPBlocksTy & getSuccessors() const
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
A recipe for generating conditional branches on the bits of a mask.
RAII object that stores the current insertion point and restores it when the object is destroyed.
VPlan-based builder utility analogous to IRBuilder.
VPValue * createElementCount(Type *Ty, ElementCount EC)
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={})
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPValue *Start, VPValue *Current, VPValue *Step, const Twine &Name="")
Convert the input value Current to the corresponding value of an induction with Start and Step values...
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL, const Twine &Name="")
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
Canonical scalar induction phi of the vector loop.
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
ArrayRef< VPValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)
A recipe for generating the phi node for the current index of elements, adjusted in accordance with E...
A recipe to combine multiple recipes into a single 'expression' recipe, which should be considered a ...
A special type of VPBasicBlock that wraps an existing IR basic block.
BasicBlock * getIRBasicBlock() const
Class to record and manage LLVM IR flags.
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for \I , if it is a PHINode, otherwise create a VPIRInstruction.
This is a concrete Recipe that models a single VPlan-level instruction.
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
@ ExtractPenultimateElement
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
@ FirstOrderRecurrenceSplice
@ BuildVector
Creates a fixed-width vector containing all operands.
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
@ CanonicalIVIncrementForPart
@ CalculateTripCountMinusVF
const InterleaveGroup< Instruction > * getInterleaveGroup() const
VPValue * getMask() const
Return the mask used by this recipe.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
A recipe for interleaved memory operations with vector-predication intrinsics.
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
VPRegionBlock * getRegion()
VPBasicBlock * getParent()
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
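A sketch of the usual hoist-then-clean-up pattern built from the member functions above; it assumes R defines exactly one value and Pred is a VPBasicBlock the recipe may legally move to:
// Move R to the end of Pred; drop it if its result is now unused.
R->moveBefore(*Pred, Pred->end());
if (R->getVPSingleValue()->getNumUsers() == 0 && !R->mayHaveSideEffects())
  R->eraseFromParent();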
Helper class to create VPRecipes from IR instructions.
VPRecipeBase * getRecipe(Instruction *I)
Return the recipe created for given ingredient.
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
A recipe to represent inloop reduction operations, performing a reduction on a vector operand into a ...
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
const VPBlockBase * getEntry() const
Type * getCanonicalIVType()
Return the type of the canonical IV for loop regions.
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
const VPBlockBase * getExiting() const
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
bool isSingleScalar() const
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
An analysis for type-inference for VPValues.
LLVMContext & getContext()
Return the LLVMContext used by the analysis.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
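A sketch of how a transform typically combines the analysis with the toVectorTy helper listed further below; TypeInfo and VF are assumed to be a VPTypeAnalysis and an ElementCount for the plan under transformation:
Type *ScalarTy = TypeInfo.inferScalarType(V); // per-lane type of V
Type *VectorTy = toVectorTy(ScalarTy, VF);    // widened to VF lanes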
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
void setOperand(unsigned I, VPValue *New)
VPValue * getOperand(unsigned N) const
void addOperand(VPValue *Operand)
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Value * getLiveInIRValue() const
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
void setUnderlyingValue(Value *Val)
void replaceAllUsesWith(VPValue *New)
unsigned getNumUsers() const
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
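A sketch of the selective-RAUW idiom this enables: keep Old for users inside block VPBB and forward everything outside it to New (Old, New, and VPBB are illustrative names):
Old->replaceUsesWithIf(New, [VPBB](VPUser &U, unsigned /*OpIdx*/) {
  // Users are recipes; only rewrite those that live outside VPBB.
  return cast<VPRecipeBase>(&U)->getParent() != VPBB;
});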
A recipe to compute a pointer to the last element of each part of a widened memory access for widened...
A Recipe for widening the canonical induction variable of the vector loop.
VPWidenCastRecipe is a recipe to create vector cast instructions.
Instruction::CastOps getOpcode() const
A recipe for handling GEP instructions.
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
PHINode * getPHINode() const
VPValue * getStepValue()
Returns the step value of the induction.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
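A sketch of reading the pieces a transform usually needs from a widened induction; WidenIV is assumed to point at a recipe derived from this base class:
VPValue *Step = WidenIV->getStepValue();
const InductionDescriptor &ID = WidenIV->getInductionDescriptor();
bool IsFPInduction = ID.getKind() == InductionDescriptor::IK_FpInduction;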
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
VPValue * getSplatVFValue()
A recipe for widening vector intrinsics.
A common base class for widening memory operations.
A recipe for widened phis.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient output...
bool hasVF(ElementCount VF) const
LLVMContext & getContext() const
VPBasicBlock * getEntry()
VPValue & getVectorTripCount()
The vector trip count.
bool hasScalableVF() const
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
VPValue & getVF()
Returns the VF of the vector loop region.
VPValue * getTripCount() const
The trip count of the original loop.
VPValue * getTrue()
Return a VPValue wrapping i1 true.
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
bool hasUF(unsigned UF) const
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
VPValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPValue wrapping a ConstantInt with the given type and value.
void setVF(ElementCount VF)
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
VPValue * getFalse()
Return a VPValue wrapping i1 false.
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
bool hasScalarVFOnly() const
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
ArrayRef< VPValue * > getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
VPValue * getLiveIn(Value *V) const
Return the live-in VPValue for V, if there is one or nullptr otherwise.
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
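A sketch of the queries a transform commonly performs before rewriting the plan; Plan is the VPlan under transformation:
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
VPCanonicalIVPHIRecipe *CanIV = LoopRegion->getCanonicalIV();
VPValue *TripCount = Plan.getTripCount();
bool ScalarVFOnly = Plan.hasScalarVFOnly(); // common bail-out condition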
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM)
Return A unsign-divided by B, rounded by the given rounding mode.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedStore(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedStore Intrinsic.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ? R : false.
match_combine_or< CastInst_match< OpTy, ZExtInst >, OpTy > m_ZExtOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedLoad(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedLoad Intrinsic.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
MatchFunctor< Val, Pattern > match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
AllRecipe_commutative_match< Instruction::And, Op0_t, Op1_t > m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1)
Match a binary AND operation.
AllRecipe_match< Instruction::Or, Op0_t, Op1_t > m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
Match a binary OR operation.
AllRecipe_commutative_match< Opcode, Op0_t, Op1_t > m_c_Binary(const Op0_t &Op0, const Op1_t &Op1)
AllRecipe_commutative_match< Instruction::Or, Op0_t, Op1_t > m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
GEPLikeRecipe_match< Op0_t, Op1_t > m_GetElementPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastLanePerPart, Op0_t > m_ExtractLastLanePerPart(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ExtractLastElement, Op0_t > m_ExtractLastElement(const Op0_t &Op0)
AllRecipe_match< Opcode, Op0_t, Op1_t > m_Binary(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< Instruction::ExtractElement, Op0_t, Op1_t > m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_False()
VPDerivedIV_match< Op0_t, Op1_t, Op2_t > m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t > m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
specific_intval< 1 > m_True()
VectorEndPointerRecipe_match< Op0_t, Op1_t > m_VecEndPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::Broadcast, Op0_t > m_Broadcast(const Op0_t &Op0)
class_match< VPValue > m_VPValue()
Match an arbitrary VPValue and ignore it.
VPInstruction_match< VPInstruction::ExplicitVectorLength, Op0_t > m_EVL(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector matches only its opcode, without matching its operands, as the number of operands is not fixed.
VPInstruction_match< VPInstruction::FirstActiveLane, Op0_t > m_FirstActiveLane(const Op0_t &Op0)
bind_ty< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
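The matchers above compose like IR-level PatternMatch. A hedged sketch that folds a redundant "X && true" mask; the capturing m_VPValue(VPValue *&) overload is assumed to be available alongside the no-argument form listed here, and V is the VPValue being simplified:
using namespace llvm::VPlanPatternMatch;
VPValue *X;
// m_VPValue(X) is assumed to be the capturing overload.
if (match(V, m_c_BinaryAnd(m_VPValue(X), m_True())))
  V->replaceAllUsesWith(X); // "X && true" simplifies to X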
initializer< Ty > init(const Ty &Val)
NodeAddr< DefNode * > Def
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
bool isUniformAcrossVFsAndUFs(VPValue *V)
Checks if V is uniform across all VF lanes and UF parts.
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
bool isHeaderMask(const VPValue *V, const VPlan &Plan)
Return true if V is a header mask in Plan.
const SCEV * getSCEVExprForVPValue(const VPValue *V, ScalarEvolution &SE, const Loop *L=nullptr)
Return the SCEV expression for V.
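A sketch of a guard built from these helpers before a transform commits to keeping per-lane values; VPV is a VPValue from the plan:
bool NeedsPerLaneValues =
    !vputils::isSingleScalar(VPV) && !vputils::onlyFirstLaneUsed(VPV);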
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
DenseMap< const Value *, const SCEV * > ValueToSCEVMapTy
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
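This is the idiom the transforms use whenever they erase recipes while walking a block; a sketch assuming VPBB is a VPBasicBlock being cleaned up:
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
  // Safe to erase R here: the iterator has already advanced past it.
  if (R.getNumDefinedValues() == 1 &&
      R.getVPSingleValue()->getNumUsers() == 0 && !R.mayHaveSideEffects())
    R.eraseFromParent();
}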
auto cast_or_null(const Y &Val)
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
iterator_range< po_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_post_order_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in post order while traversing through ...
void sort(IteratorTy Start, IteratorTy End)
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant CI can be safely treated as having been extended from a narrower type with the gi...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
@ DataAndControlFlowWithoutRuntimeCheck
Use predicate to control both data and control flow, but modify the trip count so that a runtime over...
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
@ Default
The result values are uniform if and only if all operands are uniform.
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
RemoveMask_match(const Op0_t &In, Op1_t &Out)
bool match(OpTy *V) const
This struct is a compact representation of a valid (non-zero power of two) alignment.
An information struct used to provide DenseMap with the various necessary components for a given valu...
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
A recipe for handling first-order recurrence phis.
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
A recipe for widening load operations, using the address to load from and an optional mask.
A recipe for widening select instructions.
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
A recipe for widening store operations, using the stored value, the address to store to and an option...