57 if (!VPBB->getParent())
60 auto EndIter = Term ? Term->getIterator() : VPBB->end();
65 VPValue *VPV = Ingredient.getVPSingleValue();
82 *Load, Ingredient.getOperand(0),
nullptr ,
84 Ingredient.getDebugLoc());
87 *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
88 nullptr ,
false ,
false , *VPI,
89 Ingredient.getDebugLoc());
92 Ingredient.getDebugLoc());
100 *VPI, CI->getDebugLoc());
103 CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI,
107 *VPI, Ingredient.getDebugLoc());
111 "inductions must be created earlier");
120 "Only recpies with zero or one defined values expected");
121 Ingredient.eraseFromParent();
138 if (
A->getOpcode() != Instruction::Store ||
139 B->getOpcode() != Instruction::Store)
149 const APInt *Distance;
155 Type *TyA = TypeInfo.inferScalarType(
A->getOperand(0));
157 Type *TyB = TypeInfo.inferScalarType(
B->getOperand(0));
163 uint64_t MaxStoreSize = std::max(SizeA, SizeB);
165 auto VFs =
B->getParent()->getPlan()->vectorFactors();
169 return Distance->
abs().
uge(
177 : ExcludeRecipes(ExcludeRecipes), GroupLeader(GroupLeader), PSE(PSE),
178 L(L), TypeInfo(TypeInfo) {}
185 return ExcludeRecipes.contains(&R) ||
186 (Store && isNoAliasViaDistance(Store, &GroupLeader));
199 std::optional<SinkStoreInfo> SinkInfo = {}) {
200 bool CheckReads = SinkInfo.has_value();
207 if (SinkInfo && SinkInfo->shouldSkip(R))
211 if (!
R.mayWriteToMemory() && !(CheckReads &&
R.mayReadFromMemory()))
229template <
unsigned Opcode>
234 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
235 "Only Load and Store opcodes supported");
236 constexpr bool IsLoad = (Opcode == Instruction::Load);
243 if (!RepR || RepR->getOpcode() != Opcode || !FilterFn(RepR))
247 VPValue *Addr = RepR->getOperand(IsLoad ? 0 : 1);
250 RecipesByAddress[AddrSCEV].push_back(RepR);
255 for (
auto &Group :
Groups) {
274 if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())
279 return RepR && RepR->getOpcode() == Instruction::Alloca;
288 auto InsertIfValidSinkCandidate = [ScalarVFOnly, &WorkList](
304 if (!ScalarVFOnly && RepR->isSingleScalar())
307 WorkList.
insert({SinkTo, Candidate});
319 for (
auto &Recipe : *VPBB)
321 InsertIfValidSinkCandidate(VPBB,
Op);
325 for (
unsigned I = 0;
I != WorkList.
size(); ++
I) {
328 std::tie(SinkTo, SinkCandidate) = WorkList[
I];
333 auto UsersOutsideSinkTo =
335 return cast<VPRecipeBase>(U)->getParent() != SinkTo;
337 if (
any_of(UsersOutsideSinkTo, [SinkCandidate](
VPUser *U) {
338 return !U->usesFirstLaneOnly(SinkCandidate);
341 bool NeedsDuplicating = !UsersOutsideSinkTo.empty();
343 if (NeedsDuplicating) {
347 if (
auto *SinkCandidateRepR =
353 nullptr , *SinkCandidateRepR,
357 Clone = SinkCandidate->
clone();
367 InsertIfValidSinkCandidate(SinkTo,
Op);
377 if (!EntryBB || EntryBB->size() != 1 ||
387 if (EntryBB->getNumSuccessors() != 2)
392 if (!Succ0 || !Succ1)
395 if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
397 if (Succ0->getSingleSuccessor() == Succ1)
399 if (Succ1->getSingleSuccessor() == Succ0)
416 if (!Region1->isReplicator())
418 auto *MiddleBasicBlock =
420 if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
425 if (!Region2 || !Region2->isReplicator())
430 if (!Mask1 || Mask1 != Mask2)
433 assert(Mask1 && Mask2 &&
"both region must have conditions");
439 if (TransformedRegions.
contains(Region1))
446 if (!Then1 || !Then2)
466 VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
472 if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {
473 Phi1ToMove.eraseFromParent();
476 Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
490 TransformedRegions.
insert(Region1);
493 return !TransformedRegions.
empty();
500 std::string RegionName = (
Twine(
"pred.") + Instr->getOpcodeName()).str();
501 assert(Instr->getParent() &&
"Predicated instruction not in any basic block");
502 auto *BlockInMask = PredRecipe->
getMask();
521 RecipeWithoutMask->getDebugLoc());
545 if (RepR->isPredicated())
564 if (ParentRegion && ParentRegion->
getExiting() == CurrentBlock)
576 if (!VPBB->getParent())
580 if (!PredVPBB || PredVPBB->getNumSuccessors() != 1 ||
589 R.moveBefore(*PredVPBB, PredVPBB->
end());
591 auto *ParentRegion = VPBB->getParent();
592 if (ParentRegion && ParentRegion->getExiting() == VPBB)
593 ParentRegion->setExiting(PredVPBB);
597 return !WorkList.
empty();
604 bool ShouldSimplify =
true;
605 while (ShouldSimplify) {
621 if (!
IV ||
IV->getTruncInst())
636 for (
auto *U : FindMyCast->
users()) {
638 if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
639 FoundUserCast = UserCast;
643 FindMyCast = FoundUserCast;
668 if (!WidenOriginalIV || !WidenOriginalIV->isCanonical())
682 WidenOriginalIV->dropPoisonGeneratingFlags();
695 bool IsConditionalAssume = RepR && RepR->isPredicated() &&
697 if (IsConditionalAssume)
700 if (R.mayHaveSideEffects())
704 return all_of(R.definedValues(),
705 [](
VPValue *V) { return V->getNumUsers() == 0; });
724 VPUser *PhiUser = PhiR->getSingleUser();
727 if (PhiUser !=
Incoming->getDefiningRecipe() ||
730 PhiR->replaceAllUsesWith(Start);
731 PhiR->eraseFromParent();
732 Incoming->getDefiningRecipe()->eraseFromParent();
747 Kind, FPBinOp, StartV, CanonicalIV, Step,
"offset.idx");
757 BaseIV = Builder.createScalarCast(Instruction::Trunc, BaseIV, TruncTy,
DL);
763 if (ResultTy != StepTy) {
770 Builder.setInsertPoint(VecPreheader);
771 Step = Builder.createScalarCast(Instruction::Trunc, Step, ResultTy,
DL);
773 return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step,
779 for (
unsigned I = 0;
I !=
Users.size(); ++
I) {
784 Users.insert_range(V->users());
786 return Users.takeVector();
800 nullptr, StartV, StepV, PtrIV->
getDebugLoc(), Builder);
837 Def->getNumUsers() == 0 || !Def->getUnderlyingValue() ||
838 (RepR && (RepR->isSingleScalar() || RepR->isPredicated())))
846 Def->operands(),
true,
848 Clone->insertAfter(Def);
849 Def->replaceAllUsesWith(Clone);
860 PtrIV->replaceAllUsesWith(PtrAdd);
867 if (HasOnlyVectorVFs &&
none_of(WideIV->users(), [WideIV](
VPUser *U) {
868 return U->usesScalars(WideIV);
874 Plan,
ID.getKind(),
ID.getInductionOpcode(),
876 WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
877 WideIV->getDebugLoc(), Builder);
880 if (!HasOnlyVectorVFs) {
882 "plans containing a scalar VF cannot also include scalable VFs");
883 WideIV->replaceAllUsesWith(Steps);
886 WideIV->replaceUsesWithIf(Steps,
887 [WideIV, HasScalableVF](
VPUser &U,
unsigned) {
889 return U.usesFirstLaneOnly(WideIV);
890 return U.usesScalars(WideIV);
906 return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV;
911 if (!Def || Def->getNumOperands() != 2)
919 auto IsWideIVInc = [&]() {
920 auto &
ID = WideIV->getInductionDescriptor();
923 VPValue *IVStep = WideIV->getStepValue();
924 switch (
ID.getInductionOpcode()) {
925 case Instruction::Add:
927 case Instruction::FAdd:
929 case Instruction::FSub:
932 case Instruction::Sub: {
952 return IsWideIVInc() ? WideIV :
nullptr;
972 if (WideIntOrFp && WideIntOrFp->getTruncInst())
985 FirstActiveLane =
B.createScalarZExtOrTrunc(FirstActiveLane, CanonicalIVType,
986 FirstActiveLaneType,
DL);
987 VPValue *EndValue =
B.createAdd(CanonicalIV, FirstActiveLane,
DL);
994 EndValue =
B.createAdd(EndValue, One,
DL);
997 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
999 VPIRValue *Start = WideIV->getStartValue();
1000 VPValue *Step = WideIV->getStepValue();
1001 EndValue =
B.createDerivedIV(
1003 Start, EndValue, Step);
1018 if (WideIntOrFp && WideIntOrFp->getTruncInst())
1025 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
1028 Start, VectorTC, Step);
1057 assert(EndValue &&
"Must have computed the end value up front");
1073 auto *Zero = Plan.
getZero(StepTy);
1074 return B.createPtrAdd(EndValue,
B.createSub(Zero, Step),
1079 return B.createNaryOp(
1080 ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
1082 : Instruction::FAdd,
1083 {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
1095 VPBuilder VectorPHBuilder(VectorPH, VectorPH->begin());
1104 WideIV, VectorPHBuilder, TypeInfo, ResumeTC))
1105 EndValues[WideIV] = EndValue;
1115 R.getVPSingleValue()->replaceAllUsesWith(EndValue);
1116 R.eraseFromParent();
1125 for (
auto [Idx, PredVPBB] :
enumerate(ExitVPBB->getPredecessors())) {
1127 if (PredVPBB == MiddleVPBB)
1129 ExitIRI->getOperand(Idx),
1133 Plan, TypeInfo, PredVPBB, ExitIRI->getOperand(Idx), PSE);
1135 ExitIRI->setOperand(Idx, Escape);
1152 const auto &[V, Inserted] = SCEV2VPV.
try_emplace(ExpR->getSCEV(), ExpR);
1155 ExpR->replaceAllUsesWith(V->second);
1156 ExpR->eraseFromParent();
1165 while (!WorkList.
empty()) {
1167 if (!Seen.
insert(Cur).second)
1175 R->eraseFromParent();
1182static std::optional<std::pair<bool, unsigned>>
1185 std::optional<std::pair<bool, unsigned>>>(R)
1188 [](
auto *
I) {
return std::make_pair(
false,
I->getOpcode()); })
1190 return std::make_pair(
true,
I->getVectorIntrinsicID());
1192 .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe>([](
auto *
I) {
1196 return std::make_pair(
false,
1199 .
Default([](
auto *) {
return std::nullopt; });
1217 Value *V =
Op->getUnderlyingValue();
1223 auto FoldToIRValue = [&]() ->
Value * {
1225 if (OpcodeOrIID->first) {
1226 if (R.getNumOperands() != 2)
1228 unsigned ID = OpcodeOrIID->second;
1229 return Folder.FoldBinaryIntrinsic(
ID,
Ops[0],
Ops[1],
1232 unsigned Opcode = OpcodeOrIID->second;
1241 return Folder.FoldSelect(
Ops[0],
Ops[1],
1244 return Folder.FoldBinOp(Instruction::BinaryOps::Xor,
Ops[0],
1246 case Instruction::Select:
1247 return Folder.FoldSelect(
Ops[0],
Ops[1],
Ops[2]);
1248 case Instruction::ICmp:
1249 case Instruction::FCmp:
1252 case Instruction::GetElementPtr: {
1255 return Folder.FoldGEP(
GEP->getSourceElementType(),
Ops[0],
1265 case Instruction::ExtractElement:
1272 if (
Value *V = FoldToIRValue())
1273 return R.getParent()->getPlan()->getOrAddLiveIn(V);
1279 VPlan *Plan = Def->getParent()->getPlan();
1285 return Def->replaceAllUsesWith(V);
1291 PredPHI->replaceAllUsesWith(
Op);
1304 bool CanCreateNewRecipe =
1311 if (TruncTy == ATy) {
1312 Def->replaceAllUsesWith(
A);
1321 : Instruction::ZExt;
1324 if (
auto *UnderlyingExt = Def->getOperand(0)->getUnderlyingValue()) {
1326 Ext->setUnderlyingValue(UnderlyingExt);
1328 Def->replaceAllUsesWith(Ext);
1330 auto *Trunc = Builder.createWidenCast(Instruction::Trunc,
A, TruncTy);
1331 Def->replaceAllUsesWith(Trunc);
1339 for (
VPUser *U :
A->users()) {
1341 for (
VPValue *VPV : R->definedValues())
1355 Def->replaceAllUsesWith(
X);
1356 Def->eraseFromParent();
1362 return Def->replaceAllUsesWith(
1367 return Def->replaceAllUsesWith(
X);
1371 return Def->replaceAllUsesWith(
1376 return Def->replaceAllUsesWith(
1381 return Def->replaceAllUsesWith(
X);
1385 return Def->replaceAllUsesWith(Plan->
getFalse());
1389 return Def->replaceAllUsesWith(
X);
1392 if (CanCreateNewRecipe &&
1397 (!Def->getOperand(0)->hasMoreThanOneUniqueUser() ||
1398 !Def->getOperand(1)->hasMoreThanOneUniqueUser()))
1399 return Def->replaceAllUsesWith(
1400 Builder.createLogicalAnd(
X, Builder.createOr(
Y, Z)));
1405 return Def->replaceAllUsesWith(Def->getOperand(1));
1410 return Def->replaceAllUsesWith(Builder.createLogicalAnd(
X,
Y));
1414 return Def->replaceAllUsesWith(Plan->
getFalse());
1417 return Def->replaceAllUsesWith(
X);
1421 if (CanCreateNewRecipe &&
1423 return Def->replaceAllUsesWith(Builder.createNot(
C));
1427 Def->setOperand(0,
C);
1428 Def->setOperand(1,
Y);
1429 Def->setOperand(2,
X);
1434 return Def->replaceAllUsesWith(
A);
1437 return Def->replaceAllUsesWith(
A);
1440 return Def->replaceAllUsesWith(
1447 return Def->replaceAllUsesWith(
1449 Def->getDebugLoc(),
"", NW));
1455 return Def->replaceAllUsesWith(Builder.createNaryOp(
1457 {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
1462 const VPRegionBlock *ParentRegion = Def->getParent()->getParent();
1463 bool IsInReplicateRegion = ParentRegion && ParentRegion->
isReplicator();
1464 if (CanCreateNewRecipe && !IsInReplicateRegion &&
1466 return Def->replaceAllUsesWith(Builder.createNaryOp(
1468 {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
1473 return Def->replaceAllUsesWith(
A);
1488 R->setOperand(1,
Y);
1489 R->setOperand(2,
X);
1493 R->replaceAllUsesWith(Cmp);
1498 if (!Cmp->getDebugLoc() && Def->getDebugLoc())
1499 Cmp->setDebugLoc(Def->getDebugLoc());
1511 if (
Op->getNumUsers() > 1 ||
1515 }
else if (!UnpairedCmp) {
1516 UnpairedCmp =
Op->getDefiningRecipe();
1520 UnpairedCmp =
nullptr;
1527 if (NewOps.
size() < Def->getNumOperands()) {
1529 return Def->replaceAllUsesWith(NewAnyOf);
1536 if (CanCreateNewRecipe &&
1542 return Def->replaceAllUsesWith(NewCmp);
1550 return Def->replaceAllUsesWith(Def->getOperand(1));
1556 X = Builder.createWidenCast(Instruction::Trunc,
X, WideStepTy);
1557 Def->replaceAllUsesWith(
X);
1567 Def->setOperand(1, Def->getOperand(0));
1568 Def->setOperand(0,
Y);
1575 return Def->replaceAllUsesWith(Def->getOperand(0));
1581 Def->replaceAllUsesWith(
1582 BuildVector->getOperand(BuildVector->getNumOperands() - 1));
1586 return Def->replaceAllUsesWith(
A);
1592 Def->replaceAllUsesWith(
1593 BuildVector->getOperand(BuildVector->getNumOperands() - 2));
1600 Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
1605 Def->replaceAllUsesWith(
1615 "broadcast operand must be single-scalar");
1616 Def->setOperand(0,
C);
1621 if (Def->getNumOperands() == 1) {
1622 Def->replaceAllUsesWith(Def->getOperand(0));
1627 Phi->replaceAllUsesWith(Phi->getOperand(0));
1633 if (Def->getNumOperands() == 1 &&
1635 return Def->replaceAllUsesWith(IRV);
1648 return Def->replaceAllUsesWith(
A);
1651 Def->replaceAllUsesWith(Builder.createNaryOp(
1652 Instruction::ExtractElement, {A, LaneToExtract}, Def->getDebugLoc()));
1666 auto *IVInc = Def->getOperand(0);
1667 if (IVInc->getNumUsers() == 2) {
1672 if (Phi->getNumUsers() == 1 || (Phi->getNumUsers() == 2 && Inc)) {
1673 Def->replaceAllUsesWith(IVInc);
1675 Inc->replaceAllUsesWith(Phi);
1676 Phi->setOperand(0,
Y);
1692 Steps->replaceAllUsesWith(Steps->getOperand(0));
1700 Def->replaceUsesWithIf(StartV, [](
const VPUser &U,
unsigned Idx) {
1702 return PhiR && PhiR->isInLoop();
1708 Def->replaceAllUsesWith(
A);
1717 [Def,
A](
VPUser *U) { return U->usesScalars(A) || Def == U; })) {
1718 return Def->replaceAllUsesWith(
A);
1722 return Def->replaceAllUsesWith(
A);
1749 while (!Worklist.
empty()) {
1758 R->replaceAllUsesWith(
1759 Builder.createLogicalAnd(HeaderMask, Builder.createLogicalAnd(
X,
Y)));
1778 if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
1787 !WidenStoreR->isConsecutive()) {
1788 assert(!WidenStoreR->isReverse() &&
1789 "Not consecutive memory recipes shouldn't be reversed");
1790 VPValue *Mask = WidenStoreR->getMask();
1799 {WidenStoreR->getOperand(1)});
1804 &WidenStoreR->getIngredient(), {Extract, WidenStoreR->getAddr()},
1805 true ,
nullptr , {},
1807 ScalarStore->insertBefore(WidenStoreR);
1808 WidenStoreR->eraseFromParent();
1816 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1817 true ,
nullptr , *RepR ,
1818 *RepR , RepR->getDebugLoc());
1819 Clone->insertBefore(RepOrWidenR);
1821 VPValue *ExtractOp = Clone->getOperand(0);
1827 Clone->setOperand(0, ExtractOp);
1828 RepR->eraseFromParent();
1837 auto IntroducesBCastOf = [](
const VPValue *
Op) {
1846 return !U->usesScalars(
Op);
1850 if (
any_of(RepOrWidenR->users(), IntroducesBCastOf(RepOrWidenR)) &&
1853 make_filter_range(Op->users(), not_equal_to(RepOrWidenR)),
1854 IntroducesBCastOf(Op)))
1858 auto *IRV = dyn_cast<VPIRValue>(Op);
1859 bool LiveInNeedsBroadcast = IRV && !isa<Constant>(IRV->getValue());
1860 auto *OpR = dyn_cast<VPReplicateRecipe>(Op);
1861 return LiveInNeedsBroadcast || (OpR && OpR->isSingleScalar());
1866 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1867 true ,
nullptr, *RepOrWidenR);
1868 Clone->insertBefore(RepOrWidenR);
1869 RepOrWidenR->replaceAllUsesWith(Clone);
1871 RepOrWidenR->eraseFromParent();
1907 if (Blend->isNormalized() || !
match(Blend->getMask(0),
m_False()))
1908 UniqueValues.
insert(Blend->getIncomingValue(0));
1909 for (
unsigned I = 1;
I != Blend->getNumIncomingValues(); ++
I)
1911 UniqueValues.
insert(Blend->getIncomingValue(
I));
1913 if (UniqueValues.
size() == 1) {
1914 Blend->replaceAllUsesWith(*UniqueValues.
begin());
1915 Blend->eraseFromParent();
1919 if (Blend->isNormalized())
1925 unsigned StartIndex = 0;
1926 for (
unsigned I = 0;
I != Blend->getNumIncomingValues(); ++
I) {
1931 if (Mask->getNumUsers() == 1 && !
match(Mask,
m_False())) {
1938 OperandsWithMask.
push_back(Blend->getIncomingValue(StartIndex));
1940 for (
unsigned I = 0;
I != Blend->getNumIncomingValues(); ++
I) {
1941 if (
I == StartIndex)
1943 OperandsWithMask.
push_back(Blend->getIncomingValue(
I));
1944 OperandsWithMask.
push_back(Blend->getMask(
I));
1949 OperandsWithMask, *Blend, Blend->getDebugLoc());
1950 NewBlend->insertBefore(&R);
1952 VPValue *DeadMask = Blend->getMask(StartIndex);
1954 Blend->eraseFromParent();
1959 if (NewBlend->getNumOperands() == 3 &&
1961 VPValue *Inc0 = NewBlend->getOperand(0);
1962 VPValue *Inc1 = NewBlend->getOperand(1);
1963 VPValue *OldMask = NewBlend->getOperand(2);
1964 NewBlend->setOperand(0, Inc1);
1965 NewBlend->setOperand(1, Inc0);
1966 NewBlend->setOperand(2, NewMask);
1993 APInt MaxVal = AlignedTC - 1;
1996 unsigned NewBitWidth =
2002 bool MadeChange =
false;
2011 if (!WideIV || !WideIV->isCanonical() ||
2012 WideIV->hasMoreThanOneUniqueUser() ||
2013 NewIVTy == WideIV->getScalarType())
2018 VPUser *SingleUser = WideIV->getSingleUser();
2026 auto *NewStart = Plan.
getZero(NewIVTy);
2027 WideIV->setStartValue(NewStart);
2029 WideIV->setStepValue(NewStep);
2036 Cmp->setOperand(1, NewBTC);
2050 return any_of(
Cond->getDefiningRecipe()->operands(), [&Plan, BestVF, BestUF,
2052 return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, PSE);
2065 const SCEV *VectorTripCount =
2070 "Trip count SCEV must be computable");
2091 auto *Term = &ExitingVPBB->
back();
2104 for (
unsigned Part = 0; Part < UF; ++Part) {
2110 Extracts[Part] = Ext;
2122 match(Phi->getBackedgeValue(),
2124 assert(Index &&
"Expected index from ActiveLaneMask instruction");
2141 "Expected one VPActiveLaneMaskPHIRecipe for each unroll part");
2148 "Expected incoming values of Phi to be ActiveLaneMasks");
2153 EntryALM->setOperand(2, ALMMultiplier);
2154 LoopALM->setOperand(2, ALMMultiplier);
2158 ExtractFromALM(EntryALM, EntryExtracts);
2163 ExtractFromALM(LoopALM, LoopExtracts);
2165 Not->setOperand(0, LoopExtracts[0]);
2168 for (
unsigned Part = 0; Part < UF; ++Part) {
2169 Phis[Part]->setStartValue(EntryExtracts[Part]);
2170 Phis[Part]->setBackedgeValue(LoopExtracts[Part]);
2183 auto *Term = &ExitingVPBB->
back();
2195 const SCEV *VectorTripCount =
2201 "Trip count SCEV must be computable");
2220 Term->setOperand(1, Plan.
getTrue());
2225 {}, Term->getDebugLoc());
2227 Term->eraseFromParent();
2262 R.getVPSingleValue()->replaceAllUsesWith(Trunc);
2272 assert(Plan.
hasVF(BestVF) &&
"BestVF is not available in Plan");
2273 assert(Plan.
hasUF(BestUF) &&
"BestUF is not available in Plan");
2301 auto TryToPushSinkCandidate = [&](
VPRecipeBase *SinkCandidate) {
2304 if (SinkCandidate == Previous)
2308 !Seen.
insert(SinkCandidate).second ||
2321 for (
unsigned I = 0;
I != WorkList.
size(); ++
I) {
2324 "only recipes with a single defined value expected");
2339 if (SinkCandidate == FOR)
2342 SinkCandidate->moveAfter(Previous);
2343 Previous = SinkCandidate;
2367 [&VPDT, HoistPoint](
VPUser *U) {
2368 auto *R = cast<VPRecipeBase>(U);
2369 return HoistPoint == R ||
2370 VPDT.properlyDominates(HoistPoint, R);
2372 "HoistPoint must dominate all users of FOR");
2374 auto NeedsHoisting = [HoistPoint, &VPDT,
2376 VPRecipeBase *HoistCandidate = HoistCandidateV->getDefiningRecipe();
2377 if (!HoistCandidate)
2382 HoistCandidate->
getRegion() == EnclosingLoopRegion) &&
2383 "CFG in VPlan should still be flat, without replicate regions");
2385 if (!Visited.
insert(HoistCandidate).second)
2397 return HoistCandidate;
2406 for (
unsigned I = 0;
I != HoistCandidates.
size(); ++
I) {
2409 "only recipes with a single defined value expected");
2421 if (
auto *R = NeedsHoisting(
Op)) {
2424 if (R->getNumDefinedValues() != 1)
2438 HoistCandidate->moveBefore(*HoistPoint->
getParent(),
2458 VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();
2461 while (
auto *PrevPhi =
2463 assert(PrevPhi->getParent() == FOR->getParent());
2465 Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
2484 {FOR, FOR->getBackedgeValue()});
2489 RecurSplice->setOperand(0, FOR);
2495 for (
VPUser *U : RecurSplice->users()) {
2505 VPValue *PenultimateIndex =
B.createSub(LastActiveLane, One);
2506 VPValue *PenultimateLastIter =
2508 {PenultimateIndex, FOR->getBackedgeValue()});
2513 VPValue *Sel =
B.createSelect(Cmp, LastPrevIter, PenultimateLastIter);
2526 RecurKind RK = PhiR->getRecurrenceKind();
2533 RecWithFlags->dropPoisonGeneratingFlags();
2539struct VPCSEDenseMapInfo :
public DenseMapInfo<VPSingleDefRecipe *> {
2541 return Def == getEmptyKey() || Def == getTombstoneKey();
2552 return GEP->getSourceElementType();
2555 .Case<VPVectorPointerRecipe, VPWidenGEPRecipe>(
2556 [](
auto *
I) {
return I->getSourceElementType(); })
2557 .
Default([](
auto *) {
return nullptr; });
2561 static bool canHandle(
const VPSingleDefRecipe *Def) {
2570 if (!
C || (!
C->first && (
C->second == Instruction::InsertValue ||
2571 C->second == Instruction::ExtractValue)))
2577 return !
Def->mayReadFromMemory();
2581 static unsigned getHashValue(
const VPSingleDefRecipe *Def) {
2582 const VPlan *Plan =
Def->getParent()->getPlan();
2583 VPTypeAnalysis TypeInfo(*Plan);
2586 getGEPSourceElementType(Def), TypeInfo.inferScalarType(Def),
2589 if (RFlags->hasPredicate())
2595 static bool isEqual(
const VPSingleDefRecipe *L,
const VPSingleDefRecipe *R) {
2598 if (
L->getVPRecipeID() !=
R->getVPRecipeID() ||
2600 getGEPSourceElementType(L) != getGEPSourceElementType(R) ||
2602 !
equal(
L->operands(),
R->operands()))
2605 "must have valid opcode info for both recipes");
2607 if (LFlags->hasPredicate() &&
2608 LFlags->getPredicate() !=
2614 const VPRegionBlock *RegionL =
L->getRegion();
2615 const VPRegionBlock *RegionR =
R->getRegion();
2618 L->getParent() !=
R->getParent())
2620 const VPlan *Plan =
L->getParent()->getPlan();
2621 VPTypeAnalysis TypeInfo(*Plan);
2622 return TypeInfo.inferScalarType(L) == TypeInfo.inferScalarType(R);
2638 if (!Def || !VPCSEDenseMapInfo::canHandle(Def))
2642 if (!VPDT.
dominates(V->getParent(), VPBB))
2647 Def->replaceAllUsesWith(V);
2666 "Expected vector prehader's successor to be the vector loop region");
2673 return !Op->isDefinedOutsideLoopRegions();
2676 R.moveBefore(*Preheader, Preheader->
end());
2693 assert(!RepR->isPredicated() &&
2694 "Expected prior transformation of predicated replicates to "
2695 "replicate regions");
2700 if (!RepR->isSingleScalar())
2710 if (Def->getNumUsers() == 0)
2719 auto *UserR = cast<VPRecipeBase>(U);
2720 VPBasicBlock *Parent = UserR->getParent();
2723 if (UserR->isPhi() || Parent->getEnclosingLoopRegion())
2726 if (SinkBB && SinkBB != Parent)
2740 "Defining block must dominate sink block");
2766 VPValue *ResultVPV = R.getVPSingleValue();
2768 unsigned NewResSizeInBits = MinBWs.
lookup(UI);
2769 if (!NewResSizeInBits)
2782 (void)OldResSizeInBits;
2790 VPW->dropPoisonGeneratingFlags();
2792 if (OldResSizeInBits != NewResSizeInBits &&
2796 Instruction::ZExt, ResultVPV, OldResTy,
nullptr,
2798 Ext->insertAfter(&R);
2800 Ext->setOperand(0, ResultVPV);
2801 assert(OldResSizeInBits > NewResSizeInBits &&
"Nothing to shrink?");
2804 "Only ICmps should not need extending the result.");
2814 for (
unsigned Idx = StartIdx; Idx != R.getNumOperands(); ++Idx) {
2815 auto *
Op = R.getOperand(Idx);
2816 unsigned OpSizeInBits =
2818 if (OpSizeInBits == NewResSizeInBits)
2820 assert(OpSizeInBits > NewResSizeInBits &&
"nothing to truncate");
2821 auto [ProcessedIter, IterIsEmpty] = ProcessedTruncs.
try_emplace(
Op);
2823 R.setOperand(Idx, ProcessedIter->second);
2831 Builder.setInsertPoint(&R);
2833 Builder.createWidenCast(Instruction::Trunc,
Op, NewResTy);
2834 ProcessedIter->second = NewOp;
2835 R.setOperand(Idx, NewOp);
2843 std::optional<VPDominatorTree> VPDT;
2857 assert(VPBB->getNumSuccessors() == 2 &&
2858 "Two successors expected for BranchOnCond");
2859 unsigned RemovedIdx;
2870 "There must be a single edge between VPBB and its successor");
2879 VPBB->back().eraseFromParent();
2933 VPValue *StartV = CanonicalIVPHI->getStartValue();
2935 auto *CanonicalIVIncrement =
2938 CanonicalIVIncrement->dropPoisonGeneratingFlags();
2939 DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
2950 auto *EntryIncrement = Builder.createOverflowingOp(
2952 DL,
"index.part.next");
2958 {EntryIncrement, TC, ALMMultiplier},
DL,
2959 "active.lane.mask.entry");
2965 LaneMaskPhi->insertAfter(CanonicalIVPHI);
2970 Builder.setInsertPoint(OriginalTerminator);
2971 auto *InLoopIncrement = Builder.createOverflowingOp(
2973 {CanonicalIVIncrement, &Plan.
getVF()}, {
false,
false},
DL);
2975 {InLoopIncrement, TC, ALMMultiplier},
DL,
2976 "active.lane.mask.next");
2981 auto *NotMask = Builder.createNot(ALM,
DL);
2988 bool UseActiveLaneMaskForControlFlow) {
2990 auto *FoundWidenCanonicalIVUser =
find_if(
2992 assert(FoundWidenCanonicalIVUser &&
2993 "Must have widened canonical IV when tail folding!");
2995 auto *WideCanonicalIV =
2998 if (UseActiveLaneMaskForControlFlow) {
3007 nullptr,
"active.lane.mask");
3023 template <
typename OpTy>
bool match(OpTy *V)
const {
3034template <
typename Op0_t,
typename Op1_t>
3053 VPValue *Addr, *Mask, *EndPtr;
3056 auto AdjustEndPtr = [&CurRecipe, &EVL](
VPValue *EndPtr) {
3058 EVLEndPtr->insertBefore(&CurRecipe);
3059 EVLEndPtr->setOperand(1, &EVL);
3063 if (
match(&CurRecipe,
3077 LoadR->insertBefore(&CurRecipe);
3079 Intrinsic::experimental_vp_reverse, {LoadR, Plan->
getTrue(), &EVL},
3088 StoredVal, EVL, Mask);
3090 if (
match(&CurRecipe,
3096 Intrinsic::experimental_vp_reverse,
3097 {ReversedVal, Plan->
getTrue(), &EVL},
3101 AdjustEndPtr(EndPtr), NewReverse, EVL,
3106 if (Rdx->isConditional() &&
3111 if (Interleave->getMask() &&
3116 if (
match(&CurRecipe,
3125 Intrinsic::vp_merge, {Mask,
LHS,
RHS, &EVL},
3145 VPValue *HeaderMask =
nullptr, *EVL =
nullptr;
3150 HeaderMask = R.getVPSingleValue();
3162 NewR->insertBefore(R);
3163 for (
auto [Old, New] :
3164 zip_equal(R->definedValues(), NewR->definedValues()))
3165 Old->replaceAllUsesWith(New);
3179 Merge->insertBefore(LogicalAnd);
3180 LogicalAnd->replaceAllUsesWith(
Merge);
3188 R->eraseFromParent();
3205 "User of VF that we can't transform to EVL.");
3211 [&LoopRegion, &Plan](
VPUser *U) {
3213 m_c_Add(m_Specific(LoopRegion->getCanonicalIV()),
3214 m_Specific(&Plan.getVFxUF()))) ||
3215 isa<VPWidenPointerInductionRecipe>(U);
3217 "Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
3218 "increment of the canonical induction.");
3234 MaxEVL = Builder.createScalarZExtOrTrunc(
3238 Builder.setInsertPoint(Header, Header->getFirstNonPhi());
3239 VPValue *PrevEVL = Builder.createScalarPhi(
3253 Intrinsic::experimental_vp_splice,
3254 {V1, V2, Imm, Plan.
getTrue(), PrevEVL, &EVL},
3258 R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
3275 VPValue *EVLMask = Builder.createICmp(
3336 VPlan &Plan,
const std::optional<unsigned> &MaxSafeElements) {
3344 VPValue *StartV = CanonicalIVPHI->getStartValue();
3347 auto *CurrentIteration =
3349 CurrentIteration->insertAfter(CanonicalIVPHI);
3350 VPBuilder Builder(Header, Header->getFirstNonPhi());
3353 VPPhi *AVLPhi = Builder.createScalarPhi(
3357 if (MaxSafeElements) {
3367 auto *CanonicalIVIncrement =
3369 Builder.setInsertPoint(CanonicalIVIncrement);
3373 OpVPEVL = Builder.createScalarZExtOrTrunc(
3374 OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
3376 auto *NextIter = Builder.createAdd(
3377 OpVPEVL, CurrentIteration, CanonicalIVIncrement->getDebugLoc(),
3378 "current.iteration.next", CanonicalIVIncrement->getNoWrapFlags());
3379 CurrentIteration->addOperand(NextIter);
3383 "avl.next", {
true,
false});
3391 CanonicalIVPHI->replaceAllUsesWith(CurrentIteration);
3392 CanonicalIVIncrement->setOperand(0, CanonicalIVPHI);
3406 assert(!CurrentIteration &&
3407 "Found multiple CurrentIteration. Only one expected");
3408 CurrentIteration = PhiR;
3412 if (!CurrentIteration)
3423 CurrentIteration->
getDebugLoc(),
"current.iteration.iv");
3429 VPValue *Backedge = CanonicalIV->getIncomingValue(1);
3432 "Unexpected canonical iv");
3438 CanonicalIV->eraseFromParent();
3458 if (!
match(EVLPhi->getBackedgeValue(),
3471 [[maybe_unused]]
bool FoundAVLNext =
3474 assert(FoundAVLNext &&
"Didn't find AVL backedge?");
3486 "Expected BranchOnCond with ICmp comparing CanIV increment with vector "
3491 LatchBr->setOperand(
3502 return R->getRegion() ||
3506 for (
const SCEV *Stride : StridesMap.
values()) {
3509 const APInt *StrideConst;
3532 RewriteMap[StrideV] = PSE.
getSCEV(StrideV);
3539 const SCEV *ScevExpr = ExpSCEV->getSCEV();
3542 if (NewSCEV != ScevExpr) {
3544 ExpSCEV->replaceAllUsesWith(NewExp);
3553 const std::function<
bool(
BasicBlock *)> &BlockNeedsPredication) {
3557 auto CollectPoisonGeneratingInstrsInBackwardSlice([&](
VPRecipeBase *Root) {
3562 while (!Worklist.
empty()) {
3565 if (!Visited.
insert(CurRec).second)
3587 RecWithFlags->isDisjoint()) {
3590 Builder.createAdd(
A,
B, RecWithFlags->getDebugLoc());
3591 New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
3592 RecWithFlags->replaceAllUsesWith(New);
3593 RecWithFlags->eraseFromParent();
3596 RecWithFlags->dropPoisonGeneratingFlags();
3601 assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
3602 "found instruction with poison generating flags not covered by "
3603 "VPRecipeWithIRFlags");
3608 if (
VPRecipeBase *OpDef = Operand->getDefiningRecipe())
3620 Instruction &UnderlyingInstr = WidenRec->getIngredient();
3621 VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
3622 if (AddrDef && WidenRec->isConsecutive() &&
3623 BlockNeedsPredication(UnderlyingInstr.
getParent()))
3624 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3626 VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();
3630 InterleaveRec->getInterleaveGroup();
3631 bool NeedPredication =
false;
3633 I < NumMembers; ++
I) {
3636 NeedPredication |= BlockNeedsPredication(Member->getParent());
3639 if (NeedPredication)
3640 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3652 if (InterleaveGroups.empty())
3659 for (
const auto *IG : InterleaveGroups) {
3665 StoredValues.
push_back(StoreR->getStoredValue());
3666 for (
unsigned I = 1;
I < IG->getFactor(); ++
I) {
3673 StoredValues.
push_back(StoreR->getStoredValue());
3677 bool NeedsMaskForGaps =
3678 (IG->requiresScalarEpilogue() && !ScalarEpilogueAllowed) ||
3679 (!StoredValues.
empty() && !IG->isFull());
3691 VPValue *Addr = Start->getAddr();
3700 assert(IG->getIndex(IRInsertPos) != 0 &&
3701 "index of insert position shouldn't be zero");
3705 IG->getIndex(IRInsertPos),
3709 Addr =
B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);
3715 if (IG->isReverse()) {
3718 -(int64_t)IG->getFactor(), NW, InsertPos->getDebugLoc());
3719 ReversePtr->insertBefore(InsertPos);
3723 InsertPos->getMask(), NeedsMaskForGaps,
3724 InterleaveMD, InsertPos->getDebugLoc());
3725 VPIG->insertBefore(InsertPos);
3728 for (
unsigned i = 0; i < IG->getFactor(); ++i)
3731 if (!Member->getType()->isVoidTy()) {
3790 AddOp = Instruction::Add;
3791 MulOp = Instruction::Mul;
3793 AddOp =
ID.getInductionOpcode();
3794 MulOp = Instruction::FMul;
3802 Step = Builder.createScalarCast(Instruction::Trunc, Step, Ty,
DL);
3803 Start = Builder.createScalarCast(Instruction::Trunc, Start, Ty,
DL);
3812 Init = Builder.createWidenCast(Instruction::UIToFP,
Init, StepTy);
3817 Init = Builder.createNaryOp(MulOp, {
Init, SplatStep}, Flags);
3818 Init = Builder.createNaryOp(AddOp, {SplatStart,
Init}, Flags,
3824 WidePHI->insertBefore(WidenIVR);
3835 Builder.setInsertPoint(R->getParent(), std::next(R->getIterator()));
3839 VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, StepTy,
3842 VF = Builder.createScalarZExtOrTrunc(VF, StepTy,
3845 Inc = Builder.createNaryOp(MulOp, {Step, VF}, Flags);
3852 auto *
Next = Builder.createNaryOp(AddOp, {Prev, Inc}, Flags,
3855 WidePHI->addOperand(
Next);
3883 VPlan *Plan = R->getParent()->getPlan();
3884 VPValue *Start = R->getStartValue();
3885 VPValue *Step = R->getStepValue();
3886 VPValue *VF = R->getVFValue();
3888 assert(R->getInductionDescriptor().getKind() ==
3890 "Not a pointer induction according to InductionDescriptor!");
3893 "Recipe should have been replaced");
3899 VPPhi *ScalarPtrPhi = Builder.createScalarPhi(Start,
DL,
"pointer.phi");
3903 Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
3906 Offset = Builder.createOverflowingOp(Instruction::Mul, {
Offset, Step});
3908 Builder.createWidePtrAdd(ScalarPtrPhi,
Offset,
DL,
"vector.gep");
3909 R->replaceAllUsesWith(PtrAdd);
3914 VF = Builder.createScalarZExtOrTrunc(VF, StepTy, TypeInfo.
inferScalarType(VF),
3916 VPValue *Inc = Builder.createOverflowingOp(Instruction::Mul, {Step, VF});
3919 Builder.createPtrAdd(ScalarPtrPhi, Inc,
DL,
"ptr.ind");
3928 if (!R->isReplicator())
3932 R->dissolveToCFGLoop();
3953 assert(Br->getNumOperands() == 2 &&
3954 "BranchOnTwoConds must have exactly 2 conditions");
3958 assert(Successors.size() == 3 &&
3959 "BranchOnTwoConds must have exactly 3 successors");
3964 VPValue *Cond0 = Br->getOperand(0);
3965 VPValue *Cond1 = Br->getOperand(1);
3970 !BrOnTwoCondsBB->
getParent() &&
"regions must already be dissolved");
3983 Br->eraseFromParent();
4006 WidenIVR->replaceAllUsesWith(PtrAdd);
4019 for (
unsigned I = 1;
I != Blend->getNumIncomingValues(); ++
I)
4020 Select = Builder.createSelect(Blend->getMask(
I),
4021 Blend->getIncomingValue(
I),
Select,
4022 R.getDebugLoc(),
"predphi", *Blend);
4023 Blend->replaceAllUsesWith(
Select);
4028 if (!VEPR->getOffset()) {
4030 "Expected unroller to have materialized offset for UF != 1");
4031 VEPR->materializeOffset();
4046 for (
VPValue *
Op : LastActiveL->operands()) {
4047 VPValue *NotMask = Builder.createNot(
Op, LastActiveL->getDebugLoc());
4052 VPValue *FirstInactiveLane = Builder.createNaryOp(
4054 LastActiveL->getDebugLoc(),
"first.inactive.lane");
4060 Builder.createSub(FirstInactiveLane, One,
4061 LastActiveL->getDebugLoc(),
"last.active.lane");
4071 assert(VPI->isMasked() &&
4072 "Unmasked MaskedCond should be simplified earlier");
4073 VPI->replaceAllUsesWith(Builder.createNaryOp(
4083 DebugLoc DL = BranchOnCountInst->getDebugLoc();
4086 ToRemove.push_back(BranchOnCountInst);
4101 ? Instruction::UIToFP
4102 : Instruction::Trunc;
4103 VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
4109 Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
4115 MulOpc = Instruction::FMul;
4116 Flags = VPI->getFastMathFlags();
4118 MulOpc = Instruction::Mul;
4123 MulOpc, {VectorStep, ScalarStep}, Flags, R.getDebugLoc());
4125 VPI->replaceAllUsesWith(VectorStep);
4131 R->eraseFromParent();
4139 struct EarlyExitInfo {
4150 if (Pred == MiddleVPBB)
4155 VPValue *CondOfEarlyExitingVPBB;
4156 [[maybe_unused]]
bool Matched =
4157 match(EarlyExitingVPBB->getTerminator(),
4159 assert(Matched &&
"Terminator must be BranchOnCond");
4163 VPBuilder EarlyExitingBuilder(EarlyExitingVPBB->getTerminator());
4164 auto *CondToEarlyExit = EarlyExitingBuilder.
createNaryOp(
4166 TrueSucc == ExitBlock
4167 ? CondOfEarlyExitingVPBB
4168 : EarlyExitingBuilder.
createNot(CondOfEarlyExitingVPBB));
4174 "exit condition must dominate the latch");
4183 assert(!Exits.
empty() &&
"must have at least one early exit");
4190 for (
const auto &[Num, VPB] :
enumerate(RPOT))
4192 llvm::sort(Exits, [&RPOIdx](
const EarlyExitInfo &
A,
const EarlyExitInfo &
B) {
4193 return RPOIdx[
A.EarlyExitingVPBB] < RPOIdx[
B.EarlyExitingVPBB];
4199 for (
unsigned I = 0;
I + 1 < Exits.
size(); ++
I)
4200 for (
unsigned J =
I + 1; J < Exits.
size(); ++J)
4202 Exits[
I].EarlyExitingVPBB) &&
4203 "RPO sort must place dominating exits before dominated ones");
4209 VPValue *Combined = Exits[0].CondToExit;
4210 for (
const EarlyExitInfo &Info :
drop_begin(Exits))
4211 Combined = Builder.createLogicalOr(Combined, Info.CondToExit);
4217 "Early exit store masking not implemented");
4221 for (
unsigned Idx = 0; Idx != Exits.
size(); ++Idx) {
4225 VectorEarlyExitVPBBs[Idx] = VectorEarlyExitVPBB;
4233 Exits.
size() == 1 ? VectorEarlyExitVPBBs[0]
4267 for (
auto [Exit, VectorEarlyExitVPBB] :
4268 zip_equal(Exits, VectorEarlyExitVPBBs)) {
4269 auto &[EarlyExitingVPBB, EarlyExitVPBB,
_] = Exit;
4281 ExitIRI->getIncomingValueForBlock(EarlyExitingVPBB);
4282 VPValue *NewIncoming = IncomingVal;
4284 VPBuilder EarlyExitBuilder(VectorEarlyExitVPBB);
4289 ExitIRI->removeIncomingValueFor(EarlyExitingVPBB);
4290 ExitIRI->addOperand(NewIncoming);
4293 EarlyExitingVPBB->getTerminator()->eraseFromParent();
4327 bool IsLastDispatch = (
I + 2 == Exits.
size());
4329 IsLastDispatch ? VectorEarlyExitVPBBs.
back()
4335 VectorEarlyExitVPBBs[
I]->setPredecessors({CurrentBB});
4338 CurrentBB = FalseBB;
4345 "Unexpected terminator");
4346 auto *IsLatchExitTaken =
4348 LatchExitingBranch->getOperand(1));
4350 DebugLoc LatchDL = LatchExitingBranch->getDebugLoc();
4351 LatchExitingBranch->eraseFromParent();
4352 Builder.setInsertPoint(LatchVPBB);
4354 {IsAnyExitTaken, IsLatchExitTaken}, LatchDL);
4356 LatchVPBB->
setSuccessors({DispatchVPBB, MiddleVPBB, HeaderVPBB});
4367 Type *RedTy = Ctx.Types.inferScalarType(Red);
4368 VPValue *VecOp = Red->getVecOp();
4371 auto IsExtendedRedValidAndClampRange =
4383 if (Red->isPartialReduction()) {
4388 ExtRedCost = Ctx.TTI.getPartialReductionCost(
4389 Opcode, SrcTy,
nullptr, RedTy, VF, ExtKind,
4392 ? std::optional{Red->getFastMathFlags()}
4396 ExtRedCost = Ctx.TTI.getExtendedReductionCost(
4397 Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
4398 Red->getFastMathFlags(),
CostKind);
4400 return ExtRedCost.
isValid() && ExtRedCost < ExtCost + RedCost;
4410 IsExtendedRedValidAndClampRange(
4413 Ctx.Types.inferScalarType(
A)))
4432 if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
4433 Opcode != Instruction::FAdd)
4436 Type *RedTy = Ctx.Types.inferScalarType(Red);
4439 auto IsMulAccValidAndClampRange =
4446 Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
4449 if (Red->isPartialReduction()) {
4451 Ext1 ? Ctx.Types.inferScalarType(Ext1->getOperand(0)) :
nullptr;
4454 MulAccCost = Ctx.TTI.getPartialReductionCost(
4455 Opcode, SrcTy, SrcTy2, RedTy, VF,
4464 ? std::optional{Red->getFastMathFlags()}
4470 (Ext0->getOpcode() != Ext1->getOpcode() ||
4471 Ext0->getOpcode() == Instruction::CastOps::FPExt))
4475 !Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;
4477 MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,
4485 ExtCost += Ext0->computeCost(VF, Ctx);
4487 ExtCost += Ext1->computeCost(VF, Ctx);
4489 ExtCost += OuterExt->computeCost(VF, Ctx);
4491 return MulAccCost.
isValid() &&
4492 MulAccCost < ExtCost + MulCost + RedCost;
4497 VPValue *VecOp = Red->getVecOp();
4504 assert(Opcode == Instruction::FAdd &&
4505 "MulAccumulateReduction from an FMul must accumulate into an FAdd "
4514 if (RecipeA && RecipeB &&
4515 IsMulAccValidAndClampRange(
FMul, RecipeA, RecipeB,
nullptr)) {
4537 if (!ExtA || ExtB || !
isa<VPIRValue>(ValB) || Red->isPartialReduction())
4539 Type *NarrowTy = Ctx.Types.inferScalarType(ExtA->getOperand(0));
4553 Builder.createWidenCast(Instruction::CastOps::Trunc, ValB, NarrowTy);
4554 Type *WideTy = Ctx.Types.inferScalarType(ExtA);
4555 ValB = ExtB = Builder.createWidenCast(ExtOpc, Trunc, WideTy);
4556 Mul->setOperand(1, ExtB);
4566 ExtendAndReplaceConstantOp(RecipeA, RecipeB,
B,
Mul);
4571 IsMulAccValidAndClampRange(
Mul, RecipeA, RecipeB,
nullptr)) {
4578 if (!
Sub && IsMulAccValidAndClampRange(
Mul,
nullptr,
nullptr,
nullptr))
4587 if (!Red->isPartialReduction() &&
4596 ExtendAndReplaceConstantOp(Ext0, Ext1,
B,
Mul);
4605 (Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
4606 Ext0->getOpcode() == Ext1->getOpcode() &&
4607 IsMulAccValidAndClampRange(
Mul, Ext0, Ext1, Ext) &&
Mul->hasOneUse()) {
4609 Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(),
nullptr,
4610 *Ext0, *Ext0, Ext0->getDebugLoc());
4611 NewExt0->insertBefore(Ext0);
4616 Ext->getResultType(),
nullptr, *Ext1,
4617 *Ext1, Ext1->getDebugLoc());
4620 Mul->setOperand(0, NewExt0);
4621 Mul->setOperand(1, NewExt1);
4622 Red->setOperand(1,
Mul);
4635 auto IP = std::next(Red->getIterator());
4636 auto *VPBB = Red->getParent();
4646 Red->replaceAllUsesWith(AbstractR);
4676 for (
VPValue *VPV : VPValues) {
4685 if (
User->usesScalars(VPV))
4688 HoistPoint = HoistBlock->
begin();
4692 "All users must be in the vector preheader or dominated by it");
4697 VPV->replaceUsesWithIf(Broadcast,
4698 [VPV, Broadcast](
VPUser &U,
unsigned Idx) {
4699 return Broadcast != &U && !U.usesScalars(VPV);
4716 if (RepR->isPredicated() || !RepR->isSingleScalar() ||
4717 RepR->getOpcode() != Instruction::Load)
4720 VPValue *Addr = RepR->getOperand(0);
4723 if (!
Loc.AATags.Scope)
4728 if (R.mayWriteToMemory()) {
4730 if (!
Loc || !
Loc->AATags.Scope || !
Loc->AATags.NoAlias)
4738 for (
auto &[LoadRecipe, LoadLoc] : CandidateLoads) {
4742 const AAMDNodes &LoadAA = LoadLoc.AATags;
4758 return CommonMetadata;
4761template <
unsigned Opcode>
4766 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
4767 "Only Load and Store opcodes supported");
4768 constexpr bool IsLoad = (Opcode == Instruction::Load);
4774 return TypeInfo.
inferScalarType(IsLoad ? Recipe : Recipe->getOperand(0));
4779 for (
auto Recipes :
Groups) {
4780 if (Recipes.size() < 2)
4788 VPValue *MaskI = RecipeI->getMask();
4789 Type *TypeI = GetLoadStoreValueType(RecipeI);
4795 bool HasComplementaryMask =
false;
4800 VPValue *MaskJ = RecipeJ->getMask();
4801 Type *TypeJ = GetLoadStoreValueType(RecipeJ);
4802 if (TypeI == TypeJ) {
4812 if (HasComplementaryMask) {
4813 assert(Group.
size() >= 2 &&
"must have at least 2 entries");
4823template <
typename InstType>
4841 for (
auto &Group :
Groups) {
4861 return R->isSingleScalar() == IsSingleScalar;
4863 "all members in group must agree on IsSingleScalar");
4868 LoadWithMinAlign->getUnderlyingInstr(), {EarliestLoad->getOperand(0)},
4869 IsSingleScalar,
nullptr, *EarliestLoad, CommonMetadata);
4871 UnpredicatedLoad->insertBefore(EarliestLoad);
4875 Load->replaceAllUsesWith(UnpredicatedLoad);
4876 Load->eraseFromParent();
4886 if (!StoreLoc || !StoreLoc->AATags.Scope)
4892 StoresToSink.
end());
4896 SinkStoreInfo SinkInfo(StoresToSinkSet, *StoresToSink[0], PSE, L, TypeInfo);
4910 for (
auto &Group :
Groups) {
4923 VPValue *SelectedValue = Group[0]->getOperand(0);
4926 bool IsSingleScalar = Group[0]->isSingleScalar();
4927 for (
unsigned I = 1;
I < Group.size(); ++
I) {
4928 assert(IsSingleScalar == Group[
I]->isSingleScalar() &&
4929 "all members in group must agree on IsSingleScalar");
4930 VPValue *Mask = Group[
I]->getMask();
4932 SelectedValue = Builder.createSelect(Mask,
Value, SelectedValue,
4941 StoreWithMinAlign->getUnderlyingInstr(),
4942 {SelectedValue, LastStore->getOperand(1)}, IsSingleScalar,
4943 nullptr, *LastStore, CommonMetadata);
4944 UnpredicatedStore->insertBefore(*InsertBB, LastStore->
getIterator());
4948 Store->eraseFromParent();
4955 assert(Plan.
hasVF(BestVF) &&
"BestVF is not available in Plan");
4956 assert(Plan.
hasUF(BestUF) &&
"BestUF is not available in Plan");
5021 auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](
VPUser *U) {
5023 return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
5030 none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))
5040 DefR->replaceUsesWithIf(
5041 BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
5043 return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
5057 for (
VPValue *Def : R.definedValues()) {
5070 auto IsCandidateUnpackUser = [Def](
VPUser *U) {
5072 return U->usesScalars(Def) &&
5075 if (
none_of(Def->users(), IsCandidateUnpackUser))
5082 Unpack->insertAfter(&R);
5083 Def->replaceUsesWithIf(Unpack,
5084 [&IsCandidateUnpackUser](
VPUser &U,
unsigned) {
5085 return IsCandidateUnpackUser(&U);
5095 bool RequiresScalarEpilogue,
5107 assert(StepR->getParent() == VectorPHVPBB &&
5108 "Step must be defined in VectorPHVPBB");
5110 InsertPt = std::next(StepR->getIterator());
5112 VPBuilder Builder(VectorPHVPBB, InsertPt);
5120 if (TailByMasking) {
5121 TC = Builder.createAdd(
5132 Builder.createNaryOp(Instruction::URem, {TC, Step},
5141 if (RequiresScalarEpilogue) {
5143 "requiring scalar epilogue is not supported with fail folding");
5146 R = Builder.createSelect(IsZero, Step, R);
5160 "VF and VFxUF must be materialized together");
5172 Builder.createElementCount(TCTy, VFEC * Plan.
getConcreteUF());
5179 VPValue *RuntimeVF = Builder.createElementCount(TCTy, VFEC);
5183 BC, [&VF](
VPUser &U,
unsigned) {
return !U.usesScalars(&VF); });
5187 VPValue *MulByUF = Builder.createOverflowingOp(
5199 BasicBlock *EntryBB = Entry->getIRBasicBlock();
5207 const SCEV *Expr = ExpSCEV->getSCEV();
5210 ExpandedSCEVs[ExpSCEV->getSCEV()] = Res;
5215 ExpSCEV->eraseFromParent();
5218 "VPExpandSCEVRecipes must be at the beginning of the entry block, "
5219 "before any VPIRInstructions");
5222 auto EI = Entry->begin();
5232 return ExpandedSCEVs;
5244 VPValue *OpV,
unsigned Idx,
bool IsScalable) {
5248 return Member0Op == OpV;
5252 return !IsScalable && !W->getMask() && W->isConsecutive() &&
5255 return IR->getInterleaveGroup()->isFull() &&
IR->getVPValue(Idx) == OpV;
5272 for (
unsigned Idx = 0; Idx != WideMember0->getNumOperands(); ++Idx) {
5275 OpsI.
push_back(
Op->getDefiningRecipe()->getOperand(Idx));
5280 if (
any_of(
enumerate(OpsI), [WideMember0, Idx, IsScalable](
const auto &
P) {
5281 const auto &[
OpIdx, OpV] =
P;
5296 if (!InterleaveR || InterleaveR->
getMask())
5297 return std::nullopt;
5299 Type *GroupElementTy =
nullptr;
5303 [&TypeInfo, GroupElementTy](
VPValue *
Op) {
5304 return TypeInfo.inferScalarType(Op) == GroupElementTy;
5306 return std::nullopt;
5311 [&TypeInfo, GroupElementTy](
VPValue *
Op) {
5312 return TypeInfo.inferScalarType(Op) == GroupElementTy;
5314 return std::nullopt;
5318 if (IG->getFactor() != IG->getNumMembers())
5319 return std::nullopt;
5325 assert(
Size.isScalable() == VF.isScalable() &&
5326 "if Size is scalable, VF must be scalable and vice versa");
5327 return Size.getKnownMinValue();
5331 unsigned MinVal = VF.getKnownMinValue();
5333 if (IG->getFactor() == MinVal && GroupSize == GetVectorBitWidthForVF(VF))
5336 return std::nullopt;
5344 return RepR && RepR->isSingleScalar();
5351 auto *R = V->getDefiningRecipe();
5360 for (
unsigned Idx = 0,
E = WideMember0->getNumOperands(); Idx !=
E; ++Idx)
5361 WideMember0->setOperand(
5370 auto *LI =
cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos());
5372 *LI, LoadGroup->getAddr(), LoadGroup->getMask(),
true,
5373 false, {}, LoadGroup->getDebugLoc());
5374 L->insertBefore(LoadGroup);
5380 assert(RepR->isSingleScalar() &&
5382 "must be a single scalar load");
5383 NarrowedOps.
insert(RepR);
5388 VPValue *PtrOp = WideLoad->getAddr();
5390 PtrOp = VecPtr->getOperand(0);
5395 nullptr, {}, *WideLoad);
5396 N->insertBefore(WideLoad);
5401std::unique_ptr<VPlan>
5421 "unexpected branch-on-count");
5425 std::optional<ElementCount> VFToOptimize;
5442 if (R.mayWriteToMemory() && !InterleaveR)
5457 std::optional<ElementCount> NarrowedVF =
5459 if (!NarrowedVF || (VFToOptimize && NarrowedVF != VFToOptimize))
5461 VFToOptimize = NarrowedVF;
5464 if (InterleaveR->getStoredValues().empty())
5469 auto *Member0 = InterleaveR->getStoredValues()[0];
5479 VPRecipeBase *DefR = Op.value()->getDefiningRecipe();
5482 auto *IR = dyn_cast<VPInterleaveRecipe>(DefR);
5483 return IR && IR->getInterleaveGroup()->isFull() &&
5484 IR->getVPValue(Op.index()) == Op.value();
5493 VFToOptimize->isScalable()))
5498 if (StoreGroups.
empty())
5502 bool RequiresScalarEpilogue =
5513 std::unique_ptr<VPlan> NewPlan;
5515 NewPlan = std::unique_ptr<VPlan>(Plan.
duplicate());
5516 Plan.
setVF(*VFToOptimize);
5517 NewPlan->removeVF(*VFToOptimize);
5523 for (
auto *StoreGroup : StoreGroups) {
5529 *
SI, StoreGroup->getAddr(), Res,
nullptr,
true,
5530 false, {}, StoreGroup->getDebugLoc());
5531 S->insertBefore(StoreGroup);
5532 StoreGroup->eraseFromParent();
5544 if (VFToOptimize->isScalable()) {
5557 RequiresScalarEpilogue, Step);
5559 Inc->setOperand(1, Step);
5565 "All VPVectorPointerRecipes should have been removed");
5581 "must have a BranchOnCond");
5584 if (VF.
isScalable() && VScaleForTuning.has_value())
5585 VectorStep *= *VScaleForTuning;
5586 assert(VectorStep > 0 &&
"trip count should not be zero");
5590 MiddleTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
5597 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
5609 "Cannot handle loops with uncountable early exits");
5682 make_range(MiddleVPBB->getFirstNonPhi(), MiddleVPBB->end()))) {
5696 "vector.recur.extract.for.phi");
5714 auto CheckSentinel = [&SE](
const SCEV *IVSCEV,
bool UseMax,
5715 bool Signed) -> std::optional<APInt> {
5726 return std::nullopt;
5734 PhiR->getRecurrenceKind()))
5743 VPValue *BackedgeVal = PhiR->getBackedgeValue();
5744 VPValue *CondSelect = BackedgeVal;
5759 VPValue *
IV = TrueVal == PhiR ? FalseVal : TrueVal;
5773 bool UseSigned =
true;
5774 std::optional<APInt> SentinelVal =
5775 CheckSentinel(IVSCEV, UseMax,
true);
5777 SentinelVal = CheckSentinel(IVSCEV, UseMax,
false);
5786 if (AR->hasNoSignedWrap())
5788 else if (AR->hasNoUnsignedWrap())
5801 VPIRFlags Flags(MinMaxKind,
false,
false,
5810 VPValue *StartVPV = PhiR->getStartValue();
5818 MiddleBuilder.
createSelect(Cmp, ReducedIV, StartVPV, ExitDL);
5827 AnyOfPhi->insertAfter(PhiR);
5831 if (TrueVal == PhiR)
5838 {StartVPV, ReducedIV, OrVal}, {}, ExitDL);
5850 *CondSelect,
RdxUnordered{1}, {}, PhiR->hasUsesOutsideReductionChain());
5851 NewPhiR->insertBefore(PhiR);
5852 PhiR->replaceAllUsesWith(NewPhiR);
5853 PhiR->eraseFromParent();
5861struct ExtendedReductionOperand {
5864 std::array<VPWidenCastRecipe *, 2> CastRecipes = {};
5870struct VPPartialReductionChain {
5873 VPWidenRecipe *ReductionBinOp;
5875 ExtendedReductionOperand ExtendedOp;
5876 unsigned ScaleFactor;
5899 auto *Trunc = Builder.createWidenCast(Instruction::CastOps::Trunc,
5902 BinOp->
setOperand(1, Builder.createWidenCast(ExtOpc, Trunc, WideTy));
5915 if (!
Mul->hasOneUse() ||
5916 (Ext->getOpcode() != MulLHS->getOpcode() && MulLHS != MulRHS) ||
5917 MulLHS->getOpcode() != MulRHS->getOpcode())
5920 Mul->setOperand(0, Builder.createWidenCast(MulLHS->getOpcode(),
5921 MulLHS->getOperand(0),
5922 Ext->getResultType()));
5923 Mul->setOperand(1, MulLHS == MulRHS
5924 ?
Mul->getOperand(0)
5925 : Builder.createWidenCast(MulRHS->getOpcode(),
5926 MulRHS->getOperand(0),
5927 Ext->getResultType()));
5936static void transformToPartialReduction(
const VPPartialReductionChain &Chain,
5965 if (WidenRecipe->
getOpcode() == Instruction::Sub &&
5975 Builder.insert(NegRecipe);
5980 BinOp = optimizeExtendsForPartialReduction(BinOp, TypeInfo);
5990 assert((!ExitValue || IsLastInChain) &&
5991 "if we found ExitValue, it must match RdxPhi's backedge value");
6002 PartialRed->insertBefore(WidenRecipe);
6019 auto *NewScaleFactor = Plan.
getConstantInt(32, Chain.ScaleFactor);
6020 StartInst->setOperand(2, NewScaleFactor);
6028 VPValue *OldStartValue = StartInst->getOperand(0);
6029 StartInst->setOperand(0, StartInst->getOperand(1));
6033 assert(RdxResult &&
"Could not find reduction result");
6036 constexpr unsigned SubOpc = Instruction::BinaryOps::Sub;
6042 [&NewResult](
VPUser &U,
unsigned Idx) {
return &
U != NewResult; });
6048static bool isValidPartialReduction(
const VPPartialReductionChain &Chain,
6052 -> std::pair<Type *, TargetTransformInfo::PartialReductionExtendKind> {
6058 return {ExtOpType, ExtKind};
6060 ExtendedReductionOperand ExtendedOp = Chain.ExtendedOp;
6064 Type *ExtOpTypeA, *ExtOpTypeB;
6066 std::tie(ExtOpTypeA, ExtKindA) = GetExtInfo(ExtendA);
6067 std::tie(ExtOpTypeB, ExtKindB) = GetExtInfo(ExtendB);
6071 if (!ExtendB && ExtendedOp.BinOp &&
6072 ExtendedOp.BinOp != Chain.ReductionBinOp) {
6080 ExtOpTypeB = ExtOpTypeA;
6081 ExtKindB = ExtKindA;
6084 std::optional<unsigned> BinOpc;
6085 if (ExtendedOp.BinOp && ExtendedOp.BinOp != Chain.ReductionBinOp)
6093 WidenRecipe->
getOpcode(), ExtOpTypeA, ExtOpTypeB, PhiType, VF,
6094 ExtKindA, ExtKindB, BinOpc, CostCtx.
CostKind,
6096 ? std::optional{WidenRecipe->getFastMathFlags()}
6122static std::optional<ExtendedReductionOperand>
6125 "Op should be operand of UpdateR");
6127 std::optional<TTI::PartialReductionExtendKind> OuterExtKind;
6130 VPValue *CastSource = CastRecipe->getOperand(0);
6139 OuterExtKind = getPartialReductionExtendKind(CastRecipe);
6140 }
else if (UpdateR->
getOpcode() == Instruction::Add ||
6141 UpdateR->
getOpcode() == Instruction::FAdd) {
6145 return ExtendedReductionOperand{UpdateR, {CastRecipe,
nullptr}};
6149 if (!
Op->hasOneUse())
6150 return std::nullopt;
6159 return std::nullopt;
6169 return std::nullopt;
6179 return std::nullopt;
6183 if (Cast && OuterExtKind &&
6184 getPartialReductionExtendKind(Cast) != OuterExtKind)
6185 return std::nullopt;
6187 return ExtendedReductionOperand{BinOp, {LHSCast, RHSCast}};
6194static std::optional<SmallVector<VPPartialReductionChain>>
6202 return std::nullopt;
6212 VPValue *CurrentValue = ExitValue;
6213 while (CurrentValue != RedPhiR) {
6216 return std::nullopt;
6223 std::optional<ExtendedReductionOperand> ExtendedOp =
6224 matchExtendedReductionOperand(UpdateR,
Op);
6226 ExtendedOp = matchExtendedReductionOperand(UpdateR, PrevValue);
6228 return std::nullopt;
6233 ExtendedOp->CastRecipes[0]->getOperand(0));
6236 return std::nullopt;
6238 VPPartialReductionChain Chain(
6239 {UpdateR, *ExtendedOp,
6241 if (!isValidPartialReduction(Chain, PhiType, CostCtx,
Range))
6242 return std::nullopt;
6245 CurrentValue = PrevValue;
6250 std::reverse(Chains.
begin(), Chains.
end());
6269 if (
auto Chains = getScaledReductions(RedPhiR, CostCtx,
Range))
6270 ChainsByPhi.
try_emplace(RedPhiR, std::move(*Chains));
6273 if (ChainsByPhi.
empty())
6280 for (
const auto &[
_, Chains] : ChainsByPhi)
6281 for (
const VPPartialReductionChain &Chain : Chains) {
6282 PartialReductionOps.
insert(Chain.ExtendedOp.BinOp);
6283 ScaledReductionMap[Chain.ReductionBinOp] = Chain.ScaleFactor;
6291 return PartialReductionOps.contains(cast<VPRecipeBase>(U));
6300 for (
auto &[RedPhiR, Chains] : ChainsByPhi) {
6301 for (
const VPPartialReductionChain &Chain : Chains) {
6302 if (!
all_of(Chain.ExtendedOp.CastRecipes, ExtendUsersValid)) {
6306 auto UseIsValid = [&, RedPhiR = RedPhiR](
VPUser *U) {
6308 return PhiR == RedPhiR;
6310 return Chain.ScaleFactor == ScaledReductionMap.
lookup_or(R, 0) ||
6316 if (!
all_of(Chain.ReductionBinOp->
users(), UseIsValid)) {
6325 auto *RepR = dyn_cast<VPReplicateRecipe>(U);
6326 return RepR && isa<StoreInst>(RepR->getUnderlyingInstr());
6335 for (
auto &[Phi, Chains] : ChainsByPhi)
6336 for (
const VPPartialReductionChain &Chain : Chains)
6337 transformToPartialReduction(Chain, CostCtx.
Types, Plan, Phi);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
ReachingDefInfo InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool isEqual(const Function &Caller, const Function &Callee)
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static bool isSentinel(const DWARFDebugNames::AttributeEncoding &AE)
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Legalize the Machine IR a function s Machine IR
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file provides utility analysis objects describing memory locations.
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
const SmallVectorImpl< MachineOperand > & Cond
This is the interface for a metadata-based scoped no-alias analysis.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
This file implements the TypeSwitch template, which mimics a switch() statement whose cases are type ...
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
This file declares the class VPlanVerifier, which contains utility functions to check the consistency...
This file contains the declarations of the Vectorization Plan base classes:
static const X86InstrFMA3Group Groups[]
static const uint32_t IV[8]
Helper for extra no-alias checks via known-safe recipe and SCEV.
SinkStoreInfo(const SmallPtrSetImpl< VPRecipeBase * > &ExcludeRecipes, VPReplicateRecipe &GroupLeader, PredicatedScalarEvolution &PSE, const Loop &L, VPTypeAnalysis &TypeInfo)
bool shouldSkip(VPRecipeBase &R) const
Return true if R should be skipped during alias checking, either because it's in the exclude set or b...
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
unsigned getActiveBits() const
Compute the number of active bits in the value.
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
APInt abs() const
Get the absolute value.
unsigned getBitWidth() const
Return the number of bits in the APInt.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
@ NoAlias
The two locations do not alias at all.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & back() const
back - Get the last element.
const T & front() const
front - Get the first element.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
This class represents a range of values.
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
static DebugLoc getCompilerGenerated()
static DebugLoc getUnknown()
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
ValueT lookup_or(const_arg_type_t< KeyT > Val, U &&Default) const
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
constexpr bool isVector() const
One or more elements.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Utility class for floating point operations which can have information about relaxed accuracy require...
Convenience struct for specifying and reasoning about fast-math flags.
Represents flags for the getelementptr instruction/expression.
GEPNoWrapFlags withoutNoUnsignedWrap() const
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
static InstructionCost getInvalid(CostType Val=0)
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
The group of interleaved loads/stores sharing the same stride and close to each other.
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
uint32_t getNumMembers() const
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
This class implements a map that also provides access to all stored values in a deterministic order.
std::pair< iterator, bool > try_emplace(const KeyT &Key, Ts &&...Args)
ValueT lookup(const KeyT &Key) const
Representation for a specific memory location.
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
static LLVM_ABI unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
static bool isFindLastRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
RegionT * getParent() const
Get the parent of the Region.
This class uses information about analyze scalars to rewrite expressions in canonical form.
LLVM_ABI Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
static const SCEV * rewrite(const SCEV *Scev, ScalarEvolution &SE, ValueToSCEVMapTy &Map)
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getUDivExpr(SCEVUse LHS, SCEVUse RHS)
Get a canonical unsigned division expression, or something simpler if possible.
const DataLayout & getDataLayout() const
Return the DataLayout associated with the module this SCEV instance is operating on.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI bool isKnownNonZero(const SCEV *S)
Test if the given expression is known to be non-zero.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getMinusSCEV(SCEVUse LHS, SCEVUse RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
ConstantRange getSignedRange(const SCEV *S)
Determine the signed range for a particular SCEV.
LLVM_ABI bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
ConstantRange getUnsignedRange(const SCEV *S)
Determine the unsigned range for a particular SCEV.
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, SCEVUse LHS, SCEVUse RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
static LLVM_ABI bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias)
static LLVM_ABI AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionalit...
TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)
Add a case on the given type.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
RecipeListTy::iterator iterator
Instruction iterators...
iterator begin()
Recipe iterator methods.
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
VPRegionBlock * getEnclosingLoopRegion()
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
const VPRecipeBase & front() const
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
const VPRecipeBase & back() const
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
VPRegionBlock * getParent()
const VPBasicBlock * getExitingBasicBlock() const
size_t getNumSuccessors() const
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
const VPBlocksTy & getPredecessors() const
const std::string & getName() const
void clearSuccessors()
Remove all the successors of this block.
VPBlockBase * getSinglePredecessor() const
const VPBasicBlock * getEntryBasicBlock() const
VPBlockBase * getSingleHierarchicalPredecessor()
VPBlockBase * getSingleSuccessor() const
const VPBlocksTy & getSuccessors() const
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
static bool isLatch(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop latch, using isHeader().
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
static void transferSuccessors(VPBlockBase *Old, VPBlockBase *New)
Transfer successors from Old to New. New must have no successors.
static SmallVector< VPBasicBlock * > blocksInSingleSuccessorChainBetween(VPBasicBlock *FirstBB, VPBasicBlock *LastBB)
Returns the blocks between FirstBB and LastBB, where FirstBB to LastBB forms a single-successor chain.
A recipe for generating conditional branches on the bits of a mask.
RAII object that stores the current insertion point and restores it when the object is destroyed.
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPValue * createElementCount(Type *Ty, ElementCount EC)
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRMetadata &Metadata={})
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPInstruction * createOverflowingOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *Current, VPValue *Step, const Twine &Name="")
Convert the input value Current to the corresponding value of an induction with Start and Step values...
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
Canonical scalar induction phi of the vector loop.
A recipe for generating the phi node tracking the current scalar iteration index.
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
ArrayRef< VPRecipeValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)
A recipe to combine multiple recipes into a single 'expression' recipe, which should be considered a ...
A special type of VPBasicBlock that wraps an existing IR basic block.
Class to record and manage LLVM IR flags.
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.
This is a concrete Recipe that models a single VPlan-level instruction.
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
@ ComputeAnyOfResult
Compute the final result of a AnyOf reduction with select(cmp(),x,y), where one of (x,...
@ ExtractPenultimateElement
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
@ FirstOrderRecurrenceSplice
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
@ BuildVector
Creates a fixed-width vector containing all operands.
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
@ CanonicalIVIncrementForPart
const InterleaveGroup< Instruction > * getInterleaveGroup() const
VPValue * getMask() const
Return the mask used by this recipe.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
A recipe for interleaved memory operations with vector-predication intrinsics.
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
VPValue * getIncomingValue(unsigned Idx) const
Returns the incoming VPValue with index Idx.
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
VPRegionBlock * getRegion()
VPBasicBlock * getParent()
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Helper class to create VPRecipes from IR instructions.
VPRecipeBase * getRecipe(Instruction *I)
Return the recipe created for given ingredient.
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
A recipe for handling reduction phis.
void setVFScaleFactor(unsigned ScaleFactor)
Set the VFScaleFactor for this reduction phi.
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
A recipe to represent inloop, ordered or partial reduction operations.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
const VPBlockBase * getEntry() const
Type * getCanonicalIVType()
Return the type of the canonical IV for loop regions.
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
const VPBlockBase * getExiting() const
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
bool isSingleScalar() const
bool isPredicated() const
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
An analysis for type-inference for VPValues.
LLVMContext & getContext()
Return the LLVMContext used by the analysis.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
void setOperand(unsigned I, VPValue *New)
unsigned getNumOperands() const
VPValue * getOperand(unsigned N) const
void addOperand(VPValue *Operand)
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
void setUnderlyingValue(Value *Val)
void replaceAllUsesWith(VPValue *New)
unsigned getNumUsers() const
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
A recipe to compute a pointer to the last element of each part of a widened memory access for widened...
A Recipe for widening the canonical induction variable of the vector loop.
VPWidenCastRecipe is a recipe to create vector cast instructions.
Instruction::CastOps getOpcode() const
A recipe for handling GEP instructions.
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
VPIRValue * getStartValue() const
Returns the start value of the induction.
PHINode * getPHINode() const
Returns the underlying PHINode if one exists, or null otherwise.
VPValue * getStepValue()
Returns the step value of the induction.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
VPIRValue * getStartValue() const
Returns the start value of the induction.
VPValue * getSplatVFValue() const
If the recipe has been unrolled, return the VPValue for the induction increment, otherwise return nul...
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
A recipe for widening vector intrinsics.
A common base class for widening memory operations.
A recipe for widened phis.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
unsigned getOpcode() const
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
bool hasVF(ElementCount VF) const
const DataLayout & getDataLayout() const
LLVMContext & getContext() const
VPBasicBlock * getEntry()
bool hasScalableVF() const
VPValue * getTripCount() const
The trip count of the original loop.
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
VPSymbolicValue & getVFxUF()
Returns VF * UF of the vector loop region.
VPIRValue * getAllOnesValue(Type *Ty)
Return a VPIRValue wrapping the AllOnes value of type Ty.
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
bool hasUF(unsigned UF) const
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
VPSymbolicValue & getVectorTripCount()
The vector trip count.
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
VPIRValue * getZero(Type *Ty)
Return a VPIRValue wrapping the null value of type Ty.
void setVF(ElementCount VF)
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
unsigned getConcreteUF() const
Returns the concrete UF of the plan, after unrolling.
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
VPSymbolicValue & getUF()
Returns the UF of the vector loop region.
bool hasScalarVFOnly() const
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
VPSymbolicValue & getVF()
Returns the VF of the vector loop region.
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
constexpr bool hasKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns true if there exists a value X where RHS.multiplyCoefficientBy(X) will result in a value whos...
constexpr ScalarTy getFixedValue() const
constexpr ScalarTy getKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns a value X where RHS.multiplyCoefficientBy(X) will result in a value whose quantity matches ou...
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM)
Return A unsign-divided by B, rounded by the given rounding mode.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedStore(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedStore Intrinsic.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ?
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, OpTy > m_ZExtOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
auto match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedLoad(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedLoad Intrinsic.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
SpecificCmpClass_match< LHS, RHS, CmpInst > m_SpecificCmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::UDiv > m_UDiv(const LHS &L, const RHS &R)
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::FAdd, true > m_c_FAdd(const LHS &L, const RHS &R)
Matches FAdd with LHS and RHS in either order.
LogicalOp_match< LHS, RHS, Instruction::And, true > m_c_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R with LHS and RHS in either order.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
SCEVAffineAddRec_match< Op0_t, Op1_t, class_match< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
bool match(const SCEV *S, const Pattern &P)
class_match< const SCEV > m_SCEV()
VPInstruction_match< VPInstruction::ExtractLastLane, VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > > m_ExtractLastLaneOfLastPart(const Op0_t &Op0)
AllRecipe_commutative_match< Instruction::And, Op0_t, Op1_t > m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1)
Match a binary AND operation.
AllRecipe_match< Instruction::Or, Op0_t, Op1_t > m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
Match a binary OR operation.
VPInstruction_match< VPInstruction::AnyOf > m_AnyOf()
AllRecipe_commutative_match< Instruction::Or, Op0_t, Op1_t > m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ComputeReductionResult, Op0_t > m_ComputeReductionResult(const Op0_t &Op0)
auto m_WidenAnyExtend(const Op0_t &Op0)
VPInstruction_match< VPInstruction::StepVector > m_StepVector()
auto m_VPPhi(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::BranchOnTwoConds > m_BranchOnTwoConds()
AllRecipe_match< Opcode, Op0_t, Op1_t > m_Binary(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::LastActiveLane, Op0_t > m_LastActiveLane(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ExitingIVValue, Op0_t > m_ExitingIVValue(const Op0_t &Op0)
VPInstruction_match< Instruction::ExtractElement, Op0_t, Op1_t > m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_False()
VPInstruction_match< VPInstruction::ExtractLastLane, Op0_t > m_ExtractLastLane(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t > m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
bind_ty< VPIRValue > m_VPIRValue(VPIRValue *&V)
Match a VPIRValue.
auto m_GetElementPtr(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_True()
VectorEndPointerRecipe_match< Op0_t, Op1_t > m_VecEndPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > m_ExtractLastPart(const Op0_t &Op0)
VPInstruction_match< VPInstruction::Broadcast, Op0_t > m_Broadcast(const Op0_t &Op0)
class_match< VPValue > m_VPValue()
Match an arbitrary VPValue and ignore it.
VPInstruction_match< VPInstruction::ExplicitVectorLength, Op0_t > m_EVL(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector matches only its opcode, w/o matching its operands as the number of operands is not fi...
VPInstruction_match< VPInstruction::ExtractPenultimateElement, Op0_t > m_ExtractPenultimateElement(const Op0_t &Op0)
VPInstruction_match< VPInstruction::FirstActiveLane, Op0_t > m_FirstActiveLane(const Op0_t &Op0)
bind_ty< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
auto m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
VPInstruction_match< VPInstruction::ExtractLane, Op0_t, Op1_t > m_ExtractLane(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::Reverse, Op0_t > m_Reverse(const Op0_t &Op0)
NodeAddr< DefNode * > Def
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
bool isUniformAcrossVFsAndUFs(VPValue *V)
Checks if V is uniform across all VF lanes and UF parts.
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
VPInstruction * findComputeReductionResult(VPReductionPHIRecipe *PhiR)
Find the ComputeReductionResult recipe for PhiR, looking through selects inserted for predicated redu...
std::optional< MemoryLocation > getMemoryLocation(const VPRecipeBase &R)
Return a MemoryLocation for R with noalias metadata populated from R, if the recipe is supported and ...
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
VPRecipeBase * findRecipe(VPValue *Start, PredT Pred)
Search Start's users for a recipe satisfying Pred, looking through recipes with definitions.
VPSingleDefRecipe * findHeaderMask(VPlan &Plan)
Collect the header mask with the pattern: (ICMP_ULE, WideCanonicalIV, backedge-taken-count) TODO: Int...
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
static VPRecipeBase * findUserOf(VPValue *V, const MatchT &P)
If V is used by a recipe matching pattern P, return it.
const SCEV * getSCEVExprForVPValue(const VPValue *V, PredicatedScalarEvolution &PSE, const Loop *L=nullptr)
Return the SCEV expression for V.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void stable_sort(R &&Range)
auto min_element(R &&Range)
Provide wrappers to std::min_element which take ranges instead of having to pass begin/end explicitly...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
DenseMap< const Value *, const SCEV * > ValueToSCEVMapTy
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto cast_or_null(const Y &Val)
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
iterator_range< po_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_post_order_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in post order while traversing through ...
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI_FOR_TEST cl::opt< bool > EnableWideActiveLaneMask
UncountableExitStyle
Different methods of handling early exits.
@ ReadOnly
No side effects to worry about, so we can process any uncountable exits in the loop and branch either...
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant CI can be safely treated as having been extended from a narrower type with the gi...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
iterator_range< po_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_post_order_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in post order.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FindIV
FindIV reduction with select(icmp(),x,y) where one of (x,y) is a loop induction variable (increasing ...
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
LLVM_ABI Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF)
Given information about a recurrence kind, return the identity for the @llvm.vector....
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
@ Default
The result values are uniform if and only if all operands are uniform.
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
RemoveMask_match(const Op0_t &In, Op1_t &Out)
bool match(OpTy *V) const
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
MDNode * Scope
The tag for alias scope specification (used with noalias).
MDNode * NoAlias
The tag specifying the noalias scope.
This struct is a compact representation of a valid (non-zero power of two) alignment.
An information struct used to provide DenseMap with the various necessary components for a given valu...
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
This reduction is unordered with the partial result scaled down by some factor.
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
TargetTransformInfo::TargetCostKind CostKind
const TargetTransformInfo & TTI
A recipe for handling first-order recurrence phis.
A VPValue representing a live-in from the input IR or a constant.
Type * getType() const
Returns the type of the underlying IR value.
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
bool isMaterialized() const
Returns true if this symbolic value has been materialized.
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
A recipe for widening load operations, using the address to load from and an optional mask.
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
A recipe for widening store operations, using the stored value, the address to store to and an option...