45#define DEBUG_TYPE "vector-combine"
51STATISTIC(NumVecLoad,
"Number of vector loads formed");
52STATISTIC(NumVecCmp,
"Number of vector compares formed");
53STATISTIC(NumVecBO,
"Number of vector binops formed");
54STATISTIC(NumVecCmpBO,
"Number of vector compare + binop formed");
55STATISTIC(NumShufOfBitcast,
"Number of shuffles moved after bitcast");
56STATISTIC(NumScalarOps,
"Number of scalar unary + binary ops formed");
57STATISTIC(NumScalarCmp,
"Number of scalar compares formed");
58STATISTIC(NumScalarIntrinsic,
"Number of scalar intrinsic calls formed");
62 cl::desc(
"Disable all vector combine transforms"));
66 cl::desc(
"Disable binop extract to shuffle transforms"));
70 cl::desc(
"Max number of instructions to scan for vector combining."));
72static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
80 bool TryEarlyFoldsOnly)
83 SQ(*
DL, nullptr, &DT, &AC),
84 TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}
91 const TargetTransformInfo &TTI;
92 const DominatorTree &DT;
96 const SimplifyQuery SQ;
100 bool TryEarlyFoldsOnly;
102 InstructionWorklist Worklist;
111 bool vectorizeLoadInsert(Instruction &
I);
112 bool widenSubvectorLoad(Instruction &
I);
113 ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
114 ExtractElementInst *Ext1,
115 unsigned PreferredExtractIndex)
const;
116 bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
117 const Instruction &
I,
118 ExtractElementInst *&ConvertToShuffle,
119 unsigned PreferredExtractIndex);
122 bool foldExtractExtract(Instruction &
I);
123 bool foldInsExtFNeg(Instruction &
I);
124 bool foldInsExtBinop(Instruction &
I);
125 bool foldInsExtVectorToShuffle(Instruction &
I);
126 bool foldBitOpOfCastops(Instruction &
I);
127 bool foldBitOpOfCastConstant(Instruction &
I);
128 bool foldBitcastShuffle(Instruction &
I);
129 bool scalarizeOpOrCmp(Instruction &
I);
130 bool scalarizeVPIntrinsic(Instruction &
I);
131 bool foldExtractedCmps(Instruction &
I);
132 bool foldSelectsFromBitcast(Instruction &
I);
133 bool foldBinopOfReductions(Instruction &
I);
134 bool foldSingleElementStore(Instruction &
I);
135 bool scalarizeLoad(Instruction &
I);
136 bool scalarizeLoadExtract(LoadInst *LI, VectorType *VecTy,
Value *Ptr);
137 bool scalarizeLoadBitcast(LoadInst *LI, VectorType *VecTy,
Value *Ptr);
138 bool scalarizeExtExtract(Instruction &
I);
139 bool foldConcatOfBoolMasks(Instruction &
I);
140 bool foldPermuteOfBinops(Instruction &
I);
141 bool foldShuffleOfBinops(Instruction &
I);
142 bool foldShuffleOfSelects(Instruction &
I);
143 bool foldShuffleOfCastops(Instruction &
I);
144 bool foldShuffleOfShuffles(Instruction &
I);
145 bool foldPermuteOfIntrinsic(Instruction &
I);
146 bool foldShufflesOfLengthChangingShuffles(Instruction &
I);
147 bool foldShuffleOfIntrinsics(Instruction &
I);
148 bool foldShuffleToIdentity(Instruction &
I);
149 bool foldShuffleFromReductions(Instruction &
I);
150 bool foldShuffleChainsToReduce(Instruction &
I);
151 bool foldCastFromReductions(Instruction &
I);
152 bool foldSignBitReductionCmp(Instruction &
I);
153 bool foldReductionZeroTest(Instruction &
I);
154 bool foldICmpEqZeroVectorReduce(Instruction &
I);
155 bool foldEquivalentReductionCmp(Instruction &
I);
156 bool foldReduceAddCmpZero(Instruction &
I);
157 bool foldSelectShuffle(Instruction &
I,
bool FromReduction =
false);
158 bool foldInterleaveIntrinsics(Instruction &
I);
159 bool foldDeinterleaveIntrinsics(Instruction &
I);
160 bool foldBitcastOfVPLoad(Instruction &
I);
161 bool foldBitOrderReverseAndSwap(Instruction &
I);
162 bool shrinkType(Instruction &
I);
163 bool shrinkLoadForShuffles(Instruction &
I);
164 bool shrinkPhiOfShuffles(Instruction &
I);
166 void replaceValue(Instruction &Old,
Value &New,
bool Erase =
true) {
172 Worklist.pushUsersToWorkList(*NewI);
173 Worklist.pushValue(NewI);
190 SmallPtrSet<Value *, 4> Visited;
195 OpI,
nullptr,
nullptr, [&](
Value *V) {
200 NextInst = NextInst->getNextNode();
205 Worklist.pushUsersToWorkList(*OpI);
206 Worklist.pushValue(OpI);
226 if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
227 Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
233 Type *ScalarTy = Load->getType()->getScalarType();
235 unsigned MinVectorSize =
TTI.getMinVectorRegisterBitWidth();
236 if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
243bool VectorCombine::vectorizeLoadInsert(
Instruction &
I) {
269 Value *SrcPtr =
Load->getPointerOperand()->stripPointerCasts();
272 unsigned MinVecNumElts = MinVectorSize / ScalarSize;
273 auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts,
false);
274 unsigned OffsetEltIndex = 0;
282 unsigned OffsetBitWidth =
DL->getIndexTypeSizeInBits(SrcPtr->
getType());
283 APInt
Offset(OffsetBitWidth, 0);
293 uint64_t ScalarSizeInBytes = ScalarSize / 8;
294 if (
Offset.urem(ScalarSizeInBytes) != 0)
298 OffsetEltIndex =
Offset.udiv(ScalarSizeInBytes).getZExtValue();
299 if (OffsetEltIndex >= MinVecNumElts)
316 unsigned AS =
Load->getPointerAddressSpace();
335 unsigned OutputNumElts = Ty->getNumElements();
337 assert(OffsetEltIndex < MinVecNumElts &&
"Address offset too big");
338 Mask[0] = OffsetEltIndex;
345 if (OldCost < NewCost || !NewCost.
isValid())
356 replaceValue(
I, *VecLd);
364bool VectorCombine::widenSubvectorLoad(Instruction &
I) {
367 if (!Shuf->isIdentityWithPadding())
373 unsigned OpIndex =
any_of(Shuf->getShuffleMask(), [&NumOpElts](
int M) {
374 return M >= (int)(NumOpElts);
385 Value *SrcPtr =
Load->getPointerOperand()->stripPointerCasts();
394 unsigned AS =
Load->getPointerAddressSpace();
409 if (OldCost < NewCost || !NewCost.
isValid())
416 replaceValue(
I, *VecLd);
423ExtractElementInst *VectorCombine::getShuffleExtract(
424 ExtractElementInst *Ext0, ExtractElementInst *Ext1,
428 assert(Index0C && Index1C &&
"Expected constant extract indexes");
430 unsigned Index0 = Index0C->getZExtValue();
431 unsigned Index1 = Index1C->getZExtValue();
434 if (Index0 == Index1)
458 if (PreferredExtractIndex == Index0)
460 if (PreferredExtractIndex == Index1)
464 return Index0 > Index1 ? Ext0 : Ext1;
472bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
473 ExtractElementInst *Ext1,
474 const Instruction &
I,
475 ExtractElementInst *&ConvertToShuffle,
476 unsigned PreferredExtractIndex) {
479 assert(Ext0IndexC && Ext1IndexC &&
"Expected constant extract indexes");
481 unsigned Opcode =
I.getOpcode();
494 assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
495 "Expected a compare");
505 unsigned Ext0Index = Ext0IndexC->getZExtValue();
506 unsigned Ext1Index = Ext1IndexC->getZExtValue();
520 unsigned BestExtIndex = Extract0Cost > Extract1Cost ? Ext0Index : Ext1Index;
521 unsigned BestInsIndex = Extract0Cost > Extract1Cost ? Ext1Index : Ext0Index;
522 InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
527 if (Ext0Src == Ext1Src && Ext0Index == Ext1Index) {
532 bool HasUseTax = Ext0 == Ext1 ? !Ext0->
hasNUses(2)
534 OldCost = CheapExtractCost + ScalarOpCost;
535 NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
539 OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
540 NewCost = VectorOpCost + CheapExtractCost +
545 ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
546 if (ConvertToShuffle) {
558 SmallVector<int> ShuffleMask(FixedVecTy->getNumElements(),
560 ShuffleMask[BestInsIndex] = BestExtIndex;
562 VecTy, VecTy, ShuffleMask,
CostKind, 0,
563 nullptr, {ConvertToShuffle});
566 VecTy, VecTy, {},
CostKind, 0,
nullptr,
574 return OldCost < NewCost;
586 ShufMask[NewIndex] = OldIndex;
587 return Builder.CreateShuffleVector(Vec, ShufMask,
"shift");
639 V1,
"foldExtExtBinop");
644 VecBOInst->copyIRFlags(&
I);
650bool VectorCombine::foldExtractExtract(Instruction &
I) {
671 unsigned NumElts = FixedVecTy->getNumElements();
672 if (C0 >= NumElts || C1 >= NumElts)
688 ExtractElementInst *ExtractToChange;
689 if (isExtractExtractCheap(Ext0, Ext1,
I, ExtractToChange, InsertIndex))
695 if (ExtractToChange) {
696 unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
701 if (ExtractToChange == Ext0)
710 ? foldExtExtCmp(ExtOp0, ExtOp1, ExtIndex,
I)
711 : foldExtExtBinop(ExtOp0, ExtOp1, ExtIndex,
I);
714 replaceValue(
I, *NewExt);
720bool VectorCombine::foldInsExtFNeg(Instruction &
I) {
723 uint64_t ExtIdx, InsIdx;
738 auto *DstVecScalarTy = DstVecTy->getScalarType();
740 if (!SrcVecTy || DstVecScalarTy != SrcVecTy->getScalarType())
745 unsigned NumDstElts = DstVecTy->getNumElements();
746 unsigned NumSrcElts = SrcVecTy->getNumElements();
747 if (ExtIdx > NumSrcElts || InsIdx >= NumDstElts || NumDstElts == 1)
753 SmallVector<int>
Mask(NumDstElts);
754 std::iota(
Mask.begin(),
Mask.end(), 0);
755 Mask[InsIdx] = (ExtIdx % NumDstElts) + NumDstElts;
771 bool NeedLenChg = SrcVecTy->getNumElements() != NumDstElts;
774 SmallVector<int> SrcMask;
777 SrcMask[ExtIdx % NumDstElts] = ExtIdx;
779 DstVecTy, SrcVecTy, SrcMask,
CostKind);
783 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
785 if (NewCost > OldCost)
788 Value *NewShuf, *LenChgShuf =
nullptr;
802 replaceValue(
I, *NewShuf);
808bool VectorCombine::foldInsExtBinop(Instruction &
I) {
809 BinaryOperator *VecBinOp, *SclBinOp;
841 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
843 if (NewCost > OldCost)
854 NewInst->copyIRFlags(VecBinOp);
855 NewInst->andIRFlags(SclBinOp);
860 replaceValue(
I, *NewBO);
866bool VectorCombine::foldBitOpOfCastops(Instruction &
I) {
869 if (!BinOp || !BinOp->isBitwiseLogicOp())
875 if (!LHSCast || !RHSCast) {
876 LLVM_DEBUG(
dbgs() <<
" One or both operands are not cast instructions\n");
882 if (CastOpcode != RHSCast->getOpcode())
886 switch (CastOpcode) {
887 case Instruction::BitCast:
888 case Instruction::Trunc:
889 case Instruction::SExt:
890 case Instruction::ZExt:
896 Value *LHSSrc = LHSCast->getOperand(0);
897 Value *RHSSrc = RHSCast->getOperand(0);
903 auto *SrcTy = LHSSrc->
getType();
904 auto *DstTy =
I.getType();
907 if (CastOpcode != Instruction::BitCast &&
912 if (!SrcTy->getScalarType()->isIntegerTy() ||
913 !DstTy->getScalarType()->isIntegerTy())
928 LHSCastCost + RHSCastCost;
939 if (!LHSCast->hasOneUse())
940 NewCost += LHSCastCost;
941 if (!RHSCast->hasOneUse())
942 NewCost += RHSCastCost;
945 <<
" NewCost=" << NewCost <<
"\n");
947 if (NewCost > OldCost)
952 BinOp->getName() +
".inner");
954 NewBinOp->copyIRFlags(BinOp);
968 replaceValue(
I, *Result);
977bool VectorCombine::foldBitOpOfCastConstant(Instruction &
I) {
993 switch (CastOpcode) {
994 case Instruction::BitCast:
995 case Instruction::ZExt:
996 case Instruction::SExt:
997 case Instruction::Trunc:
1003 Value *LHSSrc = LHSCast->getOperand(0);
1005 auto *SrcTy = LHSSrc->
getType();
1006 auto *DstTy =
I.getType();
1009 if (CastOpcode != Instruction::BitCast &&
1014 if (!SrcTy->getScalarType()->isIntegerTy() ||
1015 !DstTy->getScalarType()->isIntegerTy())
1019 PreservedCastFlags RHSFlags;
1044 if (!LHSCast->hasOneUse())
1045 NewCost += LHSCastCost;
1047 LLVM_DEBUG(
dbgs() <<
"foldBitOpOfCastConstant: OldCost=" << OldCost
1048 <<
" NewCost=" << NewCost <<
"\n");
1050 if (NewCost > OldCost)
1055 LHSSrc, InvC,
I.getName() +
".inner");
1057 NewBinOp->copyIRFlags(&
I);
1077 replaceValue(
I, *Result);
1084bool VectorCombine::foldBitcastShuffle(Instruction &
I) {
1098 if (!DestTy || !SrcTy)
1101 unsigned DestEltSize = DestTy->getScalarSizeInBits();
1102 unsigned SrcEltSize = SrcTy->getScalarSizeInBits();
1103 if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0)
1113 if (!(BCTy0 && BCTy0->getElementType() == DestTy->getElementType()) &&
1114 !(BCTy1 && BCTy1->getElementType() == DestTy->getElementType()))
1118 SmallVector<int, 16> NewMask;
1119 if (DestEltSize <= SrcEltSize) {
1122 if (SrcEltSize % DestEltSize != 0)
1124 unsigned ScaleFactor = SrcEltSize / DestEltSize;
1129 if (DestEltSize % SrcEltSize != 0)
1131 unsigned ScaleFactor = DestEltSize / SrcEltSize;
1138 unsigned NumSrcElts = SrcTy->getPrimitiveSizeInBits() / DestEltSize;
1139 auto *NewShuffleTy =
1141 auto *OldShuffleTy =
1143 unsigned NumOps = IsUnary ? 1 : 2;
1153 TargetTransformInfo::CastContextHint::None,
1158 TargetTransformInfo::CastContextHint::None,
1161 LLVM_DEBUG(
dbgs() <<
"Found a bitcasted shuffle: " <<
I <<
"\n OldCost: "
1162 << OldCost <<
" vs NewCost: " << NewCost <<
"\n");
1164 if (NewCost > OldCost || !NewCost.
isValid())
1172 replaceValue(
I, *Shuf);
1179bool VectorCombine::scalarizeVPIntrinsic(Instruction &
I) {
1193 if (!ScalarOp0 || !ScalarOp1)
1201 auto IsAllTrueMask = [](
Value *MaskVal) {
1204 return ConstValue->isAllOnesValue();
1218 SmallVector<int>
Mask;
1220 Mask.resize(FVTy->getNumElements(), 0);
1229 Args.push_back(
V->getType());
1230 IntrinsicCostAttributes
Attrs(IntrID, VecTy, Args);
1235 std::optional<unsigned> FunctionalOpcode =
1237 std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt;
1238 if (!FunctionalOpcode) {
1247 IntrinsicCostAttributes
Attrs(*ScalarIntrID, VecTy->getScalarType(), Args);
1257 InstructionCost NewCost = ScalarOpCost + SplatCost + CostToKeepSplats;
1259 LLVM_DEBUG(
dbgs() <<
"Found a VP Intrinsic to scalarize: " << VPI
1262 <<
", Cost of scalarizing:" << NewCost <<
"\n");
1265 if (OldCost < NewCost || !NewCost.
isValid())
1276 bool SafeToSpeculate;
1282 *FunctionalOpcode, &VPI,
nullptr, SQ.
AC, SQ.
DT);
1283 if (!SafeToSpeculate &&
1290 {ScalarOp0, ScalarOp1})
1292 ScalarOp0, ScalarOp1);
1301bool VectorCombine::scalarizeOpOrCmp(Instruction &
I) {
1306 if (!UO && !BO && !CI && !
II)
1314 if (Arg->getType() !=
II->getType() &&
1324 for (User *U :
I.users())
1331 std::optional<uint64_t>
Index;
1333 auto Ops =
II ?
II->args() :
I.operands();
1337 uint64_t InsIdx = 0;
1342 if (OpTy->getElementCount().getKnownMinValue() <= InsIdx)
1348 else if (InsIdx != *Index)
1365 if (!
Index.has_value())
1369 Type *ScalarTy = VecTy->getScalarType();
1370 assert(VecTy->isVectorTy() &&
1373 "Unexpected types for insert element into binop or cmp");
1375 unsigned Opcode =
I.getOpcode();
1383 }
else if (UO || BO) {
1387 IntrinsicCostAttributes ScalarICA(
1388 II->getIntrinsicID(), ScalarTy,
1391 IntrinsicCostAttributes VectorICA(
1392 II->getIntrinsicID(), VecTy,
1399 Value *NewVecC =
nullptr;
1401 NewVecC =
simplifyCmpInst(CI->getPredicate(), VecCs[0], VecCs[1], SQ);
1404 simplifyUnOp(UO->getOpcode(), VecCs[0], UO->getFastMathFlags(), SQ);
1406 NewVecC =
simplifyBinOp(BO->getOpcode(), VecCs[0], VecCs[1], SQ);
1420 for (
auto [Idx,
Op, VecC, Scalar] :
enumerate(
Ops, VecCs, ScalarOps)) {
1422 II->getIntrinsicID(), Idx, &
TTI)))
1425 Instruction::InsertElement, VecTy,
CostKind, *Index, VecC, Scalar);
1426 OldCost += InsertCost;
1427 NewCost += !
Op->hasOneUse() * InsertCost;
1431 if (OldCost < NewCost || !NewCost.
isValid())
1441 ++NumScalarIntrinsic;
1451 Scalar = Builder.
CreateCmp(CI->getPredicate(), ScalarOps[0], ScalarOps[1]);
1457 Scalar->setName(
I.getName() +
".scalar");
1462 ScalarInst->copyIRFlags(&
I);
1465 replaceValue(
I, *Insert);
1472bool VectorCombine::foldExtractedCmps(Instruction &
I) {
1477 if (!BI || !
I.getType()->isIntegerTy(1))
1482 Value *B0 =
I.getOperand(0), *B1 =
I.getOperand(1);
1485 CmpPredicate
P0,
P1;
1497 uint64_t Index0, Index1;
1504 ExtractElementInst *ConvertToShuf = getShuffleExtract(Ext0, Ext1,
CostKind);
1507 assert((ConvertToShuf == Ext0 || ConvertToShuf == Ext1) &&
1508 "Unknown ExtractElementInst");
1513 unsigned CmpOpcode =
1519 if (Index0 >= VecTy->getNumElements() || Index1 >= VecTy->getNumElements())
1531 Ext0Cost + Ext1Cost + CmpCost * 2 +
1537 int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
1538 int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
1543 ShufMask[CheapIndex] = ExpensiveIndex;
1548 NewCost += Ext0->
hasOneUse() ? 0 : Ext0Cost;
1549 NewCost += Ext1->
hasOneUse() ? 0 : Ext1Cost;
1554 if (OldCost < NewCost || !NewCost.
isValid())
1564 Value *
LHS = ConvertToShuf == Ext0 ? Shuf : VCmp;
1565 Value *
RHS = ConvertToShuf == Ext0 ? VCmp : Shuf;
1568 replaceValue(
I, *NewExt);
1595bool VectorCombine::foldSelectsFromBitcast(Instruction &
I) {
1602 if (!SrcVecTy || !DstVecTy)
1612 if (SrcEltBits != 32 && SrcEltBits != 64)
1615 if (!DstEltTy->
isIntegerTy() || DstEltBits >= SrcEltBits)
1632 if (!ScalarSelCost.
isValid() || ScalarSelCost == 0)
1635 unsigned MinSelects = (VecSelCost.
getValue() / ScalarSelCost.
getValue()) + 1;
1638 if (!BC->hasNUsesOrMore(MinSelects))
1643 DenseMap<Value *, SmallVector<SelectInst *, 8>> CondToSelects;
1645 for (User *U : BC->users()) {
1650 for (User *ExtUser : Ext->users()) {
1654 Cond->getType()->isIntegerTy(1))
1659 if (CondToSelects.
empty())
1662 bool MadeChange =
false;
1663 Value *SrcVec = BC->getOperand(0);
1666 for (
auto [
Cond, Selects] : CondToSelects) {
1668 if (Selects.size() < MinSelects) {
1669 LLVM_DEBUG(
dbgs() <<
"VectorCombine: foldSelectsFromBitcast not "
1670 <<
"profitable (VecCost=" << VecSelCost
1671 <<
", ScalarCost=" << ScalarSelCost
1672 <<
", NumSelects=" << Selects.size() <<
")\n");
1677 auto InsertPt = std::next(BC->getIterator());
1681 InsertPt = std::next(CondInst->getIterator());
1689 for (SelectInst *Sel : Selects) {
1691 Value *Idx = Ext->getIndexOperand();
1695 replaceValue(*Sel, *NewExt);
1700 <<
" selects into vector select\n");
1714 unsigned ReductionOpc =
1720 CostBeforeReduction =
1721 TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, ExtType,
1723 CostAfterReduction =
1724 TTI.getExtendedReductionCost(ReductionOpc, IsUnsigned,
II.getType(),
1728 if (RedOp &&
II.getIntrinsicID() == Intrinsic::vector_reduce_add &&
1734 (Op0->
getOpcode() == RedOp->getOpcode() || Op0 == Op1)) {
1741 TTI.getCastInstrCost(Op0->
getOpcode(), MulType, ExtType,
1744 TTI.getArithmeticInstrCost(Instruction::Mul, MulType,
CostKind);
1746 TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, MulType,
1749 CostBeforeReduction = ExtCost * 2 + MulCost + Ext2Cost;
1750 CostAfterReduction =
TTI.getMulAccReductionCost(
1751 IsUnsigned, ReductionOpc,
II.getType(), ExtType,
CostKind);
1754 CostAfterReduction =
TTI.getArithmeticReductionCost(ReductionOpc, VecRedTy,
1758bool VectorCombine::foldBinopOfReductions(Instruction &
I) {
1761 if (BinOpOpc == Instruction::Sub)
1762 ReductionIID = Intrinsic::vector_reduce_add;
1766 if (ReductionIID == Intrinsic::vector_reduce_fadd ||
1767 ReductionIID == Intrinsic::vector_reduce_fmul)
1770 auto checkIntrinsicAndGetItsArgument = [](
Value *
V,
1775 if (
II->getIntrinsicID() == IID &&
II->hasOneUse())
1776 return II->getArgOperand(0);
1780 Value *V0 = checkIntrinsicAndGetItsArgument(
I.getOperand(0), ReductionIID);
1783 Value *
V1 = checkIntrinsicAndGetItsArgument(
I.getOperand(1), ReductionIID);
1788 if (
V1->getType() != VTy)
1792 unsigned ReductionOpc =
1805 CostOfRedOperand0 + CostOfRedOperand1 +
1808 if (NewCost >= OldCost || !NewCost.
isValid())
1812 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1815 if (BinOpOpc == Instruction::Or)
1822 replaceValue(
I, *Rdx);
1830 unsigned NumScanned = 0;
1831 return std::any_of(Begin, End, [&](
const Instruction &Instr) {
1840class ScalarizationResult {
1841 enum class StatusTy { Unsafe, Safe, SafeWithFreeze };
1846 ScalarizationResult(StatusTy Status,
Value *ToFreeze =
nullptr)
1847 : Status(Status), ToFreeze(ToFreeze) {}
1850 ScalarizationResult(
const ScalarizationResult &
Other) =
default;
1851 ~ScalarizationResult() {
1852 assert(!ToFreeze &&
"freeze() not called with ToFreeze being set");
1855 static ScalarizationResult unsafe() {
return {StatusTy::Unsafe}; }
1856 static ScalarizationResult safe() {
return {StatusTy::Safe}; }
1857 static ScalarizationResult safeWithFreeze(
Value *ToFreeze) {
1858 return {StatusTy::SafeWithFreeze, ToFreeze};
1862 bool isSafe()
const {
return Status == StatusTy::Safe; }
1864 bool isUnsafe()
const {
return Status == StatusTy::Unsafe; }
1867 bool isSafeWithFreeze()
const {
return Status == StatusTy::SafeWithFreeze; }
1872 Status = StatusTy::Unsafe;
1876 void freeze(IRBuilderBase &Builder, Instruction &UserI) {
1877 assert(isSafeWithFreeze() &&
1878 "should only be used when freezing is required");
1880 "UserI must be a user of ToFreeze");
1881 IRBuilder<>::InsertPointGuard Guard(Builder);
1886 if (
U.get() == ToFreeze)
1901 uint64_t NumElements = VecTy->getElementCount().getKnownMinValue();
1905 if (
C->getValue().ult(NumElements))
1906 return ScalarizationResult::safe();
1907 return ScalarizationResult::unsafe();
1912 return ScalarizationResult::unsafe();
1914 APInt Zero(IntWidth, 0);
1915 APInt MaxElts(IntWidth, NumElements);
1922 return ScalarizationResult::safe();
1923 return ScalarizationResult::unsafe();
1936 if (ValidIndices.
contains(IdxRange))
1937 return ScalarizationResult::safeWithFreeze(IdxBase);
1938 return ScalarizationResult::unsafe();
1950 C->getZExtValue() *
DL.getTypeStoreSize(ScalarType));
1962bool VectorCombine::foldSingleElementStore(Instruction &
I) {
1974 if (!
match(
SI->getValueOperand(),
1981 Value *SrcAddr =
Load->getPointerOperand()->stripPointerCasts();
1984 if (!
Load->isSimple() ||
Load->getParent() !=
SI->getParent() ||
1985 !
DL->typeSizeEqualsStoreSize(
Load->getType()->getScalarType()) ||
1986 SrcAddr !=
SI->getPointerOperand()->stripPointerCasts())
1989 auto ScalarizableIdx =
1991 if (ScalarizableIdx.isUnsafe() ||
1998 Worklist.
push(Load);
2000 if (ScalarizableIdx.isSafeWithFreeze())
2003 SI->getValueOperand()->getType(),
SI->getPointerOperand(),
2004 {ConstantInt::get(Idx->getType(), 0), Idx});
2008 std::max(
SI->getAlign(),
Load->getAlign()), NewElement->
getType(), Idx,
2011 replaceValue(
I, *NSI);
2021bool VectorCombine::scalarizeLoad(Instruction &
I) {
2031 if (!LI->isSimple() || !
DL->typeSizeEqualsStoreSize(VecTy->getScalarType()))
2034 bool AllExtracts =
true;
2035 bool AllBitcasts =
true;
2037 unsigned NumInstChecked = 0;
2042 for (User *U : LI->users()) {
2044 if (!UI || UI->getParent() != LI->getParent())
2049 if (UI->use_empty())
2053 AllExtracts =
false;
2055 AllBitcasts =
false;
2059 for (Instruction &
I :
2060 make_range(std::next(LI->getIterator()), UI->getIterator())) {
2067 LastCheckedInst = UI;
2072 return scalarizeLoadExtract(LI, VecTy, Ptr);
2074 return scalarizeLoadBitcast(LI, VecTy, Ptr);
2079bool VectorCombine::scalarizeLoadExtract(LoadInst *LI, VectorType *VecTy,
2084 DenseMap<ExtractElementInst *, ScalarizationResult> NeedFreeze;
2087 for (
auto &Pair : NeedFreeze)
2088 Pair.second.discard();
2096 for (User *U : LI->
users()) {
2101 if (ScalarIdx.isUnsafe())
2103 if (ScalarIdx.isSafeWithFreeze()) {
2104 NeedFreeze.try_emplace(UI, ScalarIdx);
2105 ScalarIdx.discard();
2111 Index ?
Index->getZExtValue() : -1);
2119 LLVM_DEBUG(
dbgs() <<
"Found all extractions of a vector load: " << *LI
2120 <<
"\n LoadExtractCost: " << OriginalCost
2121 <<
" vs ScalarizedCost: " << ScalarizedCost <<
"\n");
2123 if (ScalarizedCost >= OriginalCost)
2130 Type *ElemType = VecTy->getElementType();
2133 for (User *U : LI->
users()) {
2135 Value *Idx = EI->getIndexOperand();
2138 auto It = NeedFreeze.find(EI);
2139 if (It != NeedFreeze.end())
2146 Builder.
CreateLoad(ElemType,
GEP, EI->getName() +
".scalar"));
2148 Align ScalarOpAlignment =
2150 NewLoad->setAlignment(ScalarOpAlignment);
2153 size_t Offset = ConstIdx->getZExtValue() *
DL->getTypeStoreSize(ElemType);
2158 replaceValue(*EI, *NewLoad,
false);
2161 FailureGuard.release();
2166bool VectorCombine::scalarizeLoadBitcast(LoadInst *LI, VectorType *VecTy,
2172 Type *TargetScalarType =
nullptr;
2173 unsigned VecBitWidth =
DL->getTypeSizeInBits(VecTy);
2175 for (User *U : LI->
users()) {
2178 Type *DestTy = BC->getDestTy();
2182 unsigned DestBitWidth =
DL->getTypeSizeInBits(DestTy);
2183 if (DestBitWidth != VecBitWidth)
2187 if (!TargetScalarType)
2188 TargetScalarType = DestTy;
2189 else if (TargetScalarType != DestTy)
2197 if (!TargetScalarType)
2205 LLVM_DEBUG(
dbgs() <<
"Found vector load feeding only bitcasts: " << *LI
2206 <<
"\n OriginalCost: " << OriginalCost
2207 <<
" vs ScalarizedCost: " << ScalarizedCost <<
"\n");
2209 if (ScalarizedCost >= OriginalCost)
2220 ScalarLoad->copyMetadata(*LI);
2223 for (User *U : LI->
users()) {
2225 replaceValue(*BC, *ScalarLoad,
false);
2231bool VectorCombine::scalarizeExtExtract(Instruction &
I) {
2246 Type *ScalarDstTy = DstTy->getElementType();
2247 if (
DL->getTypeSizeInBits(SrcTy) !=
DL->getTypeSizeInBits(ScalarDstTy))
2253 unsigned ExtCnt = 0;
2254 bool ExtLane0 =
false;
2255 for (User *U : Ext->users()) {
2269 Instruction::And, ScalarDstTy,
CostKind,
2272 (ExtCnt - ExtLane0) *
2274 Instruction::LShr, ScalarDstTy,
CostKind,
2277 if (ScalarCost > VectorCost)
2280 Value *ScalarV = Ext->getOperand(0);
2287 SmallDenseSet<ConstantInt *, 8> ExtractedLanes;
2288 bool AllExtractsTriggerUB =
true;
2289 ExtractElementInst *LastExtract =
nullptr;
2291 for (User *U : Ext->users()) {
2294 AllExtractsTriggerUB =
false;
2298 if (!LastExtract || LastExtract->
comesBefore(Extract))
2299 LastExtract = Extract;
2301 if (ExtractedLanes.
size() != DstTy->getNumElements() ||
2302 !AllExtractsTriggerUB ||
2310 uint64_t SrcEltSizeInBits =
DL->getTypeSizeInBits(SrcTy->getElementType());
2311 uint64_t TotalBits =
DL->getTypeSizeInBits(SrcTy);
2314 Value *
Mask = ConstantInt::get(PackedTy, EltBitMask);
2315 for (User *U : Ext->users()) {
2321 ? (TotalBits - SrcEltSizeInBits - Idx * SrcEltSizeInBits)
2322 : (Idx * SrcEltSizeInBits);
2325 U->replaceAllUsesWith(
And);
2333bool VectorCombine::foldConcatOfBoolMasks(Instruction &
I) {
2334 Type *Ty =
I.getType();
2339 if (
DL->isBigEndian())
2350 uint64_t ShAmtX = 0;
2358 uint64_t ShAmtY = 0;
2366 if (ShAmtX > ShAmtY) {
2374 uint64_t ShAmtDiff = ShAmtY - ShAmtX;
2375 unsigned NumSHL = (ShAmtX > 0) + (ShAmtY > 0);
2380 MaskTy->getNumElements() != ShAmtDiff ||
2381 MaskTy->getNumElements() > (
BitWidth / 2))
2386 Type::getIntNTy(Ty->
getContext(), ConcatTy->getNumElements());
2387 auto *MaskIntTy = Type::getIntNTy(Ty->
getContext(), ShAmtDiff);
2390 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
2407 if (Ty != ConcatIntTy)
2413 LLVM_DEBUG(
dbgs() <<
"Found a concatenation of bitcasted bool masks: " <<
I
2414 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2417 if (NewCost > OldCost)
2427 if (Ty != ConcatIntTy) {
2437 replaceValue(
I, *Result);
2443bool VectorCombine::foldPermuteOfBinops(Instruction &
I) {
2444 BinaryOperator *BinOp;
2445 ArrayRef<int> OuterMask;
2453 Value *Op00, *Op01, *Op10, *Op11;
2454 ArrayRef<int> Mask0, Mask1;
2459 if (!Match0 && !Match1)
2472 if (!ShuffleDstTy || !BinOpTy || !Op0Ty || !Op1Ty)
2475 unsigned NumSrcElts = BinOpTy->getNumElements();
2480 any_of(OuterMask, [NumSrcElts](
int M) {
return M >= (int)NumSrcElts; }))
2484 SmallVector<int> NewMask0, NewMask1;
2485 for (
int M : OuterMask) {
2486 if (M < 0 || M >= (
int)NumSrcElts) {
2490 NewMask0.
push_back(Match0 ? Mask0[M] : M);
2491 NewMask1.
push_back(Match1 ? Mask1[M] : M);
2495 unsigned NumOpElts = Op0Ty->getNumElements();
2496 bool IsIdentity0 = ShuffleDstTy == Op0Ty &&
2497 all_of(NewMask0, [NumOpElts](
int M) {
return M < (int)NumOpElts; }) &&
2499 bool IsIdentity1 = ShuffleDstTy == Op1Ty &&
2500 all_of(NewMask1, [NumOpElts](
int M) {
return M < (int)NumOpElts; }) &&
2509 ShuffleDstTy, BinOpTy, OuterMask,
CostKind,
2510 0,
nullptr, {BinOp}, &
I);
2512 NewCost += BinOpCost;
2518 OldCost += Shuf0Cost;
2520 NewCost += Shuf0Cost;
2526 OldCost += Shuf1Cost;
2528 NewCost += Shuf1Cost;
2536 Op0Ty, NewMask0,
CostKind, 0,
nullptr, {Op00, Op01});
2540 Op1Ty, NewMask1,
CostKind, 0,
nullptr, {Op10, Op11});
2542 LLVM_DEBUG(
dbgs() <<
"Found a shuffle feeding a shuffled binop: " <<
I
2543 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2547 if (NewCost > OldCost)
2558 NewInst->copyIRFlags(BinOp);
2562 replaceValue(
I, *NewBO);
2568bool VectorCombine::foldShuffleOfBinops(Instruction &
I) {
2569 ArrayRef<int> OldMask;
2576 if (
LHS->getOpcode() !=
RHS->getOpcode())
2580 bool IsCommutative =
false;
2589 IsCommutative = BinaryOperator::isCommutative(BO->getOpcode());
2600 if (!ShuffleDstTy || !BinResTy || !BinOpTy ||
X->getType() !=
Z->getType())
2603 bool SameBinOp =
LHS ==
RHS;
2604 unsigned NumSrcElts = BinOpTy->getNumElements();
2607 if (IsCommutative &&
X != Z &&
Y != W && (
X == W ||
Y == Z))
2610 auto ConvertToUnary = [NumSrcElts](
int &
M) {
2611 if (M >= (
int)NumSrcElts)
2615 SmallVector<int> NewMask0(OldMask);
2624 SmallVector<int> NewMask1(OldMask);
2643 ShuffleDstTy, BinResTy, OldMask,
CostKind, 0,
2653 ArrayRef<int> InnerMask;
2655 m_Mask(InnerMask)))) &&
2658 [NumSrcElts](
int M) {
return M < (int)NumSrcElts; })) {
2670 bool ReducedInstCount =
false;
2671 ReducedInstCount |= MergeInner(
X, 0, NewMask0,
CostKind);
2672 ReducedInstCount |= MergeInner(
Y, 0, NewMask1,
CostKind);
2673 ReducedInstCount |= MergeInner(Z, NumSrcElts, NewMask0,
CostKind);
2674 ReducedInstCount |= MergeInner(W, NumSrcElts, NewMask1,
CostKind);
2675 bool SingleSrcBinOp = (
X ==
Y) && (Z == W) && (NewMask0 == NewMask1);
2680 auto *ShuffleCmpTy =
2683 SK0, ShuffleCmpTy, BinOpTy, NewMask0,
CostKind, 0,
nullptr, {
X,
Z});
2684 if (!SingleSrcBinOp)
2694 PredLHS,
CostKind, Op0Info, Op1Info);
2704 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2711 if (ReducedInstCount ? (NewCost > OldCost) : (NewCost >= OldCost))
2720 : Builder.
CreateCmp(PredLHS, Shuf0, Shuf1);
2724 NewInst->copyIRFlags(
LHS);
2725 NewInst->andIRFlags(
RHS);
2730 replaceValue(
I, *NewBO);
2737bool VectorCombine::foldShuffleOfSelects(Instruction &
I) {
2739 Value *C1, *
T1, *F1, *C2, *T2, *F2;
2750 if (!C1VecTy || !C2VecTy || C1VecTy != C2VecTy)
2756 if (((SI0FOp ==
nullptr) != (SI1FOp ==
nullptr)) ||
2757 ((SI0FOp !=
nullptr) &&
2758 (SI0FOp->getFastMathFlags() != SI1FOp->getFastMathFlags())))
2764 auto SelOp = Instruction::Select;
2772 CostSel1 + CostSel2 +
2774 {
I.getOperand(0),
I.getOperand(1)}, &
I);
2778 Mask,
CostKind, 0,
nullptr, {C1, C2});
2788 if (!Sel1->hasOneUse())
2789 NewCost += CostSel1;
2790 if (!Sel2->hasOneUse())
2791 NewCost += CostSel2;
2794 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2796 if (NewCost > OldCost)
2805 NewSel = Builder.
CreateSelectFMF(ShuffleCmp, ShuffleTrue, ShuffleFalse,
2806 SI0FOp->getFastMathFlags());
2808 NewSel = Builder.
CreateSelect(ShuffleCmp, ShuffleTrue, ShuffleFalse);
2813 replaceValue(
I, *NewSel);
2819bool VectorCombine::foldShuffleOfCastops(Instruction &
I) {
2821 ArrayRef<int> OldMask;
2830 if (!C0 || (IsBinaryShuffle && !C1))
2837 if (!IsBinaryShuffle && Opcode == Instruction::BitCast)
2840 if (IsBinaryShuffle) {
2841 if (C0->getSrcTy() != C1->getSrcTy())
2844 if (Opcode != C1->getOpcode()) {
2846 Opcode = Instruction::SExt;
2855 if (!ShuffleDstTy || !CastDstTy || !CastSrcTy)
2858 unsigned NumSrcElts = CastSrcTy->getNumElements();
2859 unsigned NumDstElts = CastDstTy->getNumElements();
2860 assert((NumDstElts == NumSrcElts || Opcode == Instruction::BitCast) &&
2861 "Only bitcasts expected to alter src/dst element counts");
2865 if (NumDstElts != NumSrcElts && (NumSrcElts % NumDstElts) != 0 &&
2866 (NumDstElts % NumSrcElts) != 0)
2869 SmallVector<int, 16> NewMask;
2870 if (NumSrcElts >= NumDstElts) {
2873 assert(NumSrcElts % NumDstElts == 0 &&
"Unexpected shuffle mask");
2874 unsigned ScaleFactor = NumSrcElts / NumDstElts;
2879 assert(NumDstElts % NumSrcElts == 0 &&
"Unexpected shuffle mask");
2880 unsigned ScaleFactor = NumDstElts / NumSrcElts;
2885 auto *NewShuffleDstTy =
2894 if (IsBinaryShuffle)
2909 if (IsBinaryShuffle) {
2919 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2921 if (NewCost > OldCost)
2925 if (IsBinaryShuffle)
2935 NewInst->copyIRFlags(C0);
2936 if (IsBinaryShuffle)
2937 NewInst->andIRFlags(C1);
2941 replaceValue(
I, *Cast);
2951bool VectorCombine::foldShuffleOfShuffles(Instruction &
I) {
2952 ArrayRef<int> OuterMask;
2953 Value *OuterV0, *OuterV1;
2958 ArrayRef<int> InnerMask0, InnerMask1;
2959 Value *X0, *X1, *Y0, *Y1;
2964 if (!Match0 && !Match1)
2969 SmallVector<int, 16> PoisonMask1;
2974 InnerMask1 = PoisonMask1;
2978 X0 = Match0 ? X0 : OuterV0;
2979 Y0 = Match0 ? Y0 : OuterV0;
2980 X1 = Match1 ? X1 : OuterV1;
2981 Y1 = Match1 ? Y1 : OuterV1;
2985 if (!ShuffleDstTy || !ShuffleSrcTy || !ShuffleImmTy ||
2989 unsigned NumSrcElts = ShuffleSrcTy->getNumElements();
2990 unsigned NumImmElts = ShuffleImmTy->getNumElements();
2995 SmallVector<int, 16> NewMask(OuterMask);
2996 Value *NewX =
nullptr, *NewY =
nullptr;
2997 for (
int &M : NewMask) {
2998 Value *Src =
nullptr;
2999 if (0 <= M && M < (
int)NumImmElts) {
3003 Src =
M >= (int)NumSrcElts ? Y0 : X0;
3004 M =
M >= (int)NumSrcElts ? (M - NumSrcElts) :
M;
3006 }
else if (M >= (
int)NumImmElts) {
3011 Src =
M >= (int)NumSrcElts ? Y1 : X1;
3012 M =
M >= (int)NumSrcElts ? (M - NumSrcElts) :
M;
3016 assert(0 <= M && M < (
int)NumSrcElts &&
"Unexpected shuffle mask index");
3025 if (!NewX || NewX == Src) {
3029 if (!NewY || NewY == Src) {
3048 replaceValue(
I, *NewX);
3065 bool IsUnary =
all_of(NewMask, [&](
int M) {
return M < (int)NumSrcElts; });
3071 nullptr, {NewX, NewY});
3073 NewCost += InnerCost0;
3075 NewCost += InnerCost1;
3078 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
3080 if (NewCost > OldCost)
3084 replaceValue(
I, *Shuf);
3100bool VectorCombine::foldShufflesOfLengthChangingShuffles(Instruction &
I) {
3105 unsigned ChainLength = 0;
3106 SmallVector<int>
Mask;
3107 SmallVector<int> YMask;
3117 ArrayRef<int> OuterMask;
3118 Value *OuterV0, *OuterV1;
3119 if (ChainLength != 0 && !Trunk->
hasOneUse())
3122 m_Mask(OuterMask))))
3124 if (OuterV0->
getType() != TrunkType) {
3130 ArrayRef<int> InnerMask0, InnerMask1;
3131 Value *A0, *A1, *B0, *B1;
3136 bool Match0Leaf = Match0 && A0->
getType() !=
I.getType();
3137 bool Match1Leaf = Match1 && A1->
getType() !=
I.getType();
3138 if (Match0Leaf == Match1Leaf) {
3144 SmallVector<int> CommutedOuterMask;
3151 for (
int &M : CommutedOuterMask) {
3154 if (M < (
int)NumTrunkElts)
3159 OuterMask = CommutedOuterMask;
3178 int NumLeafElts = YType->getNumElements();
3179 SmallVector<int> LocalYMask(InnerMask1);
3180 for (
int &M : LocalYMask) {
3181 if (M >= NumLeafElts)
3191 Mask.assign(OuterMask);
3192 YMask.
assign(LocalYMask);
3193 OldCost = NewCost = LocalOldCost;
3200 SmallVector<int> NewYMask(YMask);
3202 for (
auto [CombinedM, LeafM] :
llvm::zip(NewYMask, LocalYMask)) {
3203 if (LeafM == -1 || CombinedM == LeafM)
3205 if (CombinedM == -1) {
3215 SmallVector<int> NewMask;
3216 NewMask.
reserve(NumTrunkElts);
3217 for (
int M : Mask) {
3218 if (M < 0 || M >=
static_cast<int>(NumTrunkElts))
3233 if (LocalNewCost >= NewCost && LocalOldCost < LocalNewCost - NewCost)
3237 if (ChainLength == 1) {
3238 dbgs() <<
"Found chain of shuffles fed by length-changing shuffles: "
3241 dbgs() <<
" next chain link: " << *Trunk <<
'\n'
3242 <<
" old cost: " << (OldCost + LocalOldCost)
3243 <<
" new cost: " << LocalNewCost <<
'\n';
3248 OldCost += LocalOldCost;
3249 NewCost = LocalNewCost;
3253 if (ChainLength <= 1)
3261 return M < 0 || M >=
static_cast<int>(NumTrunkElts);
3264 for (
int &M : Mask) {
3265 if (M >=
static_cast<int>(NumTrunkElts))
3266 M = YMask[
M - NumTrunkElts];
3270 replaceValue(
I, *Root);
3277 replaceValue(
I, *Root);
3283bool VectorCombine::foldShuffleOfIntrinsics(Instruction &
I) {
3285 ArrayRef<int> OldMask;
3295 if (IID != II1->getIntrinsicID())
3304 if (!ShuffleDstTy || !II0Ty)
3310 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I) {
3311 Value *Arg0 = II0->getArgOperand(
I);
3312 Value *Arg1 = II1->getArgOperand(
I);
3329 II0Ty, OldMask,
CostKind, 0,
nullptr, {II0, II1}, &
I);
3333 SmallDenseSet<std::pair<Value *, Value *>> SeenOperandPairs;
3334 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I) {
3336 NewArgsTy.
push_back(II0->getArgOperand(
I)->getType());
3340 ShuffleDstTy->getNumElements());
3342 std::pair<Value *, Value *> OperandPair =
3343 std::make_pair(II0->getArgOperand(
I), II1->getArgOperand(
I));
3344 if (!SeenOperandPairs.
insert(OperandPair).second) {
3350 CostKind, 0,
nullptr, {II0->getArgOperand(
I), II1->getArgOperand(
I)});
3353 IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);
3356 if (!II0->hasOneUse())
3358 if (II1 != II0 && !II1->hasOneUse())
3362 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
3365 if (NewCost > OldCost)
3369 SmallDenseMap<std::pair<Value *, Value *>,
Value *> ShuffleCache;
3370 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I)
3374 std::pair<Value *, Value *> OperandPair =
3375 std::make_pair(II0->getArgOperand(
I), II1->getArgOperand(
I));
3376 auto It = ShuffleCache.
find(OperandPair);
3377 if (It != ShuffleCache.
end()) {
3383 II1->getArgOperand(
I), OldMask);
3384 ShuffleCache[OperandPair] = Shuf;
3392 NewInst->copyIRFlags(II0);
3393 NewInst->andIRFlags(II1);
3396 replaceValue(
I, *NewIntrinsic);
3402bool VectorCombine::foldPermuteOfIntrinsic(Instruction &
I) {
3414 if (!ShuffleDstTy || !IntrinsicSrcTy)
3418 unsigned NumSrcElts = IntrinsicSrcTy->getNumElements();
3419 if (
any_of(Mask, [NumSrcElts](
int M) {
return M >= (int)NumSrcElts; }))
3432 IntrinsicSrcTy, Mask,
CostKind, 0,
nullptr, {V0}, &
I);
3436 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I) {
3438 NewArgsTy.
push_back(II0->getArgOperand(
I)->getType());
3442 ShuffleDstTy->getNumElements());
3445 ArgTy, VecTy, Mask,
CostKind, 0,
nullptr,
3446 {II0->getArgOperand(
I)});
3449 IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);
3454 if (!II0->hasOneUse())
3457 LLVM_DEBUG(
dbgs() <<
"Found a permute of intrinsic: " <<
I <<
"\n OldCost: "
3458 << OldCost <<
" vs NewCost: " << NewCost <<
"\n");
3460 if (NewCost > OldCost)
3465 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I) {
3478 NewInst->copyIRFlags(II0);
3480 replaceValue(
I, *NewIntrinsic);
3490 int M = SV->getMaskValue(Lane);
3493 if (
static_cast<unsigned>(M) < NumElts) {
3494 V = SV->getOperand(0);
3497 V = SV->getOperand(1);
3508 auto [U, Lane] = IL;
3521 unsigned NumElts = Ty->getNumElements();
3522 if (Item.
size() == NumElts || NumElts == 1 || Item.
size() % NumElts != 0)
3528 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
3534 unsigned NumSlices = Item.
size() / NumElts;
3539 for (
unsigned Slice = 0; Slice < NumSlices; ++Slice) {
3540 Value *SliceV = Item[Slice * NumElts].first;
3541 if (!SliceV || SliceV->
getType() != Ty)
3543 for (
unsigned Elt = 0; Elt < NumElts; ++Elt) {
3544 auto [V, Lane] = Item[Slice * NumElts + Elt];
3545 if (Lane !=
static_cast<int>(Elt) || SliceV != V)
3554 const DenseSet<std::pair<Value *, Use *>> &IdentityLeafs,
3555 const DenseSet<std::pair<Value *, Use *>> &SplatLeafs,
3556 const DenseSet<std::pair<Value *, Use *>> &ConcatLeafs,
3558 auto [FrontV, FrontLane] = Item.
front();
3560 if (IdentityLeafs.contains(std::make_pair(FrontV, From))) {
3563 if (SplatLeafs.contains(std::make_pair(FrontV, From))) {
3565 return Builder.CreateShuffleVector(FrontV, Mask);
3567 if (ConcatLeafs.contains(std::make_pair(FrontV, From))) {
3571 for (
unsigned S = 0; S <
Values.size(); ++S)
3572 Values[S] = Item[S * NumElts].first;
3574 while (
Values.size() > 1) {
3577 std::iota(Mask.begin(), Mask.end(), 0);
3579 for (
unsigned S = 0; S < NewValues.
size(); ++S)
3581 Builder.CreateShuffleVector(
Values[S * 2],
Values[S * 2 + 1], Mask);
3589 unsigned NumOps =
I->getNumOperands() - (
II ? 1 : 0);
3591 for (
unsigned Idx = 0; Idx <
NumOps; Idx++) {
3594 Ops[Idx] =
II->getOperand(Idx);
3598 &
I->getOperandUse(Idx), Ty, IdentityLeafs,
3599 SplatLeafs, ConcatLeafs, Builder,
TTI);
3603 for (
const auto &Lane : Item)
3616 auto *
Value = Builder.CreateCmp(CI->getPredicate(),
Ops[0],
Ops[1]);
3626 auto *
Value = Builder.CreateCast(CI->getOpcode(),
Ops[0], DstTy);
3631 auto *
Value = Builder.CreateIntrinsic(DstTy,
II->getIntrinsicID(),
Ops);
3645bool VectorCombine::foldShuffleToIdentity(Instruction &
I) {
3647 if (!Ty ||
I.use_empty())
3651 for (
unsigned M = 0,
E = Ty->getNumElements(); M <
E; ++M)
3655 Worklist.
push_back(std::make_pair(Start, &*
I.use_begin()));
3656 DenseSet<std::pair<Value *, Use *>> IdentityLeafs, SplatLeafs, ConcatLeafs;
3657 unsigned NumVisited = 0;
3659 while (!Worklist.
empty()) {
3664 auto Item = ItemFrom.first;
3665 auto From = ItemFrom.second;
3666 auto [FrontV, FrontLane] = Item.front();
3674 return X->getType() ==
Y->getType() &&
3679 if (FrontLane == 0 &&
3681 Ty->getNumElements() &&
3683 Value *FrontV = Item.front().first;
3684 return !
E.value().first || (IsEquiv(
E.value().first, FrontV) &&
3685 E.value().second == (int)
E.index());
3687 IdentityLeafs.
insert(std::make_pair(FrontV, From));
3692 C &&
C->getSplatValue() &&
3694 Value *FrontV = Item.front().first;
3700 SplatLeafs.
insert(std::make_pair(FrontV, From));
3705 auto [FrontV, FrontLane] = Item.front();
3706 auto [
V, Lane] = IL;
3707 return !
V || (
V == FrontV && Lane == FrontLane);
3709 SplatLeafs.
insert(std::make_pair(FrontV, From));
3715 auto CheckLaneIsEquivalentToFirst = [Item](
InstLane IL) {
3716 Value *FrontV = Item.front().first;
3725 if (CI->getPredicate() !=
cast<CmpInst>(FrontV)->getPredicate())
3728 if (CI->getSrcTy()->getScalarType() !=
3733 SI->getOperand(0)->getType() !=
3740 II->getIntrinsicID() ==
3742 !
II->hasOperandBundles());
3749 BO && BO->isIntDivRem())
3756 }
else if (
isa<UnaryOperator, TruncInst, ZExtInst, SExtInst, FPToSIInst,
3757 FPToUIInst, SIToFPInst, UIToFPInst>(FrontV)) {
3765 if (DstTy && SrcTy &&
3766 SrcTy->getNumElements() == DstTy->getNumElements()) {
3768 &BitCast->getOperandUse(0));
3773 &Sel->getOperandUse(0));
3775 &Sel->getOperandUse(1));
3777 &Sel->getOperandUse(2));
3781 !
II->hasOperandBundles()) {
3782 for (
unsigned Op = 0,
E =
II->getNumOperands() - 1;
Op <
E;
Op++) {
3786 Value *FrontV = Item.front().first;
3802 ConcatLeafs.
insert(std::make_pair(FrontV, From));
3809 if (NumVisited <= 1)
3812 LLVM_DEBUG(
dbgs() <<
"Found a superfluous identity shuffle: " <<
I <<
"\n");
3818 SplatLeafs, ConcatLeafs, Builder, &
TTI);
3819 replaceValue(
I, *V);
3826bool VectorCombine::foldShuffleFromReductions(Instruction &
I) {
3830 switch (
II->getIntrinsicID()) {
3831 case Intrinsic::vector_reduce_add:
3832 case Intrinsic::vector_reduce_mul:
3833 case Intrinsic::vector_reduce_and:
3834 case Intrinsic::vector_reduce_or:
3835 case Intrinsic::vector_reduce_xor:
3836 case Intrinsic::vector_reduce_smin:
3837 case Intrinsic::vector_reduce_smax:
3838 case Intrinsic::vector_reduce_umin:
3839 case Intrinsic::vector_reduce_umax:
3848 std::queue<Value *> Worklist;
3849 SmallPtrSet<Value *, 4> Visited;
3850 ShuffleVectorInst *Shuffle =
nullptr;
3854 while (!Worklist.empty()) {
3855 Value *CV = Worklist.front();
3867 if (CI->isBinaryOp()) {
3868 for (
auto *
Op : CI->operand_values())
3872 if (Shuffle && Shuffle != SV)
3889 for (
auto *V : Visited)
3890 for (
auto *U :
V->users())
3891 if (!Visited.contains(U) && U != &
I)
3894 FixedVectorType *VecType =
3898 FixedVectorType *ShuffleInputType =
3900 if (!ShuffleInputType)
3906 SmallVector<int> ConcatMask;
3908 sort(ConcatMask, [](
int X,
int Y) {
return (
unsigned)
X < (unsigned)
Y; });
3909 bool UsesSecondVec =
3910 any_of(ConcatMask, [&](
int M) {
return M >= (int)NumInputElts; });
3917 ShuffleInputType, ConcatMask,
CostKind);
3919 LLVM_DEBUG(
dbgs() <<
"Found a reduction feeding from a shuffle: " << *Shuffle
3921 LLVM_DEBUG(
dbgs() <<
" OldCost: " << OldCost <<
" vs NewCost: " << NewCost
3923 bool MadeChanges =
false;
3924 if (NewCost < OldCost) {
3928 LLVM_DEBUG(
dbgs() <<
"Created new shuffle: " << *NewShuffle <<
"\n");
3929 replaceValue(*Shuffle, *NewShuffle);
3935 MadeChanges |= foldSelectShuffle(*Shuffle,
true);
3956bool VectorCombine::foldShuffleChainsToReduce(Instruction &
I) {
3965 if (FVT->getNumElements() < 2)
3968 std::optional<Instruction::BinaryOps> CommonBinOp;
3969 std::optional<Intrinsic::ID> CommonCallOp;
3974 CommonBinOp = BO->getOpcode();
3976 CommonCallOp = MMI->getIntrinsicID();
3982 FastMathFlags CommonFMF;
3983 bool IsFloatReduction =
false;
3987 auto IsChainNode = [&](
Value *
V) {
3989 return CommonBinOp && BO->getOpcode() == *CommonBinOp;
3991 return CommonCallOp && MMI->getIntrinsicID() == *CommonCallOp;
3999 constexpr unsigned MaxChainNodes = 32;
4000 SmallSetVector<Value *, 16> Nodes;
4001 SmallSetVector<Value *, 4> Sources;
4002 unsigned NumVisited = 0;
4003 auto AddSource = [&](
Value *
V) {
4009 auto Walk = [&](
Value *
V,
auto &&Walk) ->
bool {
4012 if (++NumVisited > MaxChainNodes)
4014 if (!IsChainNode(V))
4015 return AddSource(V);
4020 if (!Walk(
U->getOperand(
I), Walk))
4029 return AddSource(V);
4031 if (!Walk(VecOpEE, Walk) || Nodes.
empty())
4038 for (
Value *V : Nodes) {
4044 if (!IsFloatReduction) {
4046 IsFloatReduction =
true;
4060 DenseMap<Value *, Demand> Demands;
4061 auto DemandOf = [&](
Value *
V) -> Demand & {
4063 Demand &
D = Demands[
V];
4064 if (
D.Lanes.getBitWidth() !=
N)
4068 DemandOf(VecOpEE).Lanes.setBit(0);
4070 Demand DV = Demands.
lookup(V);
4071 if (DV.Lanes.isZero())
4074 ArrayRef<int>
Mask = SVI->getShuffleMask();
4075 Demand &
DS = DemandOf(SVI->getOperand(0));
4076 for (
unsigned I = 0,
E =
Mask.size();
I !=
E; ++
I) {
4078 if (!DV.Lanes[
I] || Mask[
I] < 0 ||
4079 (
unsigned)Mask[
I] >=
DS.Lanes.getBitWidth())
4081 if (
DS.Lanes[Mask[
I]] || DV.Duplicates[
I])
4082 DS.Duplicates.setBit(Mask[
I]);
4083 DS.Lanes.setBit(Mask[
I]);
4087 for (
Value *
Op : {
U->getOperand(0),
U->getOperand(1)}) {
4088 Demand &DOp = DemandOf(
Op);
4090 DOp.Duplicates |= DV.Duplicates | (DOp.Lanes & DV.Lanes);
4091 DOp.Lanes |= DV.Lanes;
4098 auto CoversChain = [&](
Value *
V) {
4099 SmallVector<Value *, 8> Worklist(1, VecOpEE);
4100 SmallPtrSet<Value *, 8> Seen;
4102 while (!Worklist.empty()) {
4105 for (
unsigned I = 0;
I !=
NumOps; ++
I) {
4109 if (!Nodes.contains(
Op))
4111 Worklist.push_back(
Op);
4119 struct ReductionCut {
4123 std::optional<ReductionCut> Cut;
4124 for (
Value *S : Sources) {
4125 auto It = Demands.
find(S);
4126 if (It == Demands.
end() || It->second.Lanes.isZero())
4128 if (Cut || (!IsIdempotent && !It->second.Duplicates.isZero())) {
4132 Cut = ReductionCut{S, It->second.Lanes};
4135 for (
Value *V : Nodes) {
4138 auto It = Demands.
find(V);
4139 if (It == Demands.
end() || !It->second.Lanes.isAllOnes())
4141 if (!IsIdempotent && !It->second.Duplicates.isZero())
4143 if (!CoversChain(V))
4145 Cut = ReductionCut{
V, It->second.Lanes};
4150 if (!Cut || Cut->Elts.popcount() < 2)
4160 for (
Value *V : Nodes)
4164 bool IsPartialReduction = !Cut->Elts.isAllOnes();
4165 FixedVectorType *ReduceVecTy =
4170 SmallVector<int> ExtractMask;
4172 if (IsPartialReduction) {
4173 for (
unsigned I = 0,
E = Cut->Elts.getBitWidth();
I !=
E; ++
I)
4175 ExtractMask.push_back(
I);
4176 unsigned SubIdx = 0, SubLen;
4177 auto SK = Cut->Elts.isShiftedMask(SubIdx, SubLen)
4181 SubIdx, ReduceVecTy);
4184 IntrinsicCostAttributes ICA(
4185 ReducedOp, ReduceVecTy->getElementType(),
4189 IsFloatReduction ? CommonFMF : FastMathFlags());
4192 LLVM_DEBUG(
dbgs() <<
"Found reduction shuffle chain: " <<
I <<
"\n OldCost : "
4193 << OrigCost <<
" vs NewCost: " << NewCost <<
"\n");
4198 if (VecOpEE->
hasOneUse() ? (NewCost > OrigCost) : (NewCost >= OrigCost))
4201 Value *ReduceInput = Cut->Src;
4202 if (IsPartialReduction)
4205 Value *ReducedResult;
4206 if (IsFloatReduction) {
4208 *CommonBinOp, ReduceVecTy->getElementType(),
false,
4211 {Identity, ReduceInput}, CommonFMF);
4216 replaceValue(
I, *ReducedResult);
4225bool VectorCombine::foldCastFromReductions(Instruction &
I) {
4230 bool TruncOnly =
false;
4233 case Intrinsic::vector_reduce_add:
4234 case Intrinsic::vector_reduce_mul:
4237 case Intrinsic::vector_reduce_and:
4238 case Intrinsic::vector_reduce_or:
4239 case Intrinsic::vector_reduce_xor:
4246 Value *ReductionSrc =
I.getOperand(0);
4258 Type *ResultTy =
I.getType();
4261 ReductionOpc, ReductionSrcTy, std::nullopt,
CostKind);
4271 if (OldCost <= NewCost || !NewCost.
isValid())
4275 II->getIntrinsicID(), {Src});
4277 replaceValue(
I, *NewCast);
4305bool VectorCombine::foldSignBitReductionCmp(Instruction &
I) {
4307 IntrinsicInst *ReduceOp;
4308 const APInt *CmpVal;
4315 case Intrinsic::vector_reduce_or:
4316 case Intrinsic::vector_reduce_umax:
4317 case Intrinsic::vector_reduce_and:
4318 case Intrinsic::vector_reduce_umin:
4319 case Intrinsic::vector_reduce_add:
4330 unsigned BitWidth = VecTy->getScalarSizeInBits();
4334 unsigned NumElts = VecTy->getNumElements();
4343 case Intrinsic::vector_reduce_or:
4344 case Intrinsic::vector_reduce_umax:
4345 TreeOpcode = Instruction::Or;
4347 case Intrinsic::vector_reduce_and:
4348 case Intrinsic::vector_reduce_umin:
4349 TreeOpcode = Instruction::And;
4351 case Intrinsic::vector_reduce_add:
4352 TreeOpcode = Instruction::Add;
4360 SmallVector<Value *, 8> Worklist;
4361 SmallVector<Value *, 8> Sources;
4363 std::optional<bool> IsAShr;
4364 constexpr unsigned MaxSources = 8;
4369 while (!Worklist.
empty() && Worklist.
size() <= MaxSources &&
4370 Sources.
size() <= MaxSources) {
4379 bool ThisIsAShr = Shr->getOpcode() == Instruction::AShr;
4381 IsAShr = ThisIsAShr;
4382 else if (*IsAShr != ThisIsAShr)
4408 if (Sources.
empty() || Sources.
size() > MaxSources ||
4409 Worklist.
size() > MaxSources || !IsAShr)
4412 unsigned NumSources = Sources.
size();
4416 if (OrigIID == Intrinsic::vector_reduce_add &&
4424 (OrigIID == Intrinsic::vector_reduce_add) ? NumSources * NumElts : 1;
4427 NegativeVal.negate();
4459 TestsNegative =
false;
4460 }
else if (*CmpVal == NegativeVal) {
4461 TestsNegative =
true;
4465 IsEq = Pred == ICmpInst::ICMP_EQ;
4466 }
else if (Pred == ICmpInst::ICMP_SLT && *CmpVal == RangeHigh) {
4468 TestsNegative = (RangeHigh == NegativeVal);
4469 }
else if (Pred == ICmpInst::ICMP_SGT && *CmpVal == RangeHigh - 1) {
4471 TestsNegative = (RangeHigh == NegativeVal);
4472 }
else if (Pred == ICmpInst::ICMP_SGT && *CmpVal == RangeLow) {
4474 TestsNegative = (RangeLow == NegativeVal);
4475 }
else if (Pred == ICmpInst::ICMP_SLT && *CmpVal == RangeLow + 1) {
4477 TestsNegative = (RangeLow == NegativeVal);
4520 enum CheckKind :
unsigned {
4527 auto RequiresOr = [](CheckKind
C) ->
bool {
return C & 0b100; };
4529 auto IsNegativeCheck = [](CheckKind
C) ->
bool {
return C & 0b010; };
4531 auto Invert = [](CheckKind
C) {
return CheckKind(
C ^ 0b011); };
4535 case Intrinsic::vector_reduce_or:
4536 case Intrinsic::vector_reduce_umax:
4537 Base = TestsNegative ? AnyNeg : AllNonNeg;
4539 case Intrinsic::vector_reduce_and:
4540 case Intrinsic::vector_reduce_umin:
4541 Base = TestsNegative ? AllNeg : AnyNonNeg;
4543 case Intrinsic::vector_reduce_add:
4544 Base = TestsNegative ? AllNeg : AllNonNeg;
4559 return ArithCost <= MinMaxCost ? std::make_pair(Arith, ArithCost)
4560 : std::make_pair(MinMax, MinMaxCost);
4564 auto [NewIID, NewCost] = RequiresOr(
Check)
4565 ? PickCheaper(Intrinsic::vector_reduce_or,
4566 Intrinsic::vector_reduce_umax)
4567 : PickCheaper(
Intrinsic::vector_reduce_and,
4571 if (NumSources > 1) {
4572 unsigned CombineOpc =
4573 RequiresOr(
Check) ? Instruction::Or : Instruction::And;
4578 LLVM_DEBUG(
dbgs() <<
"Found sign-bit reduction cmp: " <<
I <<
"\n OldCost: "
4579 << OldCost <<
" vs NewCost: " << NewCost <<
"\n");
4581 if (NewCost > OldCost)
4586 Type *ScalarTy = VecTy->getScalarType();
4589 if (NumSources == 1) {
4600 replaceValue(
I, *NewCmp);
4631bool VectorCombine::foldReductionZeroTest(Instruction &
I) {
4640 if (!
II || !
II->hasOneUse())
4643 auto ReduceID =
II->getIntrinsicID();
4644 if (ReduceID != Intrinsic::vector_reduce_or &&
4645 ReduceID != Intrinsic::vector_reduce_umax)
4648 Value *Vec =
II->getArgOperand(0);
4650 if (!VecTy || !VecTy->getElementType()->isIntegerTy())
4655 ? Intrinsic::vector_reduce_or
4670 LLVM_DEBUG(
dbgs() <<
"Found a reduction zero test: " <<
I <<
"\n OldCost: "
4671 << OldCost <<
" vs NewCost: " << NewCost <<
"\n");
4673 if (!OldCost.
isValid() || !NewCost.
isValid() || NewCost > OldCost)
4679 replaceValue(
I, *NewReduce);
4704bool VectorCombine::foldICmpEqZeroVectorReduce(Instruction &
I) {
4715 switch (
II->getIntrinsicID()) {
4716 case Intrinsic::vector_reduce_add:
4717 case Intrinsic::vector_reduce_or:
4718 case Intrinsic::vector_reduce_umin:
4719 case Intrinsic::vector_reduce_umax:
4720 case Intrinsic::vector_reduce_smin:
4721 case Intrinsic::vector_reduce_smax:
4727 Value *InnerOp =
II->getArgOperand(0);
4770 switch (
II->getIntrinsicID()) {
4771 case Intrinsic::vector_reduce_add: {
4776 unsigned NumElems = XTy->getNumElements();
4782 if (LeadingZerosX <= LostBits || LeadingZerosFX <= LostBits)
4790 case Intrinsic::vector_reduce_smin:
4791 case Intrinsic::vector_reduce_smax:
4801 LLVM_DEBUG(
dbgs() <<
"Found a reduction to 0 comparison with removable op: "
4817 case Intrinsic::vector_reduce_add:
4818 case Intrinsic::vector_reduce_or:
4824 case Intrinsic::vector_reduce_umin:
4825 case Intrinsic::vector_reduce_umax:
4826 case Intrinsic::vector_reduce_smin:
4827 case Intrinsic::vector_reduce_smax:
4839 NewReduceCost + (InnerOp->
hasOneUse() ? 0 : ExtCost);
4841 LLVM_DEBUG(
dbgs() <<
"Found a removable extension before reduction: "
4842 << *InnerOp <<
"\n OldCost: " << OldCost
4843 <<
" vs NewCost: " << NewCost <<
"\n");
4849 if (NewCost > OldCost)
4858 Builder.
CreateICmp(Pred, NewReduce, ConstantInt::getNullValue(Ty));
4859 replaceValue(
I, *NewCmp);
4890bool VectorCombine::foldEquivalentReductionCmp(Instruction &
I) {
4893 const APInt *CmpVal;
4898 if (!
II || !
II->hasOneUse())
4901 const auto IsValidOrUmaxCmp = [&]() {
4910 bool IsPositive = CmpVal->
isAllOnes() && Pred == ICmpInst::ICMP_SGT;
4912 bool IsNegative = (CmpVal->
isZero() || CmpVal->
isOne() || *CmpVal == 2) &&
4913 Pred == ICmpInst::ICMP_SLT;
4914 return IsEquality || IsPositive || IsNegative;
4917 const auto IsValidAndUminCmp = [&]() {
4922 const auto LeadingOnes = CmpVal->
countl_one();
4929 bool IsNegative = CmpVal->
isZero() && Pred == ICmpInst::ICMP_SLT;
4938 ((*CmpVal)[0] || (*CmpVal)[1]) && Pred == ICmpInst::ICMP_SGT;
4939 return IsEquality || IsNegative || IsPositive;
4947 switch (OriginalIID) {
4948 case Intrinsic::vector_reduce_or:
4949 if (!IsValidOrUmaxCmp())
4951 AlternativeIID = Intrinsic::vector_reduce_umax;
4953 case Intrinsic::vector_reduce_umax:
4954 if (!IsValidOrUmaxCmp())
4956 AlternativeIID = Intrinsic::vector_reduce_or;
4958 case Intrinsic::vector_reduce_and:
4959 if (!IsValidAndUminCmp())
4961 AlternativeIID = Intrinsic::vector_reduce_umin;
4963 case Intrinsic::vector_reduce_umin:
4964 if (!IsValidAndUminCmp())
4966 AlternativeIID = Intrinsic::vector_reduce_and;
4979 if (ReductionOpc != Instruction::ICmp)
4990 <<
"\n OrigCost: " << OrigCost
4991 <<
" vs AltCost: " << AltCost <<
"\n");
4993 if (AltCost >= OrigCost)
4997 Type *ScalarTy = VecTy->getScalarType();
5000 Builder.
CreateICmp(Pred, NewReduce, ConstantInt::get(ScalarTy, *CmpVal));
5002 replaceValue(
I, *NewCmp);
5016 unsigned Depth = 0) {
5017 constexpr unsigned MaxLocalDepth = 2;
5018 if (
Depth > MaxLocalDepth)
5021 auto NumSignBits = [&](
const Value *
X) {
5024 if (NumSignBits(V) == V->getType()->getScalarSizeInBits())
5029 return NumSignBits(
A) >= 2 && NumSignBits(
B) >= 2 &&
5040bool VectorCombine::foldReduceAddCmpZero(Instruction &
I) {
5050 if (!VecTy || VecTy->getNumElements() < 2)
5056 if (!IsNonNegative && !IsNonPositive)
5061 unsigned NumElts = VecTy->getNumElements();
5063 if (
Log2_32(NumElts) >= NumSignBits)
5066 ICmpInst::Predicate NewPred;
5068 case ICmpInst::ICMP_EQ:
5069 case ICmpInst::ICMP_ULE:
5070 case ICmpInst::ICMP_SLE:
5071 case ICmpInst::ICMP_SGE:
5072 NewPred = ICmpInst::ICMP_EQ;
5074 case ICmpInst::ICMP_NE:
5075 case ICmpInst::ICMP_UGT:
5076 case ICmpInst::ICMP_SGT:
5077 case ICmpInst::ICMP_SLT:
5078 NewPred = ICmpInst::ICMP_NE;
5088 if (!IsNonNegative &&
5089 (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLE))
5091 if (!IsNonPositive &&
5092 (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGE))
5094 if ((Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLE ||
5095 Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGE) &&
5096 Log2_32(NumElts) >= NumSignBits - 1)
5100 Instruction::Add, VecTy, std::nullopt,
CostKind);
5102 Instruction::Or, VecTy, std::nullopt,
CostKind);
5104 Intrinsic::umax, VecTy, FastMathFlags(),
CostKind);
5107 bool UseOr = OrCost.
isValid() && (!UmaxCost.
isValid() || OrCost <= UmaxCost);
5109 if (AltCost > OrigCost)
5115 Intrinsic::vector_reduce_umax, {VecTy}, {Vec});
5116 Worklist.pushValue(NewReduce);
5118 NewPred, NewReduce, ConstantInt::getNullValue(VecTy->getScalarType()));
5119 replaceValue(
I, *NewCmp);
5128 constexpr unsigned MaxVisited = 32;
5131 bool FoundReduction =
false;
5134 while (!WorkList.
empty()) {
5136 for (
User *U :
I->users()) {
5138 if (!UI || !Visited.
insert(UI).second)
5140 if (Visited.
size() > MaxVisited)
5146 switch (
II->getIntrinsicID()) {
5147 case Intrinsic::vector_reduce_add:
5148 case Intrinsic::vector_reduce_mul:
5149 case Intrinsic::vector_reduce_and:
5150 case Intrinsic::vector_reduce_or:
5151 case Intrinsic::vector_reduce_xor:
5152 case Intrinsic::vector_reduce_smin:
5153 case Intrinsic::vector_reduce_smax:
5154 case Intrinsic::vector_reduce_umin:
5155 case Intrinsic::vector_reduce_umax:
5156 FoundReduction =
true;
5169 return FoundReduction;
5182bool VectorCombine::foldSelectShuffle(Instruction &
I,
bool FromReduction) {
5187 if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
5195 SmallPtrSet<Instruction *, 4> InputShuffles({SVI0A, SVI0B, SVI1A, SVI1B});
5197 if (!
I ||
I->getOperand(0)->getType() != VT)
5199 return any_of(
I->users(), [&](User *U) {
5200 return U != Op0 && U != Op1 &&
5201 !(isa<ShuffleVectorInst>(U) &&
5202 (InputShuffles.contains(cast<Instruction>(U)) ||
5203 isInstructionTriviallyDead(cast<Instruction>(U))));
5206 if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
5207 checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
5215 for (
auto *U :
I->users()) {
5217 if (!SV || SV->getType() != VT)
5219 if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) ||
5220 (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1))
5227 if (!collectShuffles(Op0) || !collectShuffles(Op1))
5231 if (FromReduction && Shuffles.
size() > 1)
5236 if (!FromReduction) {
5237 for (
size_t Idx = 0,
E = Shuffles.
size(); Idx !=
E; ++Idx) {
5238 for (
auto *U : Shuffles[Idx]->
users()) {
5253 int MaxV1Elt = 0, MaxV2Elt = 0;
5254 unsigned NumElts = VT->getNumElements();
5255 for (ShuffleVectorInst *SVN : Shuffles) {
5256 SmallVector<int>
Mask;
5257 SVN->getShuffleMask(Mask);
5261 Value *SVOp0 = SVN->getOperand(0);
5262 Value *SVOp1 = SVN->getOperand(1);
5267 for (
int &Elem : Mask) {
5273 if (SVOp0 == Op1 && SVOp1 == Op0) {
5277 if (SVOp0 != Op0 || SVOp1 != Op1)
5283 SmallVector<int> ReconstructMask;
5284 for (
unsigned I = 0;
I <
Mask.size();
I++) {
5287 }
else if (Mask[
I] <
static_cast<int>(NumElts)) {
5288 MaxV1Elt = std::max(MaxV1Elt, Mask[
I]);
5289 auto It =
find_if(
V1, [&](
const std::pair<int, int> &
A) {
5290 return Mask[
I] ==
A.first;
5296 V1.emplace_back(Mask[
I],
V1.size());
5299 MaxV2Elt = std::max<int>(MaxV2Elt, Mask[
I] - NumElts);
5300 auto It =
find_if(V2, [&](
const std::pair<int, int> &
A) {
5301 return Mask[
I] -
static_cast<int>(NumElts) ==
A.first;
5315 sort(ReconstructMask);
5316 OrigReconstructMasks.
push_back(std::move(ReconstructMask));
5323 if (
V1.empty() || V2.
empty() ||
5324 (MaxV1Elt ==
static_cast<int>(
V1.size()) - 1 &&
5325 MaxV2Elt ==
static_cast<int>(V2.
size()) - 1))
5337 if (InputShuffles.contains(SSV))
5339 return SV->getMaskValue(M);
5347 std::pair<int, int>
Y) {
5348 int MXA = GetBaseMaskValue(
A,
X.first);
5349 int MYA = GetBaseMaskValue(
A,
Y.first);
5353 return SortBase(SVI0A,
A,
B);
5355 stable_sort(V2, [&](std::pair<int, int>
A, std::pair<int, int>
B) {
5356 return SortBase(SVI1A,
A,
B);
5361 for (
const auto &Mask : OrigReconstructMasks) {
5362 SmallVector<int> ReconstructMask;
5363 for (
int M : Mask) {
5365 auto It =
find_if(V, [M](
auto A) {
return A.second ==
M; });
5366 assert(It !=
V.end() &&
"Expected all entries in Mask");
5367 return std::distance(
V.begin(), It);
5371 else if (M <
static_cast<int>(NumElts)) {
5374 ReconstructMask.
push_back(NumElts + FindIndex(V2, M));
5377 ReconstructMasks.
push_back(std::move(ReconstructMask));
5382 SmallVector<int> V1A, V1B, V2A, V2B;
5383 for (
unsigned I = 0;
I <
V1.size();
I++) {
5387 for (
unsigned I = 0;
I < V2.
size();
I++) {
5388 V2A.
push_back(GetBaseMaskValue(SVI1A, V2[
I].first));
5389 V2B.
push_back(GetBaseMaskValue(SVI1B, V2[
I].first));
5391 while (V1A.
size() < NumElts) {
5395 while (V2A.
size() < NumElts) {
5407 VT, VT, SV->getShuffleMask(),
CostKind);
5414 unsigned ElementSize = VT->getElementType()->getPrimitiveSizeInBits();
5415 unsigned MaxVectorSize =
5417 unsigned MaxElementsInVector = MaxVectorSize / ElementSize;
5418 if (MaxElementsInVector == 0)
5427 std::set<SmallVector<int, 4>> UniqueShuffles;
5432 unsigned NumFullVectors =
Mask.size() / MaxElementsInVector;
5433 if (NumFullVectors < 2)
5434 return C + ShuffleCost;
5435 SmallVector<int, 4> SubShuffle(MaxElementsInVector);
5436 unsigned NumUniqueGroups = 0;
5437 unsigned NumGroups =
Mask.size() / MaxElementsInVector;
5440 for (
unsigned I = 0;
I < NumFullVectors; ++
I) {
5441 for (
unsigned J = 0; J < MaxElementsInVector; ++J)
5442 SubShuffle[J] = Mask[MaxElementsInVector *
I + J];
5443 if (UniqueShuffles.insert(SubShuffle).second)
5444 NumUniqueGroups += 1;
5446 return C + ShuffleCost * NumUniqueGroups / NumGroups;
5452 SmallVector<int, 16>
Mask;
5453 SV->getShuffleMask(Mask);
5454 return AddShuffleMaskAdjustedCost(
C, Mask);
5457 auto AllShufflesHaveSameOperands =
5458 [](SmallPtrSetImpl<Instruction *> &InputShuffles) {
5459 if (InputShuffles.size() < 2)
5461 ShuffleVectorInst *FirstSV =
5468 std::next(InputShuffles.begin()), InputShuffles.end(),
5469 [&](Instruction *
I) {
5470 ShuffleVectorInst *SV = dyn_cast<ShuffleVectorInst>(I);
5471 return SV && SV->getOperand(0) == In0 && SV->getOperand(1) == In1;
5480 CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
5482 if (AllShufflesHaveSameOperands(InputShuffles)) {
5483 UniqueShuffles.clear();
5484 CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
5487 CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
5493 FixedVectorType *Op0SmallVT =
5495 FixedVectorType *Op1SmallVT =
5500 UniqueShuffles.clear();
5501 CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
5503 std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
5505 std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
5508 LLVM_DEBUG(
dbgs() <<
"Found a binop select shuffle pattern: " <<
I <<
"\n");
5510 <<
" vs CostAfter: " << CostAfter <<
"\n");
5511 if (CostBefore < CostAfter ||
5522 if (InputShuffles.contains(SSV))
5524 return SV->getOperand(
Op);
5528 GetShuffleOperand(SVI0A, 1), V1A);
5531 GetShuffleOperand(SVI0B, 1), V1B);
5534 GetShuffleOperand(SVI1A, 1), V2A);
5537 GetShuffleOperand(SVI1B, 1), V2B);
5542 I->copyIRFlags(Op0,
true);
5547 I->copyIRFlags(Op1,
true);
5549 for (
int S = 0,
E = ReconstructMasks.size(); S !=
E; S++) {
5552 replaceValue(*Shuffles[S], *NSV,
false);
5555 Worklist.pushValue(NSV0A);
5556 Worklist.pushValue(NSV0B);
5557 Worklist.pushValue(NSV1A);
5558 Worklist.pushValue(NSV1B);
5568bool VectorCombine::shrinkType(Instruction &
I) {
5569 Value *ZExted, *OtherOperand;
5575 Value *ZExtOperand =
I.getOperand(
I.getOperand(0) == OtherOperand ? 1 : 0);
5579 unsigned BW = SmallTy->getElementType()->getPrimitiveSizeInBits();
5581 if (
I.getOpcode() == Instruction::LShr) {
5598 Instruction::ZExt, BigTy, SmallTy,
5599 TargetTransformInfo::CastContextHint::None,
CostKind);
5604 for (User *U : ZExtOperand->
users()) {
5611 ShrinkCost += ZExtCost;
5626 ShrinkCost += ZExtCost;
5633 Instruction::Trunc, SmallTy, BigTy,
5634 TargetTransformInfo::CastContextHint::None,
CostKind);
5639 if (ShrinkCost > CurrentCost)
5643 Value *Op0 = ZExted;
5646 if (
I.getOperand(0) == OtherOperand)
5653 replaceValue(
I, *NewZExtr);
5659bool VectorCombine::foldInsExtVectorToShuffle(Instruction &
I) {
5660 Value *DstVec, *SrcVec;
5661 uint64_t ExtIdx, InsIdx;
5671 if (!DstVecTy || !SrcVecTy ||
5677 if (InsIdx >= NumDstElts || ExtIdx >= NumSrcElts || NumDstElts == 1)
5684 bool NeedExpOrNarrow = NumSrcElts != NumDstElts;
5686 if (NeedDstSrcSwap) {
5688 Mask[InsIdx] = ExtIdx % NumDstElts;
5692 std::iota(
Mask.begin(),
Mask.end(), 0);
5693 Mask[InsIdx] = (ExtIdx % NumDstElts) + NumDstElts;
5706 SmallVector<int> ExtToVecMask;
5707 if (!NeedExpOrNarrow) {
5712 nullptr, {DstVec, SrcVec});
5718 ExtToVecMask[ExtIdx % NumDstElts] = ExtIdx;
5721 DstVecTy, SrcVecTy, ExtToVecMask,
CostKind);
5725 if (!Ext->hasOneUse())
5728 LLVM_DEBUG(
dbgs() <<
"Found a insert/extract shuffle-like pair: " <<
I
5729 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
5732 if (OldCost < NewCost)
5735 if (NeedExpOrNarrow) {
5736 if (!NeedDstSrcSwap)
5749 replaceValue(
I, *Shuf);
5758bool VectorCombine::foldInterleaveIntrinsics(Instruction &
I) {
5759 const APInt *SplatVal0, *SplatVal1;
5769 auto *ExtVTy = VectorType::getExtendedElementVectorType(VTy);
5770 unsigned Width = VTy->getElementType()->getIntegerBitWidth();
5779 LLVM_DEBUG(
dbgs() <<
"VC: The cost to cast from " << *ExtVTy <<
" to "
5780 << *
I.getType() <<
" is too high.\n");
5784 APInt NewSplatVal = SplatVal1->
zext(Width * 2);
5785 NewSplatVal <<= Width;
5786 NewSplatVal |= SplatVal0->
zext(Width * 2);
5788 ExtVTy->getElementCount(), ConstantInt::get(
F.getContext(), NewSplatVal));
5823bool VectorCombine::foldDeinterleaveIntrinsics(Instruction &
I) {
5825 if (
DL->isBigEndian())
5828 using namespace PatternMatch;
5829 Value *DeinterleavedVal;
5840 unsigned HalfElementWidth = ElementWidth / 2;
5844 std::array<ExtractValueInst *, 2> OrigFields{};
5845 for (User *Usr :
I.users()) {
5848 if (!
E ||
E->getNumIndices() != 1)
5850 unsigned Idx = *
E->idx_begin();
5852 if (Idx >= 2 || OrigFields[Idx] || !
E->hasNUses(2))
5854 OrigFields[Idx] =
E;
5858 SmallVector<Instruction *, 2> MergeInsts;
5859 for (
auto *FieldUsr : OrigFields[0]->
users()) {
5867 auto MatchMerge = [&](void) ->
bool {
5870 return match(MergeInsts[0],
5874 match(MergeInsts[1],
5879 if (!MatchMerge()) {
5880 std::swap(MergeInsts[0], MergeInsts[1]);
5895 auto *NewFieldTy = VecTy->getWithNewBitWidth(HalfElementWidth);
5905 if (OldCost <= NewCost || !NewCost.
isValid()) {
5907 dbgs() <<
"VC: New deinterleave2 sequence cost (" << NewCost <<
")"
5908 <<
" is higher than that of the old one (" << OldCost <<
")\n");
5916 Intrinsic::vector_deinterleave2, {NewVecTy}, {NewVecCast});
5917 for (
auto [Idx, MergeInst] :
enumerate(MergeInsts)) {
5919 NewField = Builder.
CreateBitCast(NewField, MergeInst->getType());
5920 replaceValue(*MergeInst, *NewField);
5926bool VectorCombine::foldBitcastOfVPLoad(Instruction &
I) {
5927 const DataLayout &
DL =
I.getDataLayout();
5942 DL.getValueOrABITypeAlignment(
II->getPointerAlignment(), OrigVecTy);
5943 ElementCount OrigVecCnt = OrigVecTy->getElementCount();
5945 ElementCount NewVecCnt = NewVecTy->getElementCount();
5957 II->getMemoryPointerParam(),
false,
5963 {Intrinsic::vp_load, NewVecTy,
II->getMemoryPointerParam(),
false,
5967 <<
" NewCost=" << NewCost <<
"\n");
5968 if (NewCost > OldCost || !NewCost.
isValid())
5975 NewVecTy, Intrinsic::vp_load,
5976 {
II->getMemoryPointerParam(), NewMask, NewEVL});
5979 0, AttrBuilder(
II->getContext()).addAlignmentAttr(OrigAlign));
5980 replaceValue(*Cast, *NewVP);
5988bool VectorCombine::foldBitOrderReverseAndSwap(Instruction &
I) {
5994 Type *Ty =
I.getType();
5996 TypeSize ElementSize =
DL->getTypeStoreSize(Ty);
5999 Type *NewVecTy = VectorType::get(I8Ty, NewVecCnt);
6015 IntrinsicCostAttributes ICANew(Intrinsic::bitreverse, NewVecTy, {NewVecTy});
6018 InstructionCost NewCost = CastToVecCost + NewIntrinsicCost + CastToOrigCost;
6020 if (!InnerII->hasOneUse())
6024 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
6026 if (!NewCost.
isValid() || NewCost >= OldCost)
6035 replaceValue(
I, *CastToOrig);
6040bool VectorCombine::shrinkLoadForShuffles(Instruction &
I) {
6042 if (!OldLoad || !OldLoad->isSimple())
6049 unsigned const OldNumElements = OldLoadTy->getNumElements();
6055 using IndexRange = std::pair<int, int>;
6056 auto GetIndexRangeInShuffles = [&]() -> std::optional<IndexRange> {
6057 IndexRange OutputRange = IndexRange(OldNumElements, -1);
6058 for (llvm::Use &Use :
I.uses()) {
6060 User *Shuffle =
Use.getUser();
6065 return std::nullopt;
6072 for (
int Index : Mask) {
6073 if (Index >= 0 && Index <
static_cast<int>(OldNumElements)) {
6074 OutputRange.first = std::min(Index, OutputRange.first);
6075 OutputRange.second = std::max(Index, OutputRange.second);
6080 if (OutputRange.second < OutputRange.first)
6081 return std::nullopt;
6087 if (std::optional<IndexRange> Indices = GetIndexRangeInShuffles()) {
6088 unsigned const NewNumElements = Indices->second + 1u;
6092 if (NewNumElements < OldNumElements) {
6097 Type *ElemTy = OldLoadTy->getElementType();
6099 Value *PtrOp = OldLoad->getPointerOperand();
6102 Instruction::Load, OldLoad->getType(), OldLoad->getAlign(),
6103 OldLoad->getPointerAddressSpace(),
CostKind);
6106 OldLoad->getPointerAddressSpace(),
CostKind);
6108 using UseEntry = std::pair<ShuffleVectorInst *, std::vector<int>>;
6110 unsigned const MaxIndex = NewNumElements * 2u;
6112 for (llvm::Use &Use :
I.uses()) {
6119 ArrayRef<int> OldMask = Shuffle->getShuffleMask();
6125 for (
int Index : OldMask) {
6126 if (Index >=
static_cast<int>(MaxIndex))
6140 dbgs() <<
"Found a load used only by shufflevector instructions: "
6141 <<
I <<
"\n OldCost: " << OldCost
6142 <<
" vs NewCost: " << NewCost <<
"\n");
6144 if (OldCost < NewCost || !NewCost.
isValid())
6150 NewLoad->copyMetadata(
I);
6153 for (UseEntry &Use : NewUses) {
6154 ShuffleVectorInst *Shuffle =
Use.first;
6155 std::vector<int> &NewMask =
Use.second;
6162 replaceValue(*Shuffle, *NewShuffle,
false);
6175bool VectorCombine::shrinkPhiOfShuffles(Instruction &
I) {
6177 if (!Phi ||
Phi->getNumIncomingValues() != 2u)
6181 ArrayRef<int> Mask0;
6182 ArrayRef<int> Mask1;
6195 auto const InputNumElements = InputVT->getNumElements();
6197 if (InputNumElements >= ResultVT->getNumElements())
6202 SmallVector<int, 16> NewMask;
6205 for (
auto [
M0,
M1] :
zip(Mask0, Mask1)) {
6206 if (
M0 >= 0 &&
M1 >= 0)
6208 else if (
M0 == -1 &&
M1 == -1)
6221 int MaskOffset = NewMask[0
u];
6222 unsigned Index = (InputNumElements + MaskOffset) % InputNumElements;
6225 for (
unsigned I = 0u;
I < InputNumElements; ++
I) {
6239 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
6242 if (NewCost > OldCost)
6254 auto *NewPhi = Builder.
CreatePHI(NewShuf0->getType(), 2u);
6256 NewPhi->addIncoming(
Op,
Phi->getIncomingBlock(1u));
6262 replaceValue(*Phi, *NewShuf1);
6268bool VectorCombine::run() {
6282 auto Opcode =
I.getOpcode();
6290 if (IsFixedVectorType) {
6292 case Instruction::InsertElement:
6293 if (vectorizeLoadInsert(
I))
6296 case Instruction::ShuffleVector:
6297 if (widenSubvectorLoad(
I))
6308 if (scalarizeOpOrCmp(
I))
6310 if (scalarizeLoad(
I))
6312 if (scalarizeExtExtract(
I))
6314 if (scalarizeVPIntrinsic(
I))
6316 if (foldInterleaveIntrinsics(
I))
6318 if (foldBitcastOfVPLoad(
I))
6322 if (foldDeinterleaveIntrinsics(
I))
6325 if (Opcode == Instruction::Store)
6326 if (foldSingleElementStore(
I))
6330 if (TryEarlyFoldsOnly)
6333 if (Opcode == Instruction::Call)
6334 if (foldBitOrderReverseAndSwap(
I))
6341 if (IsFixedVectorType) {
6343 case Instruction::InsertElement:
6344 if (foldInsExtFNeg(
I))
6346 if (foldInsExtBinop(
I))
6348 if (foldInsExtVectorToShuffle(
I))
6351 case Instruction::ShuffleVector:
6352 if (foldPermuteOfBinops(
I))
6354 if (foldShuffleOfBinops(
I))
6356 if (foldShuffleOfSelects(
I))
6358 if (foldShuffleOfCastops(
I))
6360 if (foldShuffleOfShuffles(
I))
6362 if (foldPermuteOfIntrinsic(
I))
6364 if (foldShufflesOfLengthChangingShuffles(
I))
6366 if (foldShuffleOfIntrinsics(
I))
6368 if (foldSelectShuffle(
I))
6370 if (foldShuffleToIdentity(
I))
6373 case Instruction::Load:
6374 if (shrinkLoadForShuffles(
I))
6377 case Instruction::BitCast:
6378 if (foldBitcastShuffle(
I))
6380 if (foldSelectsFromBitcast(
I))
6383 case Instruction::And:
6384 case Instruction::Or:
6385 case Instruction::Xor:
6386 if (foldBitOpOfCastops(
I))
6388 if (foldBitOpOfCastConstant(
I))
6391 case Instruction::PHI:
6392 if (shrinkPhiOfShuffles(
I))
6402 case Instruction::Call:
6403 if (foldShuffleFromReductions(
I))
6405 if (foldCastFromReductions(
I))
6408 case Instruction::ExtractElement:
6409 if (foldShuffleChainsToReduce(
I))
6412 case Instruction::ICmp:
6413 if (foldSignBitReductionCmp(
I))
6415 if (foldICmpEqZeroVectorReduce(
I))
6417 if (foldReductionZeroTest(
I))
6419 if (foldEquivalentReductionCmp(
I))
6421 if (foldReduceAddCmpZero(
I))
6424 case Instruction::FCmp:
6425 if (foldExtractExtract(
I))
6428 case Instruction::Or:
6429 if (foldConcatOfBoolMasks(
I))
6434 if (foldExtractExtract(
I))
6436 if (foldExtractedCmps(
I))
6438 if (foldBinopOfReductions(
I))
6447 bool MadeChange =
false;
6448 for (BasicBlock &BB :
F) {
6460 if (!
I->isDebugOrPseudoInst())
6461 MadeChange |= FoldInst(*
I);
6468 while (!Worklist.isEmpty()) {
6478 MadeChange |= FoldInst(*
I);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< unsigned > MaxInstrsToScan("aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden, cl::desc("Max number of instructions to scan for aggressive instcombine."))
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static cl::opt< IntrinsicCostStrategy > IntrinsicCost("intrinsic-cost-strategy", cl::desc("Costing strategy for intrinsic instructions"), cl::init(IntrinsicCostStrategy::InstructionCost), cl::values(clEnumValN(IntrinsicCostStrategy::InstructionCost, "instruction-cost", "Use TargetTransformInfo::getInstructionCost"), clEnumValN(IntrinsicCostStrategy::IntrinsicCost, "intrinsic-cost", "Use TargetTransformInfo::getIntrinsicInstrCost"), clEnumValN(IntrinsicCostStrategy::TypeBasedIntrinsicCost, "type-based-intrinsic-cost", "Calculate the intrinsic cost based only on argument types")))
This file defines the DenseMap class.
This is the interface for a simple mod/ref and alias analysis over globals.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
const SmallVectorImpl< MachineOperand > & Cond
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
static bool isFreeConcat(ArrayRef< InstLane > Item, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI)
Detect concat of multiple values into a vector.
static void analyzeCostOfVecReduction(const IntrinsicInst &II, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI, InstructionCost &CostBeforeReduction, InstructionCost &CostAfterReduction)
static SmallVector< InstLane > generateInstLaneVectorFromOperand(ArrayRef< InstLane > Item, int Op)
static Value * createShiftShuffle(Value *Vec, unsigned OldIndex, unsigned NewIndex, IRBuilderBase &Builder)
Create a shuffle that translates (shifts) 1 element from the input vector to a new element location.
static Value * generateNewInstTree(ArrayRef< InstLane > Item, Use *From, FixedVectorType *Ty, const DenseSet< std::pair< Value *, Use * > > &IdentityLeafs, const DenseSet< std::pair< Value *, Use * > > &SplatLeafs, const DenseSet< std::pair< Value *, Use * > > &ConcatLeafs, IRBuilderBase &Builder, const TargetTransformInfo *TTI)
std::pair< Value *, int > InstLane
static bool isKnownNonPositive(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Used by foldReduceAddCmpZero to check if we can prove that a value is non-positive.
static Align computeAlignmentAfterScalarization(Align VectorAlignment, Type *ScalarType, Value *Idx, const DataLayout &DL)
The memory operation on a vector of ScalarType had alignment of VectorAlignment.
static bool feedsIntoVectorReduction(ShuffleVectorInst *SVI)
Returns true if this ShuffleVectorInst eventually feeds into a vector reduction intrinsic (e....
static cl::opt< bool > DisableVectorCombine("disable-vector-combine", cl::init(false), cl::Hidden, cl::desc("Disable all vector combine transforms"))
static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI)
static const unsigned InvalidIndex
static Value * translateExtract(ExtractElementInst *ExtElt, unsigned NewIndex, IRBuilderBase &Builder)
Given an extract element instruction with constant index operand, shuffle the source vector (shift th...
static ScalarizationResult canScalarizeAccess(VectorType *VecTy, Value *Idx, const SimplifyQuery &SQ)
Check if it is legal to scalarize a memory access to VecTy at index Idx.
static cl::opt< unsigned > MaxInstrsToScan("vector-combine-max-scan-instrs", cl::init(30), cl::Hidden, cl::desc("Max number of instructions to scan for vector combining."))
static cl::opt< bool > DisableBinopExtractShuffle("disable-binop-extract-shuffle", cl::init(false), cl::Hidden, cl::desc("Disable binop extract to shuffle transforms"))
static InstLane lookThroughShuffles(Value *V, int Lane)
static bool isMemModifiedBetween(BasicBlock::iterator Begin, BasicBlock::iterator End, const MemoryLocation &Loc, AAResults &AA)
static constexpr int Concat[]
A manager for alias analyses.
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
unsigned getBitWidth() const
Return the number of bits in the APInt.
bool isNegative() const
Determine sign of this APInt.
unsigned countl_one() const
Count the number of leading one bits.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
bool isOne() const
Determine if this is a value of 1.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
Get the first element.
size_t size() const
Get the array size.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
InstListType::iterator iterator
Instruction iterators...
BinaryOps getOpcode() const
Represents analyses that only rely on functions' control flow.
Value * getArgOperand(unsigned i) const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void addParamAttrs(unsigned ArgNo, const AttrBuilder &B)
Adds attributes to the indicated argument.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
bool isFPPredicate() const
static LLVM_ABI std::optional< CmpPredicate > getMatching(CmpPredicate A, CmpPredicate B)
Compares two CmpPredicates taking samesign into account and returns the canonicalized CmpPredicate if...
static LLVM_ABI Constant * getExtractElement(Constant *Vec, Constant *Idx, Type *OnlyIfReducedTy=nullptr)
static LLVM_ABI Constant * getBinOpIdentity(unsigned Opcode, Type *Ty, bool AllowRHSConstant=false, bool NSZ=false)
Return the identity constant for a binary opcode.
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents a range of values.
LLVM_ABI ConstantRange urem(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an unsigned remainder operation of...
LLVM_ABI ConstantRange binaryAnd(const ConstantRange &Other) const
Return a new range representing the possible values resulting from a binary-and of a value in this ra...
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
iterator find(const_arg_type_t< KeyT > Val)
Implements a dense probed hash-table based set.
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Convenience struct for specifying and reasoning about fast-math flags.
bool noSignedZeros() const
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * getDoubleElementsVectorType(FixedVectorType *VTy)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Predicate getSignedPredicate() const
For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
bool isEquality() const
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
LLVM_ABI CallInst * CreateIntrinsicWithoutFolding(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="", ArrayRef< OperandBundleDef > OpBundles={})
Create a call to intrinsic ID with Args, mangled using OverloadTypes.
Value * CreateNUWMul(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
ConstantInt * getTrue()
Get the constant value for i1 true.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateFreeze(Value *V, const Twine &Name="")
void SetCurrentDebugLocation(const DebugLoc &L)
Set location information used by debugging information.
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
Value * CreateIsNotNeg(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg > -1.
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Value * CreatePointerBitCastOrAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI Value * CreateOrReduce(Value *Src)
Create a vector int OR reduction intrinsic of the source vector.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Value * CreateIsNeg(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg < 0.
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
LLVM_ABI Value * CreateNAryOp(unsigned Opc, ArrayRef< Value * > Ops, const Twine &Name="", MDNode *FPMathTag=nullptr)
Create either a UnaryOperator or BinaryOperator depending on Opc.
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
LLVM_ABI Value * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="", ArrayRef< OperandBundleDef > OpBundles={}, function_ref< void(CallInst *)> SetFn=[](CallInst *) {})
Variant to create a possibly constant-folded intrinsic.
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateFNegFMF(Value *V, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
LLVM_ABI Value * CreateUnaryIntrinsic(Intrinsic::ID ID, Value *Op, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with 1 operand which is mangled on its type.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
void push(Instruction *I)
Push the instruction onto the worklist stack.
LLVM_ABI void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
LLVM_ABI void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
LLVM_ABI void setNonNeg(bool b=true)
Set or clear the nneg flag on this instruction, which must be a zext instruction.
LLVM_ABI bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
bool isIdempotent() const
Return true if the instruction is idempotent:
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI bool hasAllowReassoc() const LLVM_READONLY
Determine whether the allow-reassociation flag is set.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
void setAlignment(Align Align)
Type * getPointerOperandType() const
Align getAlign() const
Return the alignment of the access that is being performed.
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
const SDValue & getOperand(unsigned Num) const
bool contains(const_arg_type key) const
Check if the SetVector contains the given key.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
This instruction constructs a fixed permutation of two input vectors.
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
static LLVM_ABI void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static void commuteShuffleMask(MutableArrayRef< int > Mask, unsigned InVecNumElts)
Change values in a shuffle permute mask assuming the two vector operands of length InVecNumElts have ...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
void setAlignment(Align Align)
Analysis pass providing the TargetTransformInfo.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isPointerTy() const
True if this is an instance of PointerType.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
static LLVM_ABI bool isVPBinOp(Intrinsic::ID ID)
std::optional< unsigned > getFunctionalIntrinsicID() const
std::optional< unsigned > getFunctionalOpcode() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
LLVM_ABI bool hasOneUser() const
Return true if there is exactly one user of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
unsigned getValueID() const
Return an ID for the concrete type of this object.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Type * getElementType() const
std::pair< iterator, bool > insert(const ValueT &V)
constexpr bool hasKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns true if there exists a value X where RHS.multiplyCoefficientBy(X) will result in a value whos...
constexpr ScalarTy getKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns a value X where RHS.multiplyCoefficientBy(X) will result in a value whose quantity matches ou...
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
match_combine_and< Ty... > m_CombineAnd(const Ty &...Ps)
Combine pattern matchers matching all of Ps patterns.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
auto m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0 >::Ty m_BitReverse(const Opnd0 &Op0)
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
auto m_Poison()
Match an arbitrary poison constant.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
match_bind< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
DisjointOr_match< LHS, RHS > m_DisjointOr(const LHS &L, const RHS &R)
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
CmpClass_match< LHS, RHS, ICmpInst, true > m_c_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
Matches an ICmp with a predicate over LHS and RHS in either order.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
auto m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
auto m_Constant()
Match an arbitrary Constant and ignore it.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
cst_pred_ty< is_non_zero_int > m_NonZeroInt()
Match a non-zero integer or a vector with all non-zero elements.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Shl, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWShl(const LHS &L, const RHS &R)
auto m_AnyIntrinsic()
Matches any intrinsic call and ignore it.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Mul, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWMul(const LHS &L, const RHS &R)
BinOpPred_match< LHS, RHS, is_bitwiselogic_op, true > m_c_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations in either order.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)
Match either "sext" or "zext nneg".
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
m_Intrinsic_Ty< Opnd0 >::Ty m_BSwap(const Opnd0 &Op0)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
m_Intrinsic_Ty< Opnd >::Ty m_Deinterleave2(const Opnd &Op)
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
@ Valid
The data is already valid.
initializer< Ty > init(const Ty &Val)
DXILDebugInfoMap run(Module &M)
@ User
could "use" a pointer
NodeAddr< PhiNode * > Phi
NodeAddr< UseNode * > Use
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
void stable_sort(R &&Range)
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
RelativeUniformCounterPtr Values
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI Value * simplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q)
Given operand for a UnaryOperator, fold the result or return null.
scope_exit(Callable) -> scope_exit< Callable >
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
LLVM_ABI Value * simplifyCall(CallBase *Call, Value *Callee, ArrayRef< Value * > Args, const SimplifyQuery &Q)
Given a callsite, callee, and arguments, fold the result or return null.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
LLVM_ABI bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Value
unsigned M1(unsigned Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool isModSet(const ModRefInfo MRI)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI bool programUndefinedIfPoison(const Instruction *Inst)
LLVM_ABI bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, Instruction *ScanFrom, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI void propagateIRFlags(Value *I, ArrayRef< Value * > VL, Value *OpValue=nullptr, bool IncludeWrapFlags=true)
Get the intersection (logical and) of all of the potential IR flags of each scalar operation (VL) tha...
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr int PoisonMaskElem
LLVM_ABI bool isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, const Instruction *Inst, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
This returns the same result as isSafeToSpeculativelyExecute if Opcode is the actual opcode of Inst.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
LLVM_ABI Intrinsic::ID getReductionForBinop(Instruction::BinaryOps Opc)
Returns the reduction intrinsic id corresponding to the binary operation.
@ And
Bitwise or logical AND of integers.
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Count
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
LLVM_ABI unsigned ComputeNumSignBits(const Value *Op, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Return the number of times the sign bit of the register is replicated into the other bits.
constexpr unsigned BitWidth
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
LLVM_ABI Constant * getLosslessInvCast(Constant *C, Type *InvCastTo, unsigned CastOp, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
Try to cast C to InvC losslessly, satisfying CastOp(InvC) equals C, or CastOp(InvC) is a refined valu...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
constexpr bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list.
LLVM_ABI Value * simplifyCmpInst(CmpPredicate Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a CmpInst, fold the result or return null.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
LLVM_ABI bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the give value is known to be non-negative.
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicID(Intrinsic::ID IID)
Returns the llvm.vector.reduce min/max intrinsic that corresponds to the intrinsic op.
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, const SimplifyQuery &SQ, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
LLVM_ABI AAMDNodes adjustForAccess(unsigned AccessSize)
Create a new AAMDNode for accessing AccessSize bytes of this AAMDNode.
This struct is a compact representation of a valid (non-zero power of two) alignment.
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
SimplifyQuery getWithInstruction(const Instruction *I) const