#define DEBUG_TYPE "vector-combine"

STATISTIC(NumVecLoad, "Number of vector loads formed");
STATISTIC(NumVecCmp, "Number of vector compares formed");
STATISTIC(NumVecBO, "Number of vector binops formed");
STATISTIC(NumVecCmpBO, "Number of vector compare + binop formed");
STATISTIC(NumShufOfBitcast, "Number of shuffles moved after bitcast");
STATISTIC(NumScalarOps, "Number of scalar unary + binary ops formed");
STATISTIC(NumScalarCmp, "Number of scalar compares formed");
STATISTIC(NumScalarIntrinsic, "Number of scalar intrinsic calls formed");
static cl::opt<bool> DisableVectorCombine(
    "disable-vector-combine", cl::init(false), cl::Hidden,
    cl::desc("Disable all vector combine transforms"));

static cl::opt<bool> DisableBinopExtractShuffle(
    "disable-binop-extract-shuffle", cl::init(false), cl::Hidden,
    cl::desc("Disable binop extract to shuffle transforms"));

static cl::opt<unsigned> MaxInstrsToScan(
    "vector-combine-max-scan-instrs", cl::init(30), cl::Hidden,
    cl::desc("Max number of instructions to scan for vector combining."));

static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
                bool TryEarlyFoldsOnly)
        TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}

  const TargetTransformInfo &TTI;
  const DominatorTree &DT;
  const SimplifyQuery SQ;
  bool TryEarlyFoldsOnly;
  InstructionWorklist Worklist;
  bool vectorizeLoadInsert(Instruction &I);
  bool widenSubvectorLoad(Instruction &I);
  ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
                                        ExtractElementInst *Ext1,
                                        unsigned PreferredExtractIndex) const;
  bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
                             const Instruction &I,
                             ExtractElementInst *&ConvertToShuffle,
                             unsigned PreferredExtractIndex);
  bool foldExtractExtract(Instruction &I);
  bool foldInsExtFNeg(Instruction &I);
  bool foldInsExtBinop(Instruction &I);
  bool foldInsExtVectorToShuffle(Instruction &I);
  bool foldBitOpOfCastops(Instruction &I);
  bool foldBitOpOfCastConstant(Instruction &I);
  bool foldBitcastShuffle(Instruction &I);
  bool scalarizeOpOrCmp(Instruction &I);
  bool scalarizeVPIntrinsic(Instruction &I);
  bool foldExtractedCmps(Instruction &I);
  bool foldBinopOfReductions(Instruction &I);
  bool foldSingleElementStore(Instruction &I);
  bool scalarizeLoad(Instruction &I);
  bool scalarizeLoadExtract(LoadInst *LI, VectorType *VecTy, Value *Ptr);
  bool scalarizeLoadBitcast(LoadInst *LI, VectorType *VecTy, Value *Ptr);
  bool scalarizeExtExtract(Instruction &I);
  bool foldConcatOfBoolMasks(Instruction &I);
  bool foldPermuteOfBinops(Instruction &I);
  bool foldShuffleOfBinops(Instruction &I);
  bool foldShuffleOfSelects(Instruction &I);
  bool foldShuffleOfCastops(Instruction &I);
  bool foldShuffleOfShuffles(Instruction &I);
  bool foldPermuteOfIntrinsic(Instruction &I);
  bool foldShufflesOfLengthChangingShuffles(Instruction &I);
  bool foldShuffleOfIntrinsics(Instruction &I);
  bool foldShuffleToIdentity(Instruction &I);
  bool foldShuffleFromReductions(Instruction &I);
  bool foldShuffleChainsToReduce(Instruction &I);
  bool foldCastFromReductions(Instruction &I);
  bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
  bool foldInterleaveIntrinsics(Instruction &I);
  bool shrinkType(Instruction &I);
  bool shrinkLoadForShuffles(Instruction &I);
  bool shrinkPhiOfShuffles(Instruction &I);
  void replaceValue(Instruction &Old, Value &New, bool Erase = true) {
    Worklist.pushUsersToWorkList(*NewI);
    Worklist.pushValue(NewI);
    SmallPtrSet<Value *, 4> Visited;
        OpI, nullptr, nullptr, [&](Value *V) {
      NextInst = NextInst->getNextNode();
      Worklist.pushUsersToWorkList(*OpI);
      Worklist.pushValue(OpI);
  if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
      Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
  Type *ScalarTy = Load->getType()->getScalarType();
  unsigned MinVectorSize = TTI.getMinVectorRegisterBitWidth();
  if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
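// The fold below widens a scalar load feeding an insertelement into a vector
// load plus a shuffle. Illustrative sketch only (%p, %s, %v are placeholder
// names, not from this file), and the wider access must be dereferenceable:
//   %s = load float, ptr %p
//   %r = insertelement <4 x float> poison, float %s, i64 0
// -->
//   %v = load <4 x float>, ptr %p
//   %r = shufflevector <4 x float> %v, <4 x float> poison, identity mask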
bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
  unsigned MinVecNumElts = MinVectorSize / ScalarSize;
  auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false);
  unsigned OffsetEltIndex = 0;
  unsigned OffsetBitWidth = DL->getIndexTypeSizeInBits(SrcPtr->getType());
  APInt Offset(OffsetBitWidth, 0);
  uint64_t ScalarSizeInBytes = ScalarSize / 8;
  if (Offset.urem(ScalarSizeInBytes) != 0)
  OffsetEltIndex = Offset.udiv(ScalarSizeInBytes).getZExtValue();
  if (OffsetEltIndex >= MinVecNumElts)
  unsigned AS = Load->getPointerAddressSpace();
  unsigned OutputNumElts = Ty->getNumElements();
  assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
  Mask[0] = OffsetEltIndex;
  if (OldCost < NewCost || !NewCost.isValid())
  replaceValue(I, *VecLd);
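// Sketch of the next fold (illustrative values): an identity shuffle that
// pads a loaded subvector with poison lanes can become one wider load when
// the trailing bytes are known dereferenceable:
//   %v = load <2 x float>, ptr %p
//   %r = shufflevector <2 x float> %v, <2 x float> poison,
//        <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
// -->
//   %r = load <4 x float>, ptr %p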
bool VectorCombine::widenSubvectorLoad(Instruction &I) {
  if (!Shuf->isIdentityWithPadding())
  unsigned OpIndex = any_of(Shuf->getShuffleMask(), [&NumOpElts](int M) {
    return M >= (int)(NumOpElts);
  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
  unsigned AS = Load->getPointerAddressSpace();
  if (OldCost < NewCost || !NewCost.isValid())
  replaceValue(I, *VecLd);
ExtractElementInst *VectorCombine::getShuffleExtract(
    ExtractElementInst *Ext0, ExtractElementInst *Ext1,
  assert(Index0C && Index1C && "Expected constant extract indexes");
  unsigned Index0 = Index0C->getZExtValue();
  unsigned Index1 = Index1C->getZExtValue();
  if (Index0 == Index1)
  if (PreferredExtractIndex == Index0)
  if (PreferredExtractIndex == Index1)
  return Index0 > Index1 ? Ext0 : Ext1;
bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
                                          ExtractElementInst *Ext1,
                                          const Instruction &I,
                                          ExtractElementInst *&ConvertToShuffle,
                                          unsigned PreferredExtractIndex) {
  assert(Ext0IndexC && Ext1IndexC && "Expected constant extract indexes");
  unsigned Opcode = I.getOpcode();
  assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
         "Expected a compare");
  unsigned Ext0Index = Ext0IndexC->getZExtValue();
  unsigned Ext1Index = Ext1IndexC->getZExtValue();
  unsigned BestExtIndex = Extract0Cost > Extract1Cost ? Ext0Index : Ext1Index;
  unsigned BestInsIndex = Extract0Cost > Extract1Cost ? Ext1Index : Ext0Index;
  InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
  if (Ext0Src == Ext1Src && Ext0Index == Ext1Index) {
    bool HasUseTax = Ext0 == Ext1 ? !Ext0->hasNUses(2)
    OldCost = CheapExtractCost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
    OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost +
  ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
  if (ConvertToShuffle) {
    SmallVector<int> ShuffleMask(FixedVecTy->getNumElements(),
    ShuffleMask[BestInsIndex] = BestExtIndex;
        VecTy, VecTy, ShuffleMask, CostKind, 0,
        nullptr, {ConvertToShuffle});
        VecTy, VecTy, {}, CostKind, 0, nullptr,
  return OldCost < NewCost;
  ShufMask[NewIndex] = OldIndex;
  return Builder.CreateShuffleVector(Vec, ShufMask, "shift");
                                     V1, "foldExtExtBinop");
  VecBOInst->copyIRFlags(&I);
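// Sketch of the extract/extract fold (C0/C1 and value names are schematic):
//   %e0 = extractelement <4 x i32> %x, i32 C0
//   %e1 = extractelement <4 x i32> %x, i32 C1
//   %r  = add i32 %e0, %e1
// -->
//   %s  = shufflevector <4 x i32> %x, <4 x i32> poison, mask moving C1 to C0
//   %v  = add <4 x i32> %x, %s
//   %r  = extractelement <4 x i32> %v, i32 C0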
bool VectorCombine::foldExtractExtract(Instruction &I) {
  ExtractElementInst *ExtractToChange;
  if (isExtractExtractCheap(Ext0, Ext1, I, ExtractToChange, InsertIndex))
  if (ExtractToChange) {
    unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
    if (ExtractToChange == Ext0)
          ? foldExtExtCmp(ExtOp0, ExtOp1, ExtIndex, I)
          : foldExtExtBinop(ExtOp0, ExtOp1, ExtIndex, I);
  replaceValue(I, *NewExt);
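// Roughly: insertelement %dst, (fneg (extractelement %src, N)), N becomes a
// shuffle selecting lane N from (fneg %src) and all other lanes from %dst,
// keeping the fneg in the vector domain (schematic description).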
bool VectorCombine::foldInsExtFNeg(Instruction &I) {
  uint64_t ExtIdx, InsIdx;
  auto *DstVecScalarTy = DstVecTy->getScalarType();
  if (!SrcVecTy || DstVecScalarTy != SrcVecTy->getScalarType())
  unsigned NumDstElts = DstVecTy->getNumElements();
  unsigned NumSrcElts = SrcVecTy->getNumElements();
  if (ExtIdx > NumSrcElts || InsIdx >= NumDstElts || NumDstElts == 1)
  SmallVector<int> Mask(NumDstElts);
  std::iota(Mask.begin(), Mask.end(), 0);
  Mask[InsIdx] = (ExtIdx % NumDstElts) + NumDstElts;
  bool NeedLenChg = SrcVecTy->getNumElements() != NumDstElts;
  SmallVector<int> SrcMask;
    SrcMask[ExtIdx % NumDstElts] = ExtIdx;
        DstVecTy, SrcVecTy, SrcMask, CostKind);
             << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
  Value *NewShuf, *LenChgShuf = nullptr;
  replaceValue(I, *NewShuf);
bool VectorCombine::foldInsExtBinop(Instruction &I) {
  BinaryOperator *VecBinOp, *SclBinOp;
             << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
  NewInst->copyIRFlags(VecBinOp);
  NewInst->andIRFlags(SclBinOp);
  replaceValue(I, *NewBO);
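// Roughly: bitop(cast(x), cast(y)) -> cast(bitop(x, y)). An illustrative
// instance (types chosen only for the example):
//   %a = zext <4 x i16> %x to <4 x i32>
//   %b = zext <4 x i16> %y to <4 x i32>
//   %r = and <4 x i32> %a, %b
// -->
//   %t = and <4 x i16> %x, %y
//   %r = zext <4 x i16> %t to <4 x i32>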
bool VectorCombine::foldBitOpOfCastops(Instruction &I) {
  if (!BinOp || !BinOp->isBitwiseLogicOp())
  if (!LHSCast || !RHSCast) {
    LLVM_DEBUG(dbgs() << " One or both operands are not cast instructions\n");
  if (CastOpcode != RHSCast->getOpcode())
  switch (CastOpcode) {
  case Instruction::BitCast:
  case Instruction::Trunc:
  case Instruction::SExt:
  case Instruction::ZExt:
  Value *LHSSrc = LHSCast->getOperand(0);
  Value *RHSSrc = RHSCast->getOperand(0);
  auto *SrcTy = LHSSrc->getType();
  auto *DstTy = I.getType();
  if (CastOpcode != Instruction::BitCast &&
  if (!SrcTy->getScalarType()->isIntegerTy() ||
      !DstTy->getScalarType()->isIntegerTy())
      LHSCastCost + RHSCastCost;
  if (!LHSCast->hasOneUse())
    NewCost += LHSCastCost;
  if (!RHSCast->hasOneUse())
    NewCost += RHSCastCost;
             << " NewCost=" << NewCost << "\n");
  if (NewCost > OldCost)
      BinOp->getName() + ".inner");
  NewBinOp->copyIRFlags(BinOp);
  replaceValue(I, *Result);
bool VectorCombine::foldBitOpOfCastConstant(Instruction &I) {
  switch (CastOpcode) {
  case Instruction::BitCast:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::Trunc:
  Value *LHSSrc = LHSCast->getOperand(0);
  auto *SrcTy = LHSSrc->getType();
  auto *DstTy = I.getType();
  if (CastOpcode != Instruction::BitCast &&
  if (!SrcTy->getScalarType()->isIntegerTy() ||
      !DstTy->getScalarType()->isIntegerTy())
  PreservedCastFlags RHSFlags;
  if (!LHSCast->hasOneUse())
    NewCost += LHSCastCost;
  LLVM_DEBUG(dbgs() << "foldBitOpOfCastConstant: OldCost=" << OldCost
                    << " NewCost=" << NewCost << "\n");
  if (NewCost > OldCost)
      LHSSrc, InvC, I.getName() + ".inner");
  NewBinOp->copyIRFlags(&I);
  replaceValue(I, *Result);
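// Roughly: bitcast (shuffle V0, V1, M) -> shuffle (bitcast V0), (bitcast V1),
// M', where M' is M rescaled by the element-size ratio. Illustrative example
// (lane numbering shown for little-endian):
//   %s = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> <3, 2, 1, 0>
//   %r = bitcast <4 x i32> %s to <8 x i16>
// -->
//   %b = bitcast <4 x i32> %v to <8 x i16>
//   %r = shufflevector <8 x i16> %b, <8 x i16> poison,
//        <8 x i32> <6, 7, 4, 5, 2, 3, 0, 1>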
bool VectorCombine::foldBitcastShuffle(Instruction &I) {
  if (!DestTy || !SrcTy)
  unsigned DestEltSize = DestTy->getScalarSizeInBits();
  unsigned SrcEltSize = SrcTy->getScalarSizeInBits();
  if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0)
  if (!(BCTy0 && BCTy0->getElementType() == DestTy->getElementType()) &&
      !(BCTy1 && BCTy1->getElementType() == DestTy->getElementType()))
  SmallVector<int, 16> NewMask;
  if (DestEltSize <= SrcEltSize) {
    assert(SrcEltSize % DestEltSize == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = SrcEltSize / DestEltSize;
    assert(DestEltSize % SrcEltSize == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = DestEltSize / SrcEltSize;
  unsigned NumSrcElts = SrcTy->getPrimitiveSizeInBits() / DestEltSize;
  auto *NewShuffleTy =
  auto *OldShuffleTy =
  unsigned NumOps = IsUnary ? 1 : 2;
      TargetTransformInfo::CastContextHint::None,
      TargetTransformInfo::CastContextHint::None,
  LLVM_DEBUG(dbgs() << "Found a bitcasted shuffle: " << I << "\n OldCost: "
                    << OldCost << " vs NewCost: " << NewCost << "\n");
  if (NewCost > OldCost || !NewCost.isValid())
  replaceValue(I, *Shuf);
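// Roughly: a VP intrinsic whose vector operands are splats can be rewritten
// as one scalar op whose result is splatted, e.g. vp.add(splat(x), splat(y),
// mask, evl) -> splat(x + y), provided speculation is safe or the mask/EVL
// guarantee every lane is active (schematic; argument order abbreviated).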
bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) {
  if (!ScalarOp0 || !ScalarOp1)
  auto IsAllTrueMask = [](Value *MaskVal) {
      return ConstValue->isAllOnesValue();
  SmallVector<int> Mask;
    Mask.resize(FVTy->getNumElements(), 0);
    Args.push_back(V->getType());
  IntrinsicCostAttributes Attrs(IntrID, VecTy, Args);
  std::optional<unsigned> FunctionalOpcode =
  std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt;
  if (!FunctionalOpcode) {
    IntrinsicCostAttributes Attrs(*ScalarIntrID, VecTy->getScalarType(), Args);
  InstructionCost NewCost = ScalarOpCost + SplatCost + CostToKeepSplats;
  LLVM_DEBUG(dbgs() << "Found a VP Intrinsic to scalarize: " << VPI
                    << ", Cost of scalarizing:" << NewCost << "\n");
  if (OldCost < NewCost || !NewCost.isValid())
  bool SafeToSpeculate;
        *FunctionalOpcode, &VPI, nullptr, &AC, &DT);
  if (!SafeToSpeculate &&
          {ScalarOp0, ScalarOp1})
          ScalarOp0, ScalarOp1);
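// Roughly: op(insertelement(VecC0, x, N), insertelement(VecC1, y, N))
// -> insertelement(op(VecC0, VecC1), op(x, y), N); the same idea covers
// unary ops, compares, and trivially-vectorizable intrinsics when all
// operands insert into the same lane N.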
bool VectorCombine::scalarizeOpOrCmp(Instruction &I) {
  if (!UO && !BO && !CI && !II)
    if (Arg->getType() != II->getType() &&
  for (User *U : I.users())
  std::optional<uint64_t> Index;
  auto Ops = II ? II->args() : I.operands();
    uint64_t InsIdx = 0;
    if (OpTy->getElementCount().getKnownMinValue() <= InsIdx)
    else if (InsIdx != *Index)
  if (!Index.has_value())
  Type *ScalarTy = VecTy->getScalarType();
  assert(VecTy->isVectorTy() &&
         "Unexpected types for insert element into binop or cmp");
  unsigned Opcode = I.getOpcode();
  } else if (UO || BO) {
    IntrinsicCostAttributes ScalarICA(
        II->getIntrinsicID(), ScalarTy,
    IntrinsicCostAttributes VectorICA(
        II->getIntrinsicID(), VecTy,
  Value *NewVecC = nullptr;
    NewVecC = simplifyCmpInst(CI->getPredicate(), VecCs[0], VecCs[1], SQ);
        simplifyUnOp(UO->getOpcode(), VecCs[0], UO->getFastMathFlags(), SQ);
    NewVecC = simplifyBinOp(BO->getOpcode(), VecCs[0], VecCs[1], SQ);
  for (auto [Idx, Op, VecC, Scalar] : enumerate(Ops, VecCs, ScalarOps)) {
            II->getIntrinsicID(), Idx, &TTI)))
        Instruction::InsertElement, VecTy, CostKind, *Index, VecC, Scalar);
    OldCost += InsertCost;
    NewCost += !Op->hasOneUse() * InsertCost;
  if (OldCost < NewCost || !NewCost.isValid())
    ++NumScalarIntrinsic;
    Scalar = Builder.CreateCmp(CI->getPredicate(), ScalarOps[0], ScalarOps[1]);
  Scalar->setName(I.getName() + ".scalar");
    ScalarInst->copyIRFlags(&I);
  replaceValue(I, *Insert);
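// The next fold targets i1 results: a logic op of two compares of elements
// extracted from the same vectors can become one vector compare, a shuffle
// lining up the lanes, a vector logic op, and a single extract (schematic).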
bool VectorCombine::foldExtractedCmps(Instruction &I) {
  if (!BI || !I.getType()->isIntegerTy(1))
  Value *B0 = I.getOperand(0), *B1 = I.getOperand(1);
  CmpPredicate P0, P1;
  uint64_t Index0, Index1;
  ExtractElementInst *ConvertToShuf = getShuffleExtract(Ext0, Ext1, CostKind);
  assert((ConvertToShuf == Ext0 || ConvertToShuf == Ext1) &&
         "Unknown ExtractElementInst");
  unsigned CmpOpcode =
      Ext0Cost + Ext1Cost + CmpCost * 2 +
  int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
  int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
  ShufMask[CheapIndex] = ExpensiveIndex;
  NewCost += Ext0->hasOneUse() ? 0 : Ext0Cost;
  NewCost += Ext1->hasOneUse() ? 0 : Ext1Cost;
  if (OldCost < NewCost || !NewCost.isValid())
  Value *LHS = ConvertToShuf == Ext0 ? Shuf : VCmp;
  Value *RHS = ConvertToShuf == Ext0 ? VCmp : Shuf;
  replaceValue(I, *NewExt);
  unsigned ReductionOpc =
  CostBeforeReduction =
      TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, ExtType,
  CostAfterReduction =
      TTI.getExtendedReductionCost(ReductionOpc, IsUnsigned, II.getType(),
  if (RedOp && II.getIntrinsicID() == Intrinsic::vector_reduce_add &&
      (Op0->getOpcode() == RedOp->getOpcode() || Op0 == Op1)) {
        TTI.getCastInstrCost(Op0->getOpcode(), MulType, ExtType,
        TTI.getArithmeticInstrCost(Instruction::Mul, MulType, CostKind);
        TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, MulType,
    CostBeforeReduction = ExtCost * 2 + MulCost + Ext2Cost;
    CostAfterReduction = TTI.getMulAccReductionCost(
        IsUnsigned, ReductionOpc, II.getType(), ExtType, CostKind);
  CostAfterReduction = TTI.getArithmeticReductionCost(ReductionOpc, VecRedTy,
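// Roughly: binop(reduce(x), reduce(y)) -> reduce(binop(x, y)) when both
// sides are the same single-use reduction kind, e.g.
// add(vector.reduce.add(x), vector.reduce.add(y)) ->
// vector.reduce.add(add(x, y)); sub is folded via a vector sub feeding a
// reduce_add.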
bool VectorCombine::foldBinopOfReductions(Instruction &I) {
  if (BinOpOpc == Instruction::Sub)
    ReductionIID = Intrinsic::vector_reduce_add;
  auto checkIntrinsicAndGetItsArgument = [](Value *V,
    if (II->getIntrinsicID() == IID && II->hasOneUse())
      return II->getArgOperand(0);
  Value *V0 = checkIntrinsicAndGetItsArgument(I.getOperand(0), ReductionIID);
  Value *V1 = checkIntrinsicAndGetItsArgument(I.getOperand(1), ReductionIID);
  unsigned ReductionOpc =
      CostOfRedOperand0 + CostOfRedOperand1 +
  if (NewCost >= OldCost || !NewCost.isValid())
             << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (BinOpOpc == Instruction::Or)
    VectorBO = Builder.CreateOr(V0, V1, "",
  replaceValue(I, *Rdx);
  unsigned NumScanned = 0;
  return std::any_of(Begin, End, [&](const Instruction &Instr) {
class ScalarizationResult {
  enum class StatusTy { Unsafe, Safe, SafeWithFreeze };

  ScalarizationResult(StatusTy Status, Value *ToFreeze = nullptr)
      : Status(Status), ToFreeze(ToFreeze) {}

  ScalarizationResult(const ScalarizationResult &Other) = default;
  ~ScalarizationResult() {
    assert(!ToFreeze && "freeze() not called with ToFreeze being set");

  static ScalarizationResult unsafe() { return {StatusTy::Unsafe}; }
  static ScalarizationResult safe() { return {StatusTy::Safe}; }
  static ScalarizationResult safeWithFreeze(Value *ToFreeze) {
    return {StatusTy::SafeWithFreeze, ToFreeze};

  bool isSafe() const { return Status == StatusTy::Safe; }
  bool isUnsafe() const { return Status == StatusTy::Unsafe; }
  bool isSafeWithFreeze() const { return Status == StatusTy::SafeWithFreeze; }

    Status = StatusTy::Unsafe;

  void freeze(IRBuilderBase &Builder, Instruction &UserI) {
    assert(isSafeWithFreeze() &&
           "should only be used when freezing is required");
           "UserI must be a user of ToFreeze");
    IRBuilder<>::InsertPointGuard Guard(Builder);
      if (U.get() == ToFreeze)

  uint64_t NumElements = VecTy->getElementCount().getKnownMinValue();
    if (C->getValue().ult(NumElements))
      return ScalarizationResult::safe();
    return ScalarizationResult::unsafe();
    return ScalarizationResult::unsafe();
  APInt Zero(IntWidth, 0);
  APInt MaxElts(IntWidth, NumElements);
          true, &AC, CtxI, &DT)))
    return ScalarizationResult::safe();
  return ScalarizationResult::unsafe();
  if (ValidIndices.contains(IdxRange))
    return ScalarizationResult::safeWithFreeze(IdxBase);
  return ScalarizationResult::unsafe();
      C->getZExtValue() * DL.getTypeStoreSize(ScalarType));
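// Sketch of the single-element store fold (placeholder names; the load and
// store must use the same address, in the same block, with no clobber in
// between):
//   %v = load <4 x i32>, ptr %p
//   %n = insertelement <4 x i32> %v, i32 %x, i32 1
//   store <4 x i32> %n, ptr %p
// -->
//   %g = getelementptr <4 x i32>, ptr %p, i32 0, i32 1
//   store i32 %x, ptr %g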
bool VectorCombine::foldSingleElementStore(Instruction &I) {
  if (!match(SI->getValueOperand(),
  Value *SrcAddr = Load->getPointerOperand()->stripPointerCasts();
  if (!Load->isSimple() || Load->getParent() != SI->getParent() ||
      !DL->typeSizeEqualsStoreSize(Load->getType()->getScalarType()) ||
      SrcAddr != SI->getPointerOperand()->stripPointerCasts())
  if (ScalarizableIdx.isUnsafe() ||
  Worklist.push(Load);
  if (ScalarizableIdx.isSafeWithFreeze())
      SI->getValueOperand()->getType(), SI->getPointerOperand(),
      {ConstantInt::get(Idx->getType(), 0), Idx});
      std::max(SI->getAlign(), Load->getAlign()), NewElement->getType(), Idx,
  replaceValue(I, *NSI);
bool VectorCombine::scalarizeLoad(Instruction &I) {
  if (LI->isVolatile() || !DL->typeSizeEqualsStoreSize(VecTy->getScalarType()))
  bool AllExtracts = true;
  bool AllBitcasts = true;
  unsigned NumInstChecked = 0;
  for (User *U : LI->users()) {
    if (!UI || UI->getParent() != LI->getParent())
    if (UI->use_empty())
      AllExtracts = false;
      AllBitcasts = false;
    for (Instruction &I :
         make_range(std::next(LI->getIterator()), UI->getIterator())) {
    LastCheckedInst = UI;
    return scalarizeLoadExtract(LI, VecTy, Ptr);
    return scalarizeLoadBitcast(LI, VecTy, Ptr);
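// Sketch of the load-extract scalarization (illustrative; the index must be
// provably in bounds, possibly after freezing it):
//   %v = load <4 x i32>, ptr %p
//   %e = extractelement <4 x i32> %v, i64 %i
// -->
//   %g = getelementptr inbounds <4 x i32>, ptr %p, i32 0, i64 %i
//   %e = load i32, ptr %g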
bool VectorCombine::scalarizeLoadExtract(LoadInst *LI, VectorType *VecTy,
  DenseMap<ExtractElementInst *, ScalarizationResult> NeedFreeze;
    for (auto &Pair : NeedFreeze)
      Pair.second.discard();
  for (User *U : LI->users()) {
    if (ScalarIdx.isUnsafe())
    if (ScalarIdx.isSafeWithFreeze()) {
      NeedFreeze.try_emplace(UI, ScalarIdx);
      ScalarIdx.discard();
        Index ? Index->getZExtValue() : -1);
  LLVM_DEBUG(dbgs() << "Found all extractions of a vector load: " << *LI
                    << "\n LoadExtractCost: " << OriginalCost
                    << " vs ScalarizedCost: " << ScalarizedCost << "\n");
  if (ScalarizedCost >= OriginalCost)
  Type *ElemType = VecTy->getElementType();
  for (User *U : LI->users()) {
    Value *Idx = EI->getIndexOperand();
    auto It = NeedFreeze.find(EI);
    if (It != NeedFreeze.end())
        Builder.CreateLoad(ElemType, GEP, EI->getName() + ".scalar"));
    Align ScalarOpAlignment =
    NewLoad->setAlignment(ScalarOpAlignment);
      size_t Offset = ConstIdx->getZExtValue() * DL->getTypeStoreSize(ElemType);
    replaceValue(*EI, *NewLoad, false);
  FailureGuard.release();
bool VectorCombine::scalarizeLoadBitcast(LoadInst *LI, VectorType *VecTy,
  Type *TargetScalarType = nullptr;
  unsigned VecBitWidth = DL->getTypeSizeInBits(VecTy);
  for (User *U : LI->users()) {
    Type *DestTy = BC->getDestTy();
    unsigned DestBitWidth = DL->getTypeSizeInBits(DestTy);
    if (DestBitWidth != VecBitWidth)
    if (!TargetScalarType)
      TargetScalarType = DestTy;
    else if (TargetScalarType != DestTy)
  if (!TargetScalarType)
  LLVM_DEBUG(dbgs() << "Found vector load feeding only bitcasts: " << *LI
                    << "\n OriginalCost: " << OriginalCost
                    << " vs ScalarizedCost: " << ScalarizedCost << "\n");
  if (ScalarizedCost >= OriginalCost)
  ScalarLoad->copyMetadata(*LI);
  for (User *U : LI->users()) {
    replaceValue(*BC, *ScalarLoad, false);
bool VectorCombine::scalarizeExtExtract(Instruction &I) {
  Type *ScalarDstTy = DstTy->getElementType();
  if (DL->getTypeSizeInBits(SrcTy) != DL->getTypeSizeInBits(ScalarDstTy))
  unsigned ExtCnt = 0;
  bool ExtLane0 = false;
  for (User *U : Ext->users()) {
          Instruction::And, ScalarDstTy, CostKind,
      (ExtCnt - ExtLane0) *
          Instruction::LShr, ScalarDstTy, CostKind,
  if (ScalarCost > VectorCost)
  Value *ScalarV = Ext->getOperand(0);
  SmallDenseSet<ConstantInt *, 8> ExtractedLanes;
  bool AllExtractsTriggerUB = true;
  ExtractElementInst *LastExtract = nullptr;
  for (User *U : Ext->users()) {
      AllExtractsTriggerUB = false;
    if (!LastExtract || LastExtract->comesBefore(Extract))
      LastExtract = Extract;
  if (ExtractedLanes.size() != DstTy->getNumElements() ||
      !AllExtractsTriggerUB ||
  uint64_t SrcEltSizeInBits = DL->getTypeSizeInBits(SrcTy->getElementType());
  uint64_t EltBitMask = (1ull << SrcEltSizeInBits) - 1;
  uint64_t TotalBits = DL->getTypeSizeInBits(SrcTy);
  Value *Mask = ConstantInt::get(PackedTy, EltBitMask);
  for (User *U : Ext->users()) {
            ? (TotalBits - SrcEltSizeInBits - Idx * SrcEltSizeInBits)
            : (Idx * SrcEltSizeInBits);
    U->replaceAllUsesWith(And);
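// Roughly (little-endian only; widths illustrative): two bool masks that are
// bitcast to integers, one shifted, then or'ed, e.g.
//   or (zext (bitcast <8 x i1> %a to i8) to i16),
//      (shl (zext (bitcast <8 x i1> %b to i8) to i16), 8)
// are rebuilt as a single <16 x i1> concat shuffle bitcast to i16.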
bool VectorCombine::foldConcatOfBoolMasks(Instruction &I) {
  Type *Ty = I.getType();
  if (DL->isBigEndian())
  uint64_t ShAmtX = 0;
  uint64_t ShAmtY = 0;
  if (ShAmtX > ShAmtY) {
  uint64_t ShAmtDiff = ShAmtY - ShAmtX;
  unsigned NumSHL = (ShAmtX > 0) + (ShAmtY > 0);
      MaskTy->getNumElements() != ShAmtDiff ||
      MaskTy->getNumElements() > (BitWidth / 2))
      Type::getIntNTy(Ty->getContext(), ConcatTy->getNumElements());
  auto *MaskIntTy = Type::getIntNTy(Ty->getContext(), ShAmtDiff);
  std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
  if (Ty != ConcatIntTy)
  LLVM_DEBUG(dbgs() << "Found a concatenation of bitcasted bool masks: " << I
                    << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
  if (Ty != ConcatIntTy) {
  replaceValue(I, *Result);
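// Roughly: shuffle (binop (shuffle x), (shuffle y)) -> binop (shuffle' x),
// (shuffle' y), folding the outer permute into each operand's mask whenever
// the combined shuffles are no more expensive than the originals.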
bool VectorCombine::foldPermuteOfBinops(Instruction &I) {
  BinaryOperator *BinOp;
  ArrayRef<int> OuterMask;
  Value *Op00, *Op01, *Op10, *Op11;
  ArrayRef<int> Mask0, Mask1;
  if (!Match0 && !Match1)
  if (!ShuffleDstTy || !BinOpTy || !Op0Ty || !Op1Ty)
  unsigned NumSrcElts = BinOpTy->getNumElements();
      any_of(OuterMask, [NumSrcElts](int M) { return M >= (int)NumSrcElts; }))
  SmallVector<int> NewMask0, NewMask1;
  for (int M : OuterMask) {
    if (M < 0 || M >= (int)NumSrcElts) {
      NewMask0.push_back(Match0 ? Mask0[M] : M);
      NewMask1.push_back(Match1 ? Mask1[M] : M);
  unsigned NumOpElts = Op0Ty->getNumElements();
  bool IsIdentity0 = ShuffleDstTy == Op0Ty &&
      all_of(NewMask0, [NumOpElts](int M) { return M < (int)NumOpElts; }) &&
  bool IsIdentity1 = ShuffleDstTy == Op1Ty &&
      all_of(NewMask1, [NumOpElts](int M) { return M < (int)NumOpElts; }) &&
      ShuffleDstTy, BinOpTy, OuterMask, CostKind,
      0, nullptr, {BinOp}, &I);
  NewCost += BinOpCost;
    OldCost += Shuf0Cost;
    NewCost += Shuf0Cost;
    OldCost += Shuf1Cost;
    NewCost += Shuf1Cost;
        Op0Ty, NewMask0, CostKind, 0, nullptr, {Op00, Op01});
        Op1Ty, NewMask1, CostKind, 0, nullptr, {Op10, Op11});
  LLVM_DEBUG(dbgs() << "Found a shuffle feeding a shuffled binop: " << I
                    << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
  NewInst->copyIRFlags(BinOp);
  replaceValue(I, *NewBO);
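// Roughly: shuffle (op x, z), (op y, w), M -> op (shuffle x, y, M),
// (shuffle z, w, M), for binops and compares with matching opcodes, when the
// rewritten form is cheaper per the target cost model.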
bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
  ArrayRef<int> OldMask;
  if (LHS->getOpcode() != RHS->getOpcode())
  bool IsCommutative = false;
    IsCommutative = BinaryOperator::isCommutative(BO->getOpcode());
  if (!ShuffleDstTy || !BinResTy || !BinOpTy || X->getType() != Z->getType())
  unsigned NumSrcElts = BinOpTy->getNumElements();
  if (IsCommutative && X != Z && Y != W && (X == W || Y == Z))
  auto ConvertToUnary = [NumSrcElts](int &M) {
    if (M >= (int)NumSrcElts)
  SmallVector<int> NewMask0(OldMask);
  SmallVector<int> NewMask1(OldMask);
  ArrayRef<int> InnerMask;
              m_Mask(InnerMask)))) &&
        [NumSrcElts](int M) { return M < (int)NumSrcElts; })) {
  bool ReducedInstCount = false;
  ReducedInstCount |= MergeInner(X, 0, NewMask0, CostKind);
  ReducedInstCount |= MergeInner(Y, 0, NewMask1, CostKind);
  ReducedInstCount |= MergeInner(Z, NumSrcElts, NewMask0, CostKind);
  ReducedInstCount |= MergeInner(W, NumSrcElts, NewMask1, CostKind);
  bool SingleSrcBinOp = (X == Y) && (Z == W) && (NewMask0 == NewMask1);
  ReducedInstCount |= SingleSrcBinOp;
  auto *ShuffleCmpTy =
      SK0, ShuffleCmpTy, BinOpTy, NewMask0, CostKind, 0, nullptr, {X, Z});
  if (!SingleSrcBinOp)
             << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (ReducedInstCount ? (NewCost > OldCost) : (NewCost >= OldCost))
                 : Builder.CreateCmp(PredLHS, Shuf0, Shuf1);
    NewInst->copyIRFlags(LHS);
    NewInst->andIRFlags(RHS);
  replaceValue(I, *NewBO);
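// Roughly: shuffle (select c1, t1, f1), (select c2, t2, f2), M
// -> select (shuffle c1, c2, M), (shuffle t1, t2, M), (shuffle f1, f2, M),
// requiring identical vector condition types and, for FP selects, matching
// fast-math flags.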
bool VectorCombine::foldShuffleOfSelects(Instruction &I) {
  Value *C1, *T1, *F1, *C2, *T2, *F2;
  if (!C1VecTy || !C2VecTy || C1VecTy != C2VecTy)
  if (((SI0FOp == nullptr) != (SI1FOp == nullptr)) ||
      ((SI0FOp != nullptr) &&
       (SI0FOp->getFastMathFlags() != SI1FOp->getFastMathFlags())))
  auto SelOp = Instruction::Select;
      CostSel1 + CostSel2 +
      {I.getOperand(0), I.getOperand(1)}, &I);
      Mask, CostKind, 0, nullptr, {C1, C2});
      toVectorTy(Type::getInt1Ty(I.getContext()), DstVecTy->getNumElements()));
  if (!Sel1->hasOneUse())
    NewCost += CostSel1;
  if (!Sel2->hasOneUse())
    NewCost += CostSel2;
             << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
    NewSel = Builder.CreateSelectFMF(ShuffleCmp, ShuffleTrue, ShuffleFalse,
                                     SI0FOp->getFastMathFlags());
    NewSel = Builder.CreateSelect(ShuffleCmp, ShuffleTrue, ShuffleFalse);
  replaceValue(I, *NewSel);
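// Roughly: shuffle (cast x), (cast y), M -> cast (shuffle x, y, M'), with M'
// rescaled when a bitcast changes the element count; a mixed zext/sext pair
// may be unified (e.g. treated as sext) when that is known safe.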
bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
  ArrayRef<int> OldMask;
  if (!C0 || (IsBinaryShuffle && !C1))
  if (!IsBinaryShuffle && Opcode == Instruction::BitCast)
  if (IsBinaryShuffle) {
    if (C0->getSrcTy() != C1->getSrcTy())
    if (Opcode != C1->getOpcode()) {
      Opcode = Instruction::SExt;
  if (!ShuffleDstTy || !CastDstTy || !CastSrcTy)
  unsigned NumSrcElts = CastSrcTy->getNumElements();
  unsigned NumDstElts = CastDstTy->getNumElements();
  assert((NumDstElts == NumSrcElts || Opcode == Instruction::BitCast) &&
         "Only bitcasts expected to alter src/dst element counts");
  if (NumDstElts != NumSrcElts && (NumSrcElts % NumDstElts) != 0 &&
      (NumDstElts % NumSrcElts) != 0)
  SmallVector<int, 16> NewMask;
  if (NumSrcElts >= NumDstElts) {
    assert(NumSrcElts % NumDstElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = NumSrcElts / NumDstElts;
    assert(NumDstElts % NumSrcElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = NumDstElts / NumSrcElts;
  auto *NewShuffleDstTy =
  if (IsBinaryShuffle)
  if (IsBinaryShuffle) {
             << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
  if (IsBinaryShuffle)
  NewInst->copyIRFlags(C0);
  if (IsBinaryShuffle)
    NewInst->andIRFlags(C1);
  replaceValue(I, *Cast);
bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
  ArrayRef<int> OuterMask;
  Value *OuterV0, *OuterV1;
  ArrayRef<int> InnerMask0, InnerMask1;
  Value *X0, *X1, *Y0, *Y1;
  if (!Match0 && !Match1)
  SmallVector<int, 16> PoisonMask1;
    InnerMask1 = PoisonMask1;
  X0 = Match0 ? X0 : OuterV0;
  Y0 = Match0 ? Y0 : OuterV0;
  X1 = Match1 ? X1 : OuterV1;
  Y1 = Match1 ? Y1 : OuterV1;
  if (!ShuffleDstTy || !ShuffleSrcTy || !ShuffleImmTy ||
  unsigned NumSrcElts = ShuffleSrcTy->getNumElements();
  unsigned NumImmElts = ShuffleImmTy->getNumElements();
  SmallVector<int, 16> NewMask(OuterMask);
  Value *NewX = nullptr, *NewY = nullptr;
  for (int &M : NewMask) {
    Value *Src = nullptr;
    if (0 <= M && M < (int)NumImmElts) {
      Src = M >= (int)NumSrcElts ? Y0 : X0;
      M = M >= (int)NumSrcElts ? (M - NumSrcElts) : M;
    } else if (M >= (int)NumImmElts) {
      Src = M >= (int)NumSrcElts ? Y1 : X1;
      M = M >= (int)NumSrcElts ? (M - NumSrcElts) : M;
      assert(0 <= M && M < (int)NumSrcElts && "Unexpected shuffle mask index");
    if (!NewX || NewX == Src) {
    if (!NewY || NewY == Src) {
    replaceValue(I, *NewX);
  bool IsUnary = all_of(NewMask, [&](int M) { return M < (int)NumSrcElts; });
      nullptr, {NewX, NewY});
    NewCost += InnerCost0;
    NewCost += InnerCost1;
             << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
  replaceValue(I, *Shuf);
bool VectorCombine::foldShufflesOfLengthChangingShuffles(Instruction &I) {
  unsigned ChainLength = 0;
  SmallVector<int> Mask;
  SmallVector<int> YMask;
    ArrayRef<int> OuterMask;
    Value *OuterV0, *OuterV1;
    if (ChainLength != 0 && !Trunk->hasOneUse())
                m_Mask(OuterMask))))
    if (OuterV0->getType() != TrunkType) {
    ArrayRef<int> InnerMask0, InnerMask1;
    Value *A0, *A1, *B0, *B1;
    bool Match0Leaf = Match0 && A0->getType() != I.getType();
    bool Match1Leaf = Match1 && A1->getType() != I.getType();
    if (Match0Leaf == Match1Leaf) {
      SmallVector<int> CommutedOuterMask;
      for (int &M : CommutedOuterMask) {
        if (M < (int)NumTrunkElts)
      OuterMask = CommutedOuterMask;
    int NumLeafElts = YType->getNumElements();
    SmallVector<int> LocalYMask(InnerMask1);
    for (int &M : LocalYMask) {
      if (M >= NumLeafElts)
      Mask.assign(OuterMask);
      YMask.assign(LocalYMask);
      OldCost = NewCost = LocalOldCost;
      SmallVector<int> NewYMask(YMask);
      for (auto [CombinedM, LeafM] : llvm::zip(NewYMask, LocalYMask)) {
        if (LeafM == -1 || CombinedM == LeafM)
        if (CombinedM == -1) {
      SmallVector<int> NewMask;
      NewMask.reserve(NumTrunkElts);
      for (int M : Mask) {
        if (M < 0 || M >= static_cast<int>(NumTrunkElts))
      if (LocalNewCost >= NewCost && LocalOldCost < LocalNewCost - NewCost)
      if (ChainLength == 1) {
        dbgs() << "Found chain of shuffles fed by length-changing shuffles: "
        dbgs() << " next chain link: " << *Trunk << '\n'
               << " old cost: " << (OldCost + LocalOldCost)
               << " new cost: " << LocalNewCost << '\n';
      OldCost += LocalOldCost;
      NewCost = LocalNewCost;
  if (ChainLength <= 1)
    return M < 0 || M >= static_cast<int>(NumTrunkElts);
    for (int &M : Mask) {
      if (M >= static_cast<int>(NumTrunkElts))
        M = YMask[M - NumTrunkElts];
    replaceValue(I, *Root);
  replaceValue(I, *Root);
bool VectorCombine::foldShuffleOfIntrinsics(Instruction &I) {
  ArrayRef<int> OldMask;
  if (IID != II1->getIntrinsicID())
  if (!ShuffleDstTy || !II0Ty)
  for (unsigned I = 0, E = II0->arg_size(); I != E; ++I)
        II0->getArgOperand(I) != II1->getArgOperand(I))
      II0Ty, OldMask, CostKind, 0, nullptr, {II0, II1}, &I);
  SmallDenseSet<std::pair<Value *, Value *>> SeenOperandPairs;
  for (unsigned I = 0, E = II0->arg_size(); I != E; ++I) {
      NewArgsTy.push_back(II0->getArgOperand(I)->getType());
          ShuffleDstTy->getNumElements());
      std::pair<Value *, Value *> OperandPair =
          std::make_pair(II0->getArgOperand(I), II1->getArgOperand(I));
      if (!SeenOperandPairs.insert(OperandPair).second) {
          CostKind, 0, nullptr, {II0->getArgOperand(I), II1->getArgOperand(I)});
  IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);
  if (!II0->hasOneUse())
  if (II1 != II0 && !II1->hasOneUse())
             << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
  SmallDenseMap<std::pair<Value *, Value *>, Value *> ShuffleCache;
  for (unsigned I = 0, E = II0->arg_size(); I != E; ++I)
      std::pair<Value *, Value *> OperandPair =
          std::make_pair(II0->getArgOperand(I), II1->getArgOperand(I));
      auto It = ShuffleCache.find(OperandPair);
      if (It != ShuffleCache.end()) {
          II1->getArgOperand(I), OldMask);
      ShuffleCache[OperandPair] = Shuf;
    NewInst->copyIRFlags(II0);
    NewInst->andIRFlags(II1);
  replaceValue(I, *NewIntrinsic);
bool VectorCombine::foldPermuteOfIntrinsic(Instruction &I) {
  if (!ShuffleDstTy || !IntrinsicSrcTy)
  unsigned NumSrcElts = IntrinsicSrcTy->getNumElements();
  if (any_of(Mask, [NumSrcElts](int M) { return M >= (int)NumSrcElts; }))
      IntrinsicSrcTy, Mask, CostKind, 0, nullptr, {V0}, &I);
  for (unsigned I = 0, E = II0->arg_size(); I != E; ++I) {
      NewArgsTy.push_back(II0->getArgOperand(I)->getType());
          ShuffleDstTy->getNumElements());
          ArgTy, VecTy, Mask, CostKind, 0, nullptr,
          {II0->getArgOperand(I)});
  IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);
  LLVM_DEBUG(dbgs() << "Found a permute of intrinsic: " << I << "\n OldCost: "
                    << OldCost << " vs NewCost: " << NewCost << "\n");
  if (NewCost > OldCost)
  for (unsigned I = 0, E = II0->arg_size(); I != E; ++I) {
  replaceValue(I, *NewIntrinsic);
  int M = SV->getMaskValue(Lane);
  if (static_cast<unsigned>(M) < NumElts) {
    U = &SV->getOperandUse(0);
    U = &SV->getOperandUse(1);
  auto [U, Lane] = IL;
  unsigned NumElts = Ty->getNumElements();
  if (Item.size() == NumElts || NumElts == 1 || Item.size() % NumElts != 0)
  std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
  unsigned NumSlices = Item.size() / NumElts;
  for (unsigned Slice = 0; Slice < NumSlices; ++Slice) {
    Use *SliceV = Item[Slice * NumElts].first;
    if (!SliceV || SliceV->get()->getType() != Ty)
    for (unsigned Elt = 0; Elt < NumElts; ++Elt) {
      auto [V, Lane] = Item[Slice * NumElts + Elt];
      if (Lane != static_cast<int>(Elt) || SliceV->get() != V->get())
  auto [FrontU, FrontLane] = Item.front();
  if (IdentityLeafs.contains(FrontU)) {
    return FrontU->get();
    return Builder.CreateShuffleVector(FrontU->get(), Mask);
  if (ConcatLeafs.contains(FrontU)) {
    for (unsigned S = 0; S < Values.size(); ++S)
      Values[S] = Item[S * NumElts].first->get();
    while (Values.size() > 1) {
      std::iota(Mask.begin(), Mask.end(), 0);
      for (unsigned S = 0; S < NewValues.size(); ++S)
            Builder.CreateShuffleVector(Values[S * 2], Values[S * 2 + 1], Mask);
  unsigned NumOps = I->getNumOperands() - (II ? 1 : 0);
  for (unsigned Idx = 0; Idx < NumOps; Idx++) {
      Ops[Idx] = II->getOperand(Idx);
        Ty, IdentityLeafs, SplatLeafs, ConcatLeafs,
  for (const auto &Lane : Item)
    auto *Value = Builder.CreateCmp(CI->getPredicate(), Ops[0], Ops[1]);
    auto *Value = Builder.CreateCast(CI->getOpcode(), Ops[0], DstTy);
    auto *Value = Builder.CreateIntrinsic(DstTy, II->getIntrinsicID(), Ops);
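// The next fold walks the instruction tree above a shuffle lane by lane; if
// every lane ultimately reads its own lane of some leaf (an identity), a
// splat, or a concatenation of full vectors, the tree is rebuilt on the leaf
// operands with the shuffles removed (high-level description).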
bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
  if (!Ty || I.use_empty())
  for (unsigned M = 0, E = Ty->getNumElements(); M < E; ++M)
  SmallPtrSet<Use *, 4> IdentityLeafs, SplatLeafs, ConcatLeafs;
  unsigned NumVisited = 0;
  while (!Worklist.empty()) {
    auto [FrontU, FrontLane] = Item.front();
      return X->getType() == Y->getType() &&
    if (FrontLane == 0 &&
            Ty->getNumElements() &&
          return !E.value().first || (IsEquiv(E.value().first->get(), FrontV) &&
                                      E.value().second == (int)E.index());
      IdentityLeafs.insert(FrontU);
        C && C->getSplatValue() &&
      SplatLeafs.insert(FrontU);
      auto [FrontU, FrontLane] = Item.front();
      auto [U, Lane] = IL;
      return !U || (U->get() == FrontU->get() && Lane == FrontLane);
      SplatLeafs.insert(FrontU);
    auto CheckLaneIsEquivalentToFirst = [Item](InstLane IL) {
      Value *V = IL.first->get();
        if (CI->getPredicate() != cast<CmpInst>(FrontV)->getPredicate())
        if (CI->getSrcTy()->getScalarType() !=
          SI->getOperand(0)->getType() !=
          II->getIntrinsicID() ==
          !II->hasOperandBundles());
        BO && BO->isIntDivRem())
    } else if (isa<UnaryOperator, TruncInst, ZExtInst, SExtInst, FPToSIInst,
                   FPToUIInst, SIToFPInst, UIToFPInst>(FrontU)) {
      if (DstTy && SrcTy &&
          SrcTy->getNumElements() == DstTy->getNumElements()) {
        !II->hasOperandBundles()) {
      for (unsigned Op = 0, E = II->getNumOperands() - 1; Op < E; Op++) {
      ConcatLeafs.insert(FrontU);
  if (NumVisited <= 1)
  LLVM_DEBUG(dbgs() << "Found a superfluous identity shuffle: " << I << "\n");
                                 ConcatLeafs, Builder, &TTI);
  replaceValue(I, *V);
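// Roughly: because integer add/mul/and/or/xor and min/max reductions are
// insensitive to lane order, a single-source shuffle feeding such a
// reduction can be replaced with a cheaper (sorted, concat-style) mask or
// dropped entirely when the cost model agrees.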
bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
  switch (II->getIntrinsicID()) {
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_and:
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_xor:
  case Intrinsic::vector_reduce_smin:
  case Intrinsic::vector_reduce_smax:
  case Intrinsic::vector_reduce_umin:
  case Intrinsic::vector_reduce_umax:
  std::queue<Value *> Worklist;
  SmallPtrSet<Value *, 4> Visited;
  ShuffleVectorInst *Shuffle = nullptr;
  while (!Worklist.empty()) {
    Value *CV = Worklist.front();
      if (CI->isBinaryOp()) {
        for (auto *Op : CI->operand_values())
        if (Shuffle && Shuffle != SV)
  for (auto *V : Visited)
    for (auto *U : V->users())
      if (!Visited.contains(U) && U != &I)
  FixedVectorType *VecType =
  FixedVectorType *ShuffleInputType =
  if (!ShuffleInputType)
  SmallVector<int> ConcatMask;
  sort(ConcatMask, [](int X, int Y) { return (unsigned)X < (unsigned)Y; });
  bool UsesSecondVec =
      any_of(ConcatMask, [&](int M) { return M >= (int)NumInputElts; });
      ShuffleInputType, ConcatMask, CostKind);
  LLVM_DEBUG(dbgs() << "Found a reduction feeding from a shuffle: " << *Shuffle
  LLVM_DEBUG(dbgs() << " OldCost: " << OldCost << " vs NewCost: " << NewCost
  bool MadeChanges = false;
  if (NewCost < OldCost) {
    LLVM_DEBUG(dbgs() << "Created new shuffle: " << *NewShuffle << "\n");
    replaceValue(*Shuffle, *NewShuffle);
  MadeChanges |= foldSelectShuffle(*Shuffle, true);
bool VectorCombine::foldShuffleChainsToReduce(Instruction &I) {
  std::queue<Value *> InstWorklist;
  std::optional<unsigned int> CommonCallOp = std::nullopt;
  std::optional<Instruction::BinaryOps> CommonBinOp = std::nullopt;
  bool IsFirstCallOrBinInst = true;
  bool ShouldBeCallOrBinInst = true;
  SmallVector<Value *, 2> PrevVecV(2, nullptr);
  int64_t VecSize = FVT->getNumElements();
  unsigned int NumLevels = Log2_64_Ceil(VecSize), VisitedCnt = 0;
  int64_t ShuffleMaskHalf = 1, ExpectedParityMask = 0;
  for (int Cur = VecSize, Mask = NumLevels - 1; Cur > 1;
       Cur = (Cur + 1) / 2, --Mask) {
      ExpectedParityMask |= (1ll << Mask);
  InstWorklist.push(VecOpEE);
  while (!InstWorklist.empty()) {
    Value *CI = InstWorklist.front();
      if (!ShouldBeCallOrBinInst)
      if (!IsFirstCallOrBinInst &&
          any_of(PrevVecV, [](Value *VecV) { return VecV == nullptr; }))
      if (II != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
      IsFirstCallOrBinInst = false;
        CommonCallOp = II->getIntrinsicID();
      if (II->getIntrinsicID() != *CommonCallOp)
      switch (II->getIntrinsicID()) {
      case Intrinsic::umin:
      case Intrinsic::umax:
      case Intrinsic::smin:
      case Intrinsic::smax: {
        auto *Op0 = II->getOperand(0);
        auto *Op1 = II->getOperand(1);
      ShouldBeCallOrBinInst ^= 1;
      IntrinsicCostAttributes ICA(
          *CommonCallOp, II->getType(),
          {PrevVecV[0]->getType(), PrevVecV[1]->getType()});
      InstWorklist.push(PrevVecV[1]);
      InstWorklist.push(PrevVecV[0]);
      if (!ShouldBeCallOrBinInst)
      if (!IsFirstCallOrBinInst &&
          any_of(PrevVecV, [](Value *VecV) { return VecV == nullptr; }))
      if (BinOp != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
      IsFirstCallOrBinInst = false;
      switch (*CommonBinOp) {
      case BinaryOperator::Add:
      case BinaryOperator::Mul:
      case BinaryOperator::Or:
      case BinaryOperator::And:
      case BinaryOperator::Xor: {
      ShouldBeCallOrBinInst ^= 1;
      InstWorklist.push(PrevVecV[1]);
      InstWorklist.push(PrevVecV[0]);
      if (ShouldBeCallOrBinInst ||
          any_of(PrevVecV, [](Value *VecV) { return VecV == nullptr; }))
      if (SVInst != PrevVecV[1])
      ArrayRef<int> CurMask;
      for (int Mask = 0, MaskSize = CurMask.size(); Mask != MaskSize; ++Mask) {
        if (Mask < ShuffleMaskHalf &&
            CurMask[Mask] != ShuffleMaskHalf + Mask - (ExpectedParityMask & 1))
        if (Mask >= ShuffleMaskHalf && CurMask[Mask] != -1)
      ShuffleMaskHalf *= 2;
      ShuffleMaskHalf -= (ExpectedParityMask & 1);
      ExpectedParityMask >>= 1;
          SVInst->getType(), SVInst->getType(),
      if (!ExpectedParityMask && VisitedCnt == NumLevels)
      ShouldBeCallOrBinInst ^= 1;
  if (ShouldBeCallOrBinInst)
  assert(VecSize != -1 && "Expected Match for Vector Size");
  Value *FinalVecV = PrevVecV[0];
  IntrinsicCostAttributes ICA(ReducedOp, FinalVecVTy, {FinalVecV});
  if (NewCost >= OrigCost)
  auto *ReducedResult =
  replaceValue(I, *ReducedResult);
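// Roughly: reduce(cast(x)) -> cast(reduce(x)) where the reduction commutes
// with the cast, e.g. vector.reduce.and(trunc(x)) ->
// trunc(vector.reduce.and(x)); add/mul reductions also consider zext/sext
// via the extended-reduction cost hooks.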
bool VectorCombine::foldCastFromReductions(Instruction &I) {
  bool TruncOnly = false;
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_and:
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_xor:
  Value *ReductionSrc = I.getOperand(0);
  Type *ResultTy = I.getType();
      ReductionOpc, ReductionSrcTy, std::nullopt, CostKind);
  if (OldCost <= NewCost || !NewCost.isValid())
      II->getIntrinsicID(), {Src});
  replaceValue(I, *NewCast);
  constexpr unsigned MaxVisited = 32;
  bool FoundReduction = false;
  while (!WorkList.empty()) {
    for (User *U : I->users()) {
      if (!UI || !Visited.insert(UI).second)
      if (Visited.size() > MaxVisited)
      switch (II->getIntrinsicID()) {
      case Intrinsic::vector_reduce_add:
      case Intrinsic::vector_reduce_mul:
      case Intrinsic::vector_reduce_and:
      case Intrinsic::vector_reduce_or:
      case Intrinsic::vector_reduce_xor:
      case Intrinsic::vector_reduce_smin:
      case Intrinsic::vector_reduce_smax:
      case Intrinsic::vector_reduce_umin:
      case Intrinsic::vector_reduce_umax:
        FoundReduction = true;
  return FoundReduction;
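// The large fold below targets "select shuffle" patterns: two binops whose
// operands are shuffles of the same two vectors, recombined by further
// shuffles. It re-sorts the lanes so each binop works on contiguous groups
// and the interleave happens once at the end (schematic summary; the masks
// are recomputed from the collected V1/V2 lane maps).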
bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
  if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
  SmallPtrSet<Instruction *, 4> InputShuffles({SVI0A, SVI0B, SVI1A, SVI1B});
    if (!I || I->getOperand(0)->getType() != VT)
    return any_of(I->users(), [&](User *U) {
      return U != Op0 && U != Op1 &&
             !(isa<ShuffleVectorInst>(U) &&
               (InputShuffles.contains(cast<Instruction>(U)) ||
                isInstructionTriviallyDead(cast<Instruction>(U))));
  if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
      checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
    for (auto *U : I->users()) {
      if (!SV || SV->getType() != VT)
      if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) ||
          (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1))
  if (!collectShuffles(Op0) || !collectShuffles(Op1))
  if (FromReduction && Shuffles.size() > 1)
  if (!FromReduction) {
    for (ShuffleVectorInst *SV : Shuffles) {
      for (auto *U : SV->users()) {
          Shuffles.push_back(SSV);
  int MaxV1Elt = 0, MaxV2Elt = 0;
  unsigned NumElts = VT->getNumElements();
  for (ShuffleVectorInst *SVN : Shuffles) {
    SmallVector<int> Mask;
    SVN->getShuffleMask(Mask);
    Value *SVOp0 = SVN->getOperand(0);
    Value *SVOp1 = SVN->getOperand(1);
      for (int &Elem : Mask) {
    if (SVOp0 == Op1 && SVOp1 == Op0) {
    if (SVOp0 != Op0 || SVOp1 != Op1)
    SmallVector<int> ReconstructMask;
    for (unsigned I = 0; I < Mask.size(); I++) {
      } else if (Mask[I] < static_cast<int>(NumElts)) {
        MaxV1Elt = std::max(MaxV1Elt, Mask[I]);
        auto It = find_if(V1, [&](const std::pair<int, int> &A) {
          return Mask[I] == A.first;
        MaxV2Elt = std::max<int>(MaxV2Elt, Mask[I] - NumElts);
        auto It = find_if(V2, [&](const std::pair<int, int> &A) {
          return Mask[I] - static_cast<int>(NumElts) == A.first;
      sort(ReconstructMask);
    OrigReconstructMasks.push_back(std::move(ReconstructMask));
      (MaxV1Elt == static_cast<int>(V1.size()) - 1 &&
       MaxV2Elt == static_cast<int>(V2.size()) - 1))
    if (InputShuffles.contains(SSV))
    return SV->getMaskValue(M);
                               std::pair<int, int> Y) {
    int MXA = GetBaseMaskValue(A, X.first);
    int MYA = GetBaseMaskValue(A, Y.first);
  stable_sort(V1, [&](std::pair<int, int> A, std::pair<int, int> B) {
    return SortBase(SVI0A, A, B);
  stable_sort(V2, [&](std::pair<int, int> A, std::pair<int, int> B) {
    return SortBase(SVI1A, A, B);
  for (const auto &Mask : OrigReconstructMasks) {
    SmallVector<int> ReconstructMask;
    for (int M : Mask) {
        auto It = find_if(V, [M](auto A) { return A.second == M; });
        assert(It != V.end() && "Expected all entries in Mask");
        return std::distance(V.begin(), It);
      } else if (M < static_cast<int>(NumElts)) {
        ReconstructMask.push_back(FindIndex(V1, M));
        ReconstructMask.push_back(NumElts + FindIndex(V2, M));
    ReconstructMasks.push_back(std::move(ReconstructMask));
  SmallVector<int> V1A, V1B, V2A, V2B;
  for (unsigned I = 0; I < V1.size(); I++) {
    V1A.push_back(GetBaseMaskValue(SVI0A, V1[I].first));
    V1B.push_back(GetBaseMaskValue(SVI0B, V1[I].first));
  for (unsigned I = 0; I < V2.size(); I++) {
    V2A.push_back(GetBaseMaskValue(SVI1A, V2[I].first));
    V2B.push_back(GetBaseMaskValue(SVI1B, V2[I].first));
  while (V1A.size() < NumElts) {
  while (V2A.size() < NumElts) {
          VT, VT, SV->getShuffleMask(), CostKind);
  unsigned ElementSize = VT->getElementType()->getPrimitiveSizeInBits();
  unsigned MaxVectorSize =
  unsigned MaxElementsInVector = MaxVectorSize / ElementSize;
  if (MaxElementsInVector == 0)
  std::set<SmallVector<int, 4>> UniqueShuffles;
    unsigned NumFullVectors = Mask.size() / MaxElementsInVector;
    if (NumFullVectors < 2)
      return C + ShuffleCost;
    SmallVector<int, 4> SubShuffle(MaxElementsInVector);
    unsigned NumUniqueGroups = 0;
    unsigned NumGroups = Mask.size() / MaxElementsInVector;
    for (unsigned I = 0; I < NumFullVectors; ++I) {
      for (unsigned J = 0; J < MaxElementsInVector; ++J)
        SubShuffle[J] = Mask[MaxElementsInVector * I + J];
      if (UniqueShuffles.insert(SubShuffle).second)
        NumUniqueGroups += 1;
    return C + ShuffleCost * NumUniqueGroups / NumGroups;
    SmallVector<int, 16> Mask;
    SV->getShuffleMask(Mask);
    return AddShuffleMaskAdjustedCost(C, Mask);
  auto AllShufflesHaveSameOperands =
      [](SmallPtrSetImpl<Instruction *> &InputShuffles) {
        if (InputShuffles.size() < 2)
        ShuffleVectorInst *FirstSV =
            std::next(InputShuffles.begin()), InputShuffles.end(),
            [&](Instruction *I) {
              ShuffleVectorInst *SV = dyn_cast<ShuffleVectorInst>(I);
              return SV && SV->getOperand(0) == In0 && SV->getOperand(1) == In1;
  CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
  if (AllShufflesHaveSameOperands(InputShuffles)) {
    UniqueShuffles.clear();
    CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
    CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
  FixedVectorType *Op0SmallVT =
  FixedVectorType *Op1SmallVT =
  UniqueShuffles.clear();
  CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
  std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
  CostAfter +=
      std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
  LLVM_DEBUG(dbgs() << "Found a binop select shuffle pattern: " << I << "\n");
                    << " vs CostAfter: " << CostAfter << "\n");
  if (CostBefore < CostAfter ||
    if (InputShuffles.contains(SSV))
      return SV->getOperand(Op);
      GetShuffleOperand(SVI0A, 1), V1A);
      GetShuffleOperand(SVI0B, 1), V1B);
      GetShuffleOperand(SVI1A, 1), V2A);
      GetShuffleOperand(SVI1B, 1), V2B);
    I->copyIRFlags(Op0, true);
    I->copyIRFlags(Op1, true);
  for (int S = 0, E = ReconstructMasks.size(); S != E; S++) {
    replaceValue(*Shuffles[S], *NSV, false);
  Worklist.pushValue(NSV0A);
  Worklist.pushValue(NSV0B);
  Worklist.pushValue(NSV1A);
  Worklist.pushValue(NSV1B);
bool VectorCombine::shrinkType(Instruction &I) {
  Value *ZExted, *OtherOperand;
  Value *ZExtOperand = I.getOperand(I.getOperand(0) == OtherOperand ? 1 : 0);
  unsigned BW = SmallTy->getElementType()->getPrimitiveSizeInBits();
  if (I.getOpcode() == Instruction::LShr) {
      Instruction::ZExt, BigTy, SmallTy,
      TargetTransformInfo::CastContextHint::None, CostKind);
  for (User *U : ZExtOperand->users()) {
      ShrinkCost += ZExtCost;
    ShrinkCost += ZExtCost;
      Instruction::Trunc, SmallTy, BigTy,
      TargetTransformInfo::CastContextHint::None, CostKind);
  if (ShrinkCost > CurrentCost)
  Value *Op0 = ZExted;
  if (I.getOperand(0) == OtherOperand)
  replaceValue(I, *NewZExtr);
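// Roughly (schematic): insertelement %dst, (extractelement %src, E), N
// -> shufflevector %dst, %src, M where M is the identity over %dst except
// lane N reads lane E of %src; a length-changing shuffle of %src is added
// first when the two vector widths differ.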
bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
  Value *DstVec, *SrcVec;
  uint64_t ExtIdx, InsIdx;
  if (!DstVecTy || !SrcVecTy ||
      SrcVecTy->getElementType() != DstVecTy->getElementType())
  unsigned NumDstElts = DstVecTy->getNumElements();
  unsigned NumSrcElts = SrcVecTy->getNumElements();
  if (InsIdx >= NumDstElts || ExtIdx >= NumSrcElts || NumDstElts == 1)
  bool NeedExpOrNarrow = NumSrcElts != NumDstElts;
  if (NeedDstSrcSwap) {
    Mask[InsIdx] = ExtIdx % NumDstElts;
    std::iota(Mask.begin(), Mask.end(), 0);
    Mask[InsIdx] = (ExtIdx % NumDstElts) + NumDstElts;
  SmallVector<int> ExtToVecMask;
  if (!NeedExpOrNarrow) {
        nullptr, {DstVec, SrcVec});
      ExtToVecMask[ExtIdx % NumDstElts] = ExtIdx;
        DstVecTy, SrcVecTy, ExtToVecMask, CostKind);
  if (!Ext->hasOneUse())
  LLVM_DEBUG(dbgs() << "Found an insert/extract shuffle-like pair: " << I
                    << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (OldCost < NewCost)
  if (NeedExpOrNarrow) {
    if (!NeedDstSrcSwap)
  replaceValue(I, *Shuf);
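// Roughly: vector.interleave2 of two constant splats of iN is rebuilt as one
// splat of an i(2N) constant ((hi << N) | lo) that is then cast back, when
// the cast cost allows (schematic; lane packing assumed little-endian).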
bool VectorCombine::foldInterleaveIntrinsics(Instruction &I) {
  const APInt *SplatVal0, *SplatVal1;
  auto *ExtVTy = VectorType::getExtendedElementVectorType(VTy);
  unsigned Width = VTy->getElementType()->getIntegerBitWidth();
  LLVM_DEBUG(dbgs() << "VC: The cost to cast from " << *ExtVTy << " to "
                    << *I.getType() << " is too high.\n");
  APInt NewSplatVal = SplatVal1->zext(Width * 2);
  NewSplatVal <<= Width;
  NewSplatVal |= SplatVal0->zext(Width * 2);
      ExtVTy->getElementCount(), ConstantInt::get(F.getContext(), NewSplatVal));
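// Roughly: when a vector load is used only by shuffles that read just its
// leading lanes, the load is shrunk to the smallest vector covering the
// referenced index range and each shuffle mask is remapped accordingly.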
bool VectorCombine::shrinkLoadForShuffles(Instruction &I) {
  if (!OldLoad || !OldLoad->isSimple())
  unsigned const OldNumElements = OldLoadTy->getNumElements();
  using IndexRange = std::pair<int, int>;
  auto GetIndexRangeInShuffles = [&]() -> std::optional<IndexRange> {
    IndexRange OutputRange = IndexRange(OldNumElements, -1);
    for (llvm::Use &Use : I.uses()) {
      User *Shuffle = Use.getUser();
        return std::nullopt;
      for (int Index : Mask) {
        if (Index >= 0 && Index < static_cast<int>(OldNumElements)) {
          OutputRange.first = std::min(Index, OutputRange.first);
          OutputRange.second = std::max(Index, OutputRange.second);
    if (OutputRange.second < OutputRange.first)
      return std::nullopt;
  if (std::optional<IndexRange> Indices = GetIndexRangeInShuffles()) {
    unsigned const NewNumElements = Indices->second + 1u;
    if (NewNumElements < OldNumElements) {
      Type *ElemTy = OldLoadTy->getElementType();
      Value *PtrOp = OldLoad->getPointerOperand();
          Instruction::Load, OldLoad->getType(), OldLoad->getAlign(),
          OldLoad->getPointerAddressSpace(), CostKind);
          OldLoad->getPointerAddressSpace(), CostKind);
      using UseEntry = std::pair<ShuffleVectorInst *, std::vector<int>>;
      unsigned const MaxIndex = NewNumElements * 2u;
      for (llvm::Use &Use : I.uses()) {
        ArrayRef<int> OldMask = Shuffle->getShuffleMask();
        for (int Index : OldMask) {
          if (Index >= static_cast<int>(MaxIndex))
        dbgs() << "Found a load used only by shufflevector instructions: "
               << I << "\n OldCost: " << OldCost
               << " vs NewCost: " << NewCost << "\n");
      if (OldCost < NewCost || !NewCost.isValid())
      NewLoad->copyMetadata(I);
      for (UseEntry &Use : NewUses) {
        ShuffleVectorInst *Shuffle = Use.first;
        std::vector<int> &NewMask = Use.second;
        replaceValue(*Shuffle, *NewShuffle, false);
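// Roughly: a two-input phi whose incoming values are both length-changing
// shuffles of a narrower type is replaced by a phi of the narrow values
// followed by a single shuffle, moving the widening past the phi.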
bool VectorCombine::shrinkPhiOfShuffles(Instruction &I) {
  if (!Phi || Phi->getNumIncomingValues() != 2u)
  ArrayRef<int> Mask0;
  ArrayRef<int> Mask1;
  auto const InputNumElements = InputVT->getNumElements();
  if (InputNumElements >= ResultVT->getNumElements())
  SmallVector<int, 16> NewMask;
  for (auto [M0, M1] : zip(Mask0, Mask1)) {
    if (M0 >= 0 && M1 >= 0)
    else if (M0 == -1 && M1 == -1)
  int MaskOffset = NewMask[0u];
  unsigned Index = (InputNumElements + MaskOffset) % InputNumElements;
  for (unsigned I = 0u; I < InputNumElements; ++I) {
             << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
  auto *NewPhi = Builder.CreatePHI(NewShuf0->getType(), 2u);
  NewPhi->addIncoming(Op, Phi->getIncomingBlock(1u));
  replaceValue(*Phi, *NewShuf1);
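// The driver below visits each instruction and dispatches on opcode. The
// first group of folds also runs in TryEarlyFoldsOnly mode (when the pass is
// scheduled early in the pipeline); afterwards the worklist reprocesses
// anything the successful folds touched.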
bool VectorCombine::run() {
  if (DisableVectorCombine)
    return false;

  // Don't attempt vectorization if the target does not support vectors.
  if (!TTI.getNumberOfRegisters(TTI.getRegisterClassForType(/*Vector*/ true)))
    return false;

  LLVM_DEBUG(dbgs() << "\n\nVECTORCOMBINE on " << F.getName() << "\n");

  auto FoldInst = [this](Instruction &I) {
    Builder.SetInsertPoint(&I);
    bool IsFixedVectorType = isa<FixedVectorType>(I.getType());
    auto Opcode = I.getOpcode();
    // ...

    // These folds should be beneficial regardless of when this pass is run
    // in the optimization pipeline. Dispatching on the opcode first avoids
    // calling into folds that have no chance of matching.
    if (IsFixedVectorType) {
      switch (Opcode) {
      case Instruction::InsertElement:
        if (vectorizeLoadInsert(I))
          return true;
        break;
      case Instruction::ShuffleVector:
        if (widenSubvectorLoad(I))
          return true;
        break;
      default:
        break;
      }
    }

    // These scalarization folds also apply to scalable vector types.
    if (scalarizeOpOrCmp(I))
      return true;
    if (scalarizeLoad(I))
      return true;
    if (scalarizeExtExtract(I))
      return true;
    if (scalarizeVPIntrinsic(I))
      return true;
    if (foldInterleaveIntrinsics(I))
      return true;

    if (Opcode == Instruction::Store)
      if (foldSingleElementStore(I))
        return true;

    // If this is an early pipeline invocation of this pass, we are done.
    if (TryEarlyFoldsOnly)
      return false;

    // Otherwise, try folds that improve codegen but may interfere with
    // earlier IR canonicalizations.
    if (IsFixedVectorType) {
      switch (Opcode) {
      case Instruction::InsertElement:
        if (foldInsExtFNeg(I))
          return true;
        if (foldInsExtBinop(I))
          return true;
        if (foldInsExtVectorToShuffle(I))
          return true;
        break;
      case Instruction::ShuffleVector:
        if (foldPermuteOfBinops(I))
          return true;
        if (foldShuffleOfBinops(I))
          return true;
        if (foldShuffleOfSelects(I))
          return true;
        if (foldShuffleOfCastops(I))
          return true;
        if (foldShuffleOfShuffles(I))
          return true;
        if (foldPermuteOfIntrinsic(I))
          return true;
        if (foldShufflesOfLengthChangingShuffles(I))
          return true;
        if (foldShuffleOfIntrinsics(I))
          return true;
        if (foldSelectShuffle(I))
          return true;
        if (foldShuffleToIdentity(I))
          return true;
        break;
      case Instruction::Load:
        if (shrinkLoadForShuffles(I))
          return true;
        break;
      case Instruction::BitCast:
        if (foldBitcastShuffle(I))
          return true;
        break;
      case Instruction::And:
      case Instruction::Or:
      case Instruction::Xor:
        if (foldBitOpOfCastops(I))
          return true;
        if (foldBitOpOfCastConstant(I))
          return true;
        break;
      case Instruction::PHI:
        if (shrinkPhiOfShuffles(I))
          return true;
        break;
      default:
        break;
      }
    } else {
      switch (Opcode) {
      case Instruction::Call:
        if (foldShuffleFromReductions(I))
          return true;
        if (foldCastFromReductions(I))
          return true;
        break;
      case Instruction::ExtractElement:
        if (foldShuffleChainsToReduce(I))
          return true;
        break;
      case Instruction::ICmp:
      case Instruction::FCmp:
        if (foldExtractExtract(I))
          return true;
        break;
      case Instruction::Or:
        if (foldConcatOfBoolMasks(I))
          return true;
        [[fallthrough]];
      default:
        if (Instruction::isBinaryOp(Opcode)) {
          if (foldExtractExtract(I))
            return true;
          if (foldExtractedCmps(I))
            return true;
          if (foldBinopOfReductions(I))
            return true;
        }
        break;
      }
    }
    return false;
  };

  bool MadeChange = false;
  for (BasicBlock &BB : F) {
    // Ignore unreachable basic blocks.
    if (!DT.isReachableFromEntry(&BB))
      continue;
    // Use an early-increment range so instructions can be erased in the loop.
    for (Instruction &I : make_early_inc_range(BB)) {
      if (!I.isDebugOrPseudoInst())
        MadeChange |= FoldInst(I);
    }
  }

  // Folds push the users of replaced values onto the worklist; keep
  // processing until it drains.
  while (!Worklist.isEmpty()) {
    Instruction *I = Worklist.removeOne();
    if (!I)
      continue;
    if (isInstructionTriviallyDead(I)) {
      eraseInstruction(*I);
      continue;
    }
    MadeChange |= FoldInst(*I);
  }

  return MadeChange;
}