#define DEBUG_TYPE "vector-combine"

STATISTIC(NumVecLoad, "Number of vector loads formed");
STATISTIC(NumVecCmp, "Number of vector compares formed");
STATISTIC(NumVecBO, "Number of vector binops formed");
STATISTIC(NumVecCmpBO, "Number of vector compare + binop formed");
STATISTIC(NumShufOfBitcast, "Number of shuffles moved after bitcast");
STATISTIC(NumScalarOps, "Number of scalar unary + binary ops formed");
STATISTIC(NumScalarCmp, "Number of scalar compares formed");
STATISTIC(NumScalarIntrinsic, "Number of scalar intrinsic calls formed");
static cl::opt<bool> DisableVectorCombine(
    "disable-vector-combine", cl::init(false), cl::Hidden,
    cl::desc("Disable all vector combine transforms"));

static cl::opt<bool> DisableBinopExtractShuffle(
    "disable-binop-extract-shuffle", cl::init(false), cl::Hidden,
    cl::desc("Disable binop extract to shuffle transforms"));

static cl::opt<unsigned> MaxInstrsToScan(
    "vector-combine-max-scan-instrs", cl::init(30), cl::Hidden,
    cl::desc("Max number of instructions to scan for vector combining."));

static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
                bool TryEarlyFoldsOnly)
        TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}

  const TargetTransformInfo &TTI;
  const DominatorTree &DT;
  const SimplifyQuery SQ;
  bool TryEarlyFoldsOnly;
  InstructionWorklist Worklist;
  bool vectorizeLoadInsert(Instruction &I);
  bool widenSubvectorLoad(Instruction &I);
  ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
                                        ExtractElementInst *Ext1,
                                        unsigned PreferredExtractIndex) const;
  bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
                             const Instruction &I,
                             ExtractElementInst *&ConvertToShuffle,
                             unsigned PreferredExtractIndex);
  bool foldExtractExtract(Instruction &I);
  bool foldInsExtFNeg(Instruction &I);
  bool foldInsExtBinop(Instruction &I);
  bool foldInsExtVectorToShuffle(Instruction &I);
  bool foldBitOpOfCastops(Instruction &I);
  bool foldBitOpOfCastConstant(Instruction &I);
  bool foldBitcastShuffle(Instruction &I);
  bool scalarizeOpOrCmp(Instruction &I);
  bool scalarizeVPIntrinsic(Instruction &I);
  bool foldExtractedCmps(Instruction &I);
  bool foldBinopOfReductions(Instruction &I);
  bool foldSingleElementStore(Instruction &I);
  bool scalarizeLoadExtract(Instruction &I);
  bool scalarizeExtExtract(Instruction &I);
  bool foldConcatOfBoolMasks(Instruction &I);
  bool foldPermuteOfBinops(Instruction &I);
  bool foldShuffleOfBinops(Instruction &I);
  bool foldShuffleOfSelects(Instruction &I);
  bool foldShuffleOfCastops(Instruction &I);
  bool foldShuffleOfShuffles(Instruction &I);
  bool foldShuffleOfIntrinsics(Instruction &I);
  bool foldShuffleToIdentity(Instruction &I);
  bool foldShuffleFromReductions(Instruction &I);
  bool foldShuffleChainsToReduce(Instruction &I);
  bool foldCastFromReductions(Instruction &I);
  bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
  bool foldInterleaveIntrinsics(Instruction &I);
  bool shrinkType(Instruction &I);
  bool shrinkLoadForShuffles(Instruction &I);
  bool shrinkPhiOfShuffles(Instruction &I);
  void replaceValue(Instruction &Old, Value &New, bool Erase = true) {
      Worklist.pushUsersToWorkList(*NewI);
      Worklist.pushValue(NewI);

    SmallPtrSet<Value *, 4> Visited;
        OpI, nullptr, nullptr, [&](Value *V) {
        NextInst = NextInst->getNextNode();
      Worklist.pushUsersToWorkList(*OpI);
      Worklist.pushValue(OpI);
  if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
      Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||

  Type *ScalarTy = Load->getType()->getScalarType();
  unsigned MinVectorSize = TTI.getMinVectorRegisterBitWidth();
  if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
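// Try to replace a scalar load + insertelement with a vector load plus a
// shuffle, when the target reports the wider load/shuffle as cheaper.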
bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
  unsigned MinVecNumElts = MinVectorSize / ScalarSize;
  auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false);
  unsigned OffsetEltIndex = 0;
  unsigned OffsetBitWidth = DL->getIndexTypeSizeInBits(SrcPtr->getType());
  APInt Offset(OffsetBitWidth, 0);
  uint64_t ScalarSizeInBytes = ScalarSize / 8;
  if (Offset.urem(ScalarSizeInBytes) != 0)
  OffsetEltIndex = Offset.udiv(ScalarSizeInBytes).getZExtValue();
  if (OffsetEltIndex >= MinVecNumElts)
  unsigned AS = Load->getPointerAddressSpace();
  unsigned OutputNumElts = Ty->getNumElements();
  assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
  Mask[0] = OffsetEltIndex;
  if (OldCost < NewCost || !NewCost.isValid())
  replaceValue(I, *VecLd);
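// Try to widen a load of a subvector that feeds an identity-with-padding
// shuffle into a full-width vector load, when that is not more expensive.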
bool VectorCombine::widenSubvectorLoad(Instruction &I) {
  if (!Shuf->isIdentityWithPadding())
  unsigned OpIndex = any_of(Shuf->getShuffleMask(), [&NumOpElts](int M) {
    return M >= (int)(NumOpElts);
  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
  unsigned AS = Load->getPointerAddressSpace();
  if (OldCost < NewCost || !NewCost.isValid())
  replaceValue(I, *VecLd);
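// Given two extracts with constant indexes, pick which one (if any) should be
// rewritten as a shuffle so both sides can extract from the same lane.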
ExtractElementInst *VectorCombine::getShuffleExtract(
    ExtractElementInst *Ext0, ExtractElementInst *Ext1,
  assert(Index0C && Index1C && "Expected constant extract indexes");
  unsigned Index0 = Index0C->getZExtValue();
  unsigned Index1 = Index1C->getZExtValue();
  if (Index0 == Index1)
  if (PreferredExtractIndex == Index0)
  if (PreferredExtractIndex == Index1)
  return Index0 > Index1 ? Ext0 : Ext1;
bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
                                          ExtractElementInst *Ext1,
                                          const Instruction &I,
                                          ExtractElementInst *&ConvertToShuffle,
                                          unsigned PreferredExtractIndex) {
  assert(Ext0IndexC && Ext1IndexC && "Expected constant extract indexes");
  unsigned Opcode = I.getOpcode();
    assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
           "Expected a compare");
  unsigned Ext0Index = Ext0IndexC->getZExtValue();
  unsigned Ext1Index = Ext1IndexC->getZExtValue();
  unsigned BestExtIndex = Extract0Cost > Extract1Cost ? Ext0Index : Ext1Index;
  unsigned BestInsIndex = Extract0Cost > Extract1Cost ? Ext1Index : Ext0Index;
  InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
  if (Ext0Src == Ext1Src && Ext0Index == Ext1Index) {
    bool HasUseTax = Ext0 == Ext1 ? !Ext0->hasNUses(2)
    OldCost = CheapExtractCost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
    OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost +
  ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
  if (ConvertToShuffle) {
      SmallVector<int> ShuffleMask(FixedVecTy->getNumElements(),
      ShuffleMask[BestInsIndex] = BestExtIndex;
          VecTy, VecTy, ShuffleMask, CostKind, 0, nullptr, {ConvertToShuffle});
          VecTy, VecTy, {}, CostKind, 0, nullptr,
  return OldCost < NewCost;
  ShufMask[NewIndex] = OldIndex;
  return Builder.CreateShuffleVector(Vec, ShufMask, "shift");
                                     V1, "foldExtExtBinop");
    VecBOInst->copyIRFlags(&I);
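// Try to turn a binop/cmp of two extracted elements into an extract of a
// vector binop/cmp, shuffling one source when the extract indexes differ.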
bool VectorCombine::foldExtractExtract(Instruction &I) {
  ExtractElementInst *ExtractToChange;
  if (isExtractExtractCheap(Ext0, Ext1, I, ExtractToChange, InsertIndex))
  if (ExtractToChange) {
    unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
    if (ExtractToChange == Ext0)
                     ? foldExtExtCmp(ExtOp0, ExtOp1, ExtIndex, I)
                     : foldExtExtBinop(ExtOp0, ExtOp1, ExtIndex, I);
  replaceValue(I, *NewExt);
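// Try to fold an insert of an fneg'd extracted element into a shuffle of the
// fneg'd source vector, adding a length-changing shuffle when needed.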
bool VectorCombine::foldInsExtFNeg(Instruction &I) {
  uint64_t ExtIdx, InsIdx;
  auto *DstVecScalarTy = DstVecTy->getScalarType();
  if (!SrcVecTy || DstVecScalarTy != SrcVecTy->getScalarType())
  unsigned NumDstElts = DstVecTy->getNumElements();
  unsigned NumSrcElts = SrcVecTy->getNumElements();
  if (ExtIdx > NumSrcElts || InsIdx >= NumDstElts || NumDstElts == 1)
  SmallVector<int> Mask(NumDstElts);
  std::iota(Mask.begin(), Mask.end(), 0);
  Mask[InsIdx] = (ExtIdx % NumDstElts) + NumDstElts;
  bool NeedLenChg = SrcVecTy->getNumElements() != NumDstElts;
  SmallVector<int> SrcMask;
    SrcMask[ExtIdx % NumDstElts] = ExtIdx;
        DstVecTy, SrcVecTy, SrcMask, CostKind);
             << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
  Value *NewShuf, *LenChgShuf = nullptr;
  replaceValue(I, *NewShuf);
bool VectorCombine::foldInsExtBinop(Instruction &I) {
  BinaryOperator *VecBinOp, *SclBinOp;
             << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
    NewInst->copyIRFlags(VecBinOp);
    NewInst->andIRFlags(SclBinOp);
  replaceValue(I, *NewBO);
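// Try to hoist a bitwise logic op above two casts with the same opcode:
// bitop(cast(X), cast(Y)) -> cast(bitop(X, Y)) when the cost model agrees.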
bool VectorCombine::foldBitOpOfCastops(Instruction &I) {
  if (!BinOp || !BinOp->isBitwiseLogicOp())
  if (!LHSCast || !RHSCast) {
    LLVM_DEBUG(dbgs() << " One or both operands are not cast instructions\n");
  if (CastOpcode != RHSCast->getOpcode())
  switch (CastOpcode) {
  case Instruction::BitCast:
  case Instruction::Trunc:
  case Instruction::SExt:
  case Instruction::ZExt:
  Value *LHSSrc = LHSCast->getOperand(0);
  Value *RHSSrc = RHSCast->getOperand(0);
  auto *SrcTy = LHSSrc->getType();
  auto *DstTy = I.getType();
  if (CastOpcode != Instruction::BitCast &&
  if (!SrcTy->getScalarType()->isIntegerTy() ||
      !DstTy->getScalarType()->isIntegerTy())
      LHSCastCost + RHSCastCost;
  if (!LHSCast->hasOneUse())
    NewCost += LHSCastCost;
  if (!RHSCast->hasOneUse())
    NewCost += RHSCastCost;
                    << " NewCost=" << NewCost << "\n");
  if (NewCost > OldCost)
                                      BinOp->getName() + ".inner");
    NewBinOp->copyIRFlags(BinOp);
  replaceValue(I, *Result);
bool VectorCombine::foldBitOpOfCastConstant(Instruction &I) {
  switch (CastOpcode) {
  case Instruction::BitCast:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::Trunc:
  Value *LHSSrc = LHSCast->getOperand(0);
  auto *SrcTy = LHSSrc->getType();
  auto *DstTy = I.getType();
  if (CastOpcode != Instruction::BitCast &&
  if (!SrcTy->getScalarType()->isIntegerTy() ||
      !DstTy->getScalarType()->isIntegerTy())
  PreservedCastFlags RHSFlags;
  if (!LHSCast->hasOneUse())
    NewCost += LHSCastCost;
  LLVM_DEBUG(dbgs() << "foldBitOpOfCastConstant: OldCost=" << OldCost
                    << " NewCost=" << NewCost << "\n");
  if (NewCost > OldCost)
      LHSSrc, InvC, I.getName() + ".inner");
    NewBinOp->copyIRFlags(&I);
  replaceValue(I, *Result);
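// Try to move a bitcast ahead of a shuffle, rescaling the shuffle mask to the
// new element size: bitcast(shuffle(V, Mask)) -> shuffle(bitcast(V), Mask').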
bool VectorCombine::foldBitcastShuffle(Instruction &I) {
  if (!DestTy || !SrcTy)
  unsigned DestEltSize = DestTy->getScalarSizeInBits();
  unsigned SrcEltSize = SrcTy->getScalarSizeInBits();
  if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0)
  if (!(BCTy0 && BCTy0->getElementType() == DestTy->getElementType()) &&
      !(BCTy1 && BCTy1->getElementType() == DestTy->getElementType()))
  SmallVector<int, 16> NewMask;
  if (DestEltSize <= SrcEltSize) {
    assert(SrcEltSize % DestEltSize == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = SrcEltSize / DestEltSize;
    assert(DestEltSize % SrcEltSize == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = DestEltSize / SrcEltSize;
  unsigned NumSrcElts = SrcTy->getPrimitiveSizeInBits() / DestEltSize;
  auto *NewShuffleTy =
  auto *OldShuffleTy =
  unsigned NumOps = IsUnary ? 1 : 2;
                             TargetTransformInfo::CastContextHint::None,
                             TargetTransformInfo::CastContextHint::None,
  LLVM_DEBUG(dbgs() << "Found a bitcasted shuffle: " << I << "\n OldCost: "
                    << OldCost << " vs NewCost: " << NewCost << "\n");
  if (NewCost > OldCost || !NewCost.isValid())
  replaceValue(I, *Shuf);
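// If a VP intrinsic's vector operands are both splats, try to perform the
// operation on the scalar values and splat the result instead.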
bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) {
  if (!ScalarOp0 || !ScalarOp1)
  auto IsAllTrueMask = [](Value *MaskVal) {
      return ConstValue->isAllOnesValue();
  SmallVector<int> Mask;
    Mask.resize(FVTy->getNumElements(), 0);
    Args.push_back(V->getType());
  IntrinsicCostAttributes Attrs(IntrID, VecTy, Args);
  std::optional<unsigned> FunctionalOpcode =
  std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt;
  if (!FunctionalOpcode) {
    IntrinsicCostAttributes Attrs(*ScalarIntrID, VecTy->getScalarType(), Args);
  InstructionCost NewCost = ScalarOpCost + SplatCost + CostToKeepSplats;
  LLVM_DEBUG(dbgs() << "Found a VP Intrinsic to scalarize: " << VPI
                    << ", Cost of scalarizing:" << NewCost << "\n");
  if (OldCost < NewCost || !NewCost.isValid())
  bool SafeToSpeculate;
        *FunctionalOpcode, &VPI, nullptr, &AC, &DT);
  if (!SafeToSpeculate &&
                                      {ScalarOp0, ScalarOp1})
                                 ScalarOp0, ScalarOp1);
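// Try to scalarize an op/cmp/intrinsic whose vector operands are insertions of
// scalars into the same lane, re-inserting the scalar result afterwards.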
bool VectorCombine::scalarizeOpOrCmp(Instruction &I) {
  if (!UO && !BO && !CI && !II)
    if (Arg->getType() != II->getType() &&
  for (User *U : I.users())
  std::optional<uint64_t> Index;
  auto Ops = II ? II->args() : I.operands();
    uint64_t InsIdx = 0;
    if (OpTy->getElementCount().getKnownMinValue() <= InsIdx)
    else if (InsIdx != *Index)
  if (!Index.has_value())
  Type *ScalarTy = VecTy->getScalarType();
  assert(VecTy->isVectorTy() &&
         "Unexpected types for insert element into binop or cmp");
  unsigned Opcode = I.getOpcode();
  } else if (UO || BO) {
    IntrinsicCostAttributes ScalarICA(
        II->getIntrinsicID(), ScalarTy,
    IntrinsicCostAttributes VectorICA(
        II->getIntrinsicID(), VecTy,
  Value *NewVecC = nullptr;
    NewVecC = simplifyCmpInst(CI->getPredicate(), VecCs[0], VecCs[1], SQ);
        simplifyUnOp(UO->getOpcode(), VecCs[0], UO->getFastMathFlags(), SQ);
    NewVecC = simplifyBinOp(BO->getOpcode(), VecCs[0], VecCs[1], SQ);
  for (auto [Idx, Op, VecC, Scalar] : enumerate(Ops, VecCs, ScalarOps)) {
                                II->getIntrinsicID(), Idx, &TTI)))
        Instruction::InsertElement, VecTy, CostKind, *Index, VecC, Scalar);
    OldCost += InsertCost;
    NewCost += !Op->hasOneUse() * InsertCost;
  if (OldCost < NewCost || !NewCost.isValid())
    ++NumScalarIntrinsic;
    Scalar = Builder.CreateCmp(CI->getPredicate(), ScalarOps[0], ScalarOps[1]);
  Scalar->setName(I.getName() + ".scalar");
    ScalarInst->copyIRFlags(&I);
  replaceValue(I, *Insert);
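// Try to combine a logic op of two compares of extracted elements of one
// vector into a vector compare + shuffle + single extract.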
bool VectorCombine::foldExtractedCmps(Instruction &I) {
  if (!BI || !I.getType()->isIntegerTy(1))
  Value *B0 = I.getOperand(0), *B1 = I.getOperand(1);
  CmpPredicate P0, P1;
  uint64_t Index0, Index1;
  ExtractElementInst *ConvertToShuf = getShuffleExtract(Ext0, Ext1, CostKind);
  assert((ConvertToShuf == Ext0 || ConvertToShuf == Ext1) &&
         "Unknown ExtractElementInst");
  unsigned CmpOpcode =
      Ext0Cost + Ext1Cost + CmpCost * 2 +
  int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
  int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
  ShufMask[CheapIndex] = ExpensiveIndex;
  NewCost += Ext0->hasOneUse() ? 0 : Ext0Cost;
  NewCost += Ext1->hasOneUse() ? 0 : Ext1Cost;
  if (OldCost < NewCost || !NewCost.isValid())
  Value *LHS = ConvertToShuf == Ext0 ? Shuf : VCmp;
  Value *RHS = ConvertToShuf == Ext0 ? VCmp : Shuf;
  replaceValue(I, *NewExt);
  unsigned ReductionOpc =
    CostBeforeReduction =
        TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, ExtType,
    CostAfterReduction =
        TTI.getExtendedReductionCost(ReductionOpc, IsUnsigned, II.getType(),
  if (RedOp && II.getIntrinsicID() == Intrinsic::vector_reduce_add &&
      (Op0->getOpcode() == RedOp->getOpcode() || Op0 == Op1)) {
        TTI.getCastInstrCost(Op0->getOpcode(), MulType, ExtType,
        TTI.getArithmeticInstrCost(Instruction::Mul, MulType, CostKind);
        TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, MulType,
    CostBeforeReduction = ExtCost * 2 + MulCost + Ext2Cost;
    CostAfterReduction = TTI.getMulAccReductionCost(
        IsUnsigned, ReductionOpc, II.getType(), ExtType, CostKind);
  CostAfterReduction = TTI.getArithmeticReductionCost(ReductionOpc, VecRedTy,
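// Try to fold binop(reduce(X), reduce(Y)) into reduce(binop(X, Y)) so only one
// reduction remains, when the vector binop is cheap enough.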
bool VectorCombine::foldBinopOfReductions(Instruction &I) {
  if (BinOpOpc == Instruction::Sub)
    ReductionIID = Intrinsic::vector_reduce_add;
  auto checkIntrinsicAndGetItsArgument = [](Value *V,
      if (II->getIntrinsicID() == IID && II->hasOneUse())
        return II->getArgOperand(0);
  Value *V0 = checkIntrinsicAndGetItsArgument(I.getOperand(0), ReductionIID);
  Value *V1 = checkIntrinsicAndGetItsArgument(I.getOperand(1), ReductionIID);
  unsigned ReductionOpc =
      CostOfRedOperand0 + CostOfRedOperand1 +
  if (NewCost >= OldCost || !NewCost.isValid())
             << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (BinOpOpc == Instruction::Or)
    VectorBO = Builder.CreateOr(V0, V1, "",
  replaceValue(I, *Rdx);
  unsigned NumScanned = 0;
  return std::any_of(Begin, End, [&](const Instruction &Instr) {
class ScalarizationResult {
  enum class StatusTy { Unsafe, Safe, SafeWithFreeze };

  ScalarizationResult(StatusTy Status, Value *ToFreeze = nullptr)
      : Status(Status), ToFreeze(ToFreeze) {}

  ScalarizationResult(const ScalarizationResult &Other) = default;
  ~ScalarizationResult() {
    assert(!ToFreeze && "freeze() not called with ToFreeze being set");

  static ScalarizationResult unsafe() { return {StatusTy::Unsafe}; }
  static ScalarizationResult safe() { return {StatusTy::Safe}; }
  static ScalarizationResult safeWithFreeze(Value *ToFreeze) {
    return {StatusTy::SafeWithFreeze, ToFreeze};

  bool isSafe() const { return Status == StatusTy::Safe; }
  bool isUnsafe() const { return Status == StatusTy::Unsafe; }
  bool isSafeWithFreeze() const { return Status == StatusTy::SafeWithFreeze; }

    Status = StatusTy::Unsafe;

  void freeze(IRBuilderBase &Builder, Instruction &UserI) {
    assert(isSafeWithFreeze() &&
           "should only be used when freezing is required");
           "UserI must be a user of ToFreeze");
    IRBuilder<>::InsertPointGuard Guard(Builder);
      if (U.get() == ToFreeze)

  uint64_t NumElements = VecTy->getElementCount().getKnownMinValue();
    if (C->getValue().ult(NumElements))
      return ScalarizationResult::safe();
    return ScalarizationResult::unsafe();
    return ScalarizationResult::unsafe();
  APInt Zero(IntWidth, 0);
  APInt MaxElts(IntWidth, NumElements);
                 true, &AC, CtxI, &DT)))
    return ScalarizationResult::safe();
  return ScalarizationResult::unsafe();
  if (ValidIndices.contains(IdxRange))
    return ScalarizationResult::safeWithFreeze(IdxBase);
  return ScalarizationResult::unsafe();
      C->getZExtValue() * DL.getTypeStoreSize(ScalarType));
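// Try to rewrite a store of a single updated element of a loaded vector
// (store(insertelement(load p, v, idx), p)) as a scalar store to that element.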
bool VectorCombine::foldSingleElementStore(Instruction &I) {
  if (!match(SI->getValueOperand(),
    Value *SrcAddr = Load->getPointerOperand()->stripPointerCasts();
    if (!Load->isSimple() || Load->getParent() != SI->getParent() ||
        !DL->typeSizeEqualsStoreSize(Load->getType()->getScalarType()) ||
        SrcAddr != SI->getPointerOperand()->stripPointerCasts())
    if (ScalarizableIdx.isUnsafe() ||
    Worklist.push(Load);
    if (ScalarizableIdx.isSafeWithFreeze())
        SI->getValueOperand()->getType(), SI->getPointerOperand(),
        {ConstantInt::get(Idx->getType(), 0), Idx});
        std::max(SI->getAlign(), Load->getAlign()), NewElement->getType(), Idx,
    replaceValue(I, *NSI);
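// Try to replace a vector load used only by extractelement instructions with
// scalar loads of the extracted lanes, when the indexes are known safe.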
bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
  if (LI->isVolatile() || !DL->typeSizeEqualsStoreSize(VecTy->getScalarType()))
      LI->getPointerAddressSpace(), CostKind);
  unsigned NumInstChecked = 0;
  DenseMap<ExtractElementInst *, ScalarizationResult> NeedFreeze;
    for (auto &Pair : NeedFreeze)
      Pair.second.discard();
  for (User *U : LI->users()) {
    if (!UI || UI->getParent() != LI->getParent())
    if (UI->use_empty())
    for (Instruction &I :
         make_range(std::next(LI->getIterator()), UI->getIterator())) {
    LastCheckedInst = UI;
    if (ScalarIdx.isUnsafe())
    if (ScalarIdx.isSafeWithFreeze()) {
      NeedFreeze.try_emplace(UI, ScalarIdx);
      ScalarIdx.discard();
        Index ? Index->getZExtValue() : -1);
                    << "\n LoadExtractCost: " << OriginalCost
                    << " vs ScalarizedCost: " << ScalarizedCost << "\n");
  if (ScalarizedCost >= OriginalCost)
  Type *ElemType = VecTy->getElementType();
  for (User *U : LI->users()) {
    Value *Idx = EI->getIndexOperand();
    auto It = NeedFreeze.find(EI);
    if (It != NeedFreeze.end())
        Builder.CreateLoad(ElemType, GEP, EI->getName() + ".scalar"));
    Align ScalarOpAlignment =
    NewLoad->setAlignment(ScalarOpAlignment);
      size_t Offset = ConstIdx->getZExtValue() * DL->getTypeStoreSize(ElemType);
      AAMDNodes OldAAMD = LI->getAAMetadata();
    replaceValue(*EI, *NewLoad, false);
  FailureGuard.release();
bool VectorCombine::scalarizeExtExtract(Instruction &I) {
  Type *ScalarDstTy = DstTy->getElementType();
  if (DL->getTypeSizeInBits(SrcTy) != DL->getTypeSizeInBits(ScalarDstTy))
  unsigned ExtCnt = 0;
  bool ExtLane0 = false;
  for (User *U : Ext->users()) {
          Instruction::And, ScalarDstTy, CostKind,
      (ExtCnt - ExtLane0) *
              Instruction::LShr, ScalarDstTy, CostKind,
  if (ScalarCost > VectorCost)
  Value *ScalarV = Ext->getOperand(0);
  SmallDenseSet<ConstantInt *, 8> ExtractedLanes;
  bool AllExtractsTriggerUB = true;
  ExtractElementInst *LastExtract = nullptr;
  for (User *U : Ext->users()) {
      AllExtractsTriggerUB = false;
    if (!LastExtract || LastExtract->comesBefore(Extract))
      LastExtract = Extract;
  if (ExtractedLanes.size() != DstTy->getNumElements() ||
      !AllExtractsTriggerUB ||
  uint64_t SrcEltSizeInBits = DL->getTypeSizeInBits(SrcTy->getElementType());
  uint64_t EltBitMask = (1ull << SrcEltSizeInBits) - 1;
  uint64_t TotalBits = DL->getTypeSizeInBits(SrcTy);
  Value *Mask = ConstantInt::get(PackedTy, EltBitMask);
  for (User *U : Ext->users()) {
            ? (TotalBits - SrcEltSizeInBits - Idx * SrcEltSizeInBits)
            : (Idx * SrcEltSizeInBits);
    U->replaceAllUsesWith(And);
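// Try to recognise a concatenation of two bitcast bool masks built with
// zext/shl/or and replace it with a single concat shuffle + bitcast.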
bool VectorCombine::foldConcatOfBoolMasks(Instruction &I) {
  Type *Ty = I.getType();
  if (DL->isBigEndian())
  uint64_t ShAmtX = 0;
  uint64_t ShAmtY = 0;
  if (ShAmtX > ShAmtY) {
  uint64_t ShAmtDiff = ShAmtY - ShAmtX;
  unsigned NumSHL = (ShAmtX > 0) + (ShAmtY > 0);
      MaskTy->getNumElements() != ShAmtDiff ||
      MaskTy->getNumElements() > (BitWidth / 2))
      Type::getIntNTy(Ty->getContext(), ConcatTy->getNumElements());
  auto *MaskIntTy = Type::getIntNTy(Ty->getContext(), ShAmtDiff);
  std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
  if (Ty != ConcatIntTy)
  LLVM_DEBUG(dbgs() << "Found a concatenation of bitcasted bool masks: " << I
                    << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
  if (Ty != ConcatIntTy) {
  replaceValue(I, *Result);
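// Try to sink a permute through a binop of (optionally) shuffled operands by
// folding the outer mask into the operand shuffles.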
bool VectorCombine::foldPermuteOfBinops(Instruction &I) {
  BinaryOperator *BinOp;
  ArrayRef<int> OuterMask;
  Value *Op00, *Op01, *Op10, *Op11;
  ArrayRef<int> Mask0, Mask1;
  if (!Match0 && !Match1)
  if (!ShuffleDstTy || !BinOpTy || !Op0Ty || !Op1Ty)
  unsigned NumSrcElts = BinOpTy->getNumElements();
      any_of(OuterMask, [NumSrcElts](int M) { return M >= (int)NumSrcElts; }))
  SmallVector<int> NewMask0, NewMask1;
  for (int M : OuterMask) {
    if (M < 0 || M >= (int)NumSrcElts) {
      NewMask0.push_back(Match0 ? Mask0[M] : M);
      NewMask1.push_back(Match1 ? Mask1[M] : M);
  unsigned NumOpElts = Op0Ty->getNumElements();
  bool IsIdentity0 = ShuffleDstTy == Op0Ty &&
      all_of(NewMask0, [NumOpElts](int M) { return M < (int)NumOpElts; }) &&
  bool IsIdentity1 = ShuffleDstTy == Op1Ty &&
      all_of(NewMask1, [NumOpElts](int M) { return M < (int)NumOpElts; }) &&
                         BinOpTy, OuterMask, CostKind, 0, nullptr, {BinOp}, &I);
                           Op0Ty, NewMask0, CostKind, 0, nullptr, {Op00, Op01});
                           Op1Ty, NewMask1, CostKind, 0, nullptr, {Op10, Op11});
  LLVM_DEBUG(dbgs() << "Found a shuffle feeding a shuffled binop: " << I
                    << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
    NewInst->copyIRFlags(BinOp);
  replaceValue(I, *NewBO);
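// Try to hoist a shuffle above two binops/cmps of the same opcode:
// shuffle(op(X, Y), op(Z, W)) -> op(shuffle(X, Z), shuffle(Y, W)).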
bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
  ArrayRef<int> OldMask;
  if (LHS->getOpcode() != RHS->getOpcode())
  bool IsCommutative = false;
    IsCommutative = BinaryOperator::isCommutative(BO->getOpcode());
  if (!ShuffleDstTy || !BinResTy || !BinOpTy || X->getType() != Z->getType())
  unsigned NumSrcElts = BinOpTy->getNumElements();
  if (IsCommutative && X != Z && Y != W && (X == W || Y == Z))
  auto ConvertToUnary = [NumSrcElts](int &M) {
    if (M >= (int)NumSrcElts)
  SmallVector<int> NewMask0(OldMask);
  SmallVector<int> NewMask1(OldMask);
  ArrayRef<int> InnerMask;
                               m_Mask(InnerMask)))) &&
               [NumSrcElts](int M) { return M < (int)NumSrcElts; })) {
  bool ReducedInstCount = false;
  ReducedInstCount |= MergeInner(X, 0, NewMask0, CostKind);
  ReducedInstCount |= MergeInner(Y, 0, NewMask1, CostKind);
  ReducedInstCount |= MergeInner(Z, NumSrcElts, NewMask0, CostKind);
  ReducedInstCount |= MergeInner(W, NumSrcElts, NewMask1, CostKind);
  auto *ShuffleCmpTy =
             << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (ReducedInstCount ? (NewCost > OldCost) : (NewCost >= OldCost))
                   : Builder.CreateCmp(PredLHS, Shuf0, Shuf1);
    NewInst->copyIRFlags(LHS);
    NewInst->andIRFlags(RHS);
  replaceValue(I, *NewBO);
bool VectorCombine::foldShuffleOfSelects(Instruction &I) {
  Value *C1, *T1, *F1, *C2, *T2, *F2;
  if (!C1VecTy || !C2VecTy || C1VecTy != C2VecTy)
  if (((SI0FOp == nullptr) != (SI1FOp == nullptr)) ||
      ((SI0FOp != nullptr) &&
       (SI0FOp->getFastMathFlags() != SI1FOp->getFastMathFlags())))
  auto SelOp = Instruction::Select;
      {I.getOperand(0), I.getOperand(1)}, &I);
      Mask, CostKind, 0, nullptr, {C1, C2});
      toVectorTy(Type::getInt1Ty(I.getContext()), DstVecTy->getNumElements()));
             << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
    NewSel = Builder.CreateSelectFMF(ShuffleCmp, ShuffleTrue, ShuffleFalse,
                                     SI0FOp->getFastMathFlags());
    NewSel = Builder.CreateSelect(ShuffleCmp, ShuffleTrue, ShuffleFalse);
  replaceValue(I, *NewSel);
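// Try to hoist a shuffle above two casts with the same opcode:
// shuffle(cast(X), cast(Y)) -> cast(shuffle(X, Y)), rescaling the mask for
// bitcasts that change the element count.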
bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
  ArrayRef<int> OldMask;
  if (!C0 || (IsBinaryShuffle && !C1))
  if (!IsBinaryShuffle && Opcode == Instruction::BitCast)
  if (IsBinaryShuffle) {
    if (C0->getSrcTy() != C1->getSrcTy())
    if (Opcode != C1->getOpcode()) {
      Opcode = Instruction::SExt;
  if (!ShuffleDstTy || !CastDstTy || !CastSrcTy)
  unsigned NumSrcElts = CastSrcTy->getNumElements();
  unsigned NumDstElts = CastDstTy->getNumElements();
  assert((NumDstElts == NumSrcElts || Opcode == Instruction::BitCast) &&
         "Only bitcasts expected to alter src/dst element counts");
  if (NumDstElts != NumSrcElts && (NumSrcElts % NumDstElts) != 0 &&
      (NumDstElts % NumSrcElts) != 0)
  SmallVector<int, 16> NewMask;
  if (NumSrcElts >= NumDstElts) {
    assert(NumSrcElts % NumDstElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = NumSrcElts / NumDstElts;
    assert(NumDstElts % NumSrcElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = NumDstElts / NumSrcElts;
  auto *NewShuffleDstTy =
  if (IsBinaryShuffle)
  if (IsBinaryShuffle) {
             << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
  if (IsBinaryShuffle)
  NewInst->copyIRFlags(C0);
  if (IsBinaryShuffle)
    NewInst->andIRFlags(C1);
  replaceValue(I, *Cast);
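// Try to merge a shuffle of one or two inner shuffles into a single shuffle by
// composing the masks, as long as at most two source vectors remain.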
bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
  ArrayRef<int> OuterMask;
  Value *OuterV0, *OuterV1;
  ArrayRef<int> InnerMask0, InnerMask1;
  Value *X0, *X1, *Y0, *Y1;
  if (!Match0 && !Match1)
  SmallVector<int, 16> PoisonMask1;
    InnerMask1 = PoisonMask1;
  X0 = Match0 ? X0 : OuterV0;
  Y0 = Match0 ? Y0 : OuterV0;
  X1 = Match1 ? X1 : OuterV1;
  Y1 = Match1 ? Y1 : OuterV1;
  if (!ShuffleDstTy || !ShuffleSrcTy || !ShuffleImmTy ||
  unsigned NumSrcElts = ShuffleSrcTy->getNumElements();
  unsigned NumImmElts = ShuffleImmTy->getNumElements();
  SmallVector<int, 16> NewMask(OuterMask);
  Value *NewX = nullptr, *NewY = nullptr;
  for (int &M : NewMask) {
    Value *Src = nullptr;
    if (0 <= M && M < (int)NumImmElts) {
      Src = M >= (int)NumSrcElts ? Y0 : X0;
      M = M >= (int)NumSrcElts ? (M - NumSrcElts) : M;
    } else if (M >= (int)NumImmElts) {
      Src = M >= (int)NumSrcElts ? Y1 : X1;
      M = M >= (int)NumSrcElts ? (M - NumSrcElts) : M;
      assert(0 <= M && M < (int)NumSrcElts && "Unexpected shuffle mask index");
      if (!NewX || NewX == Src) {
      if (!NewY || NewY == Src) {
    replaceValue(I, *NewX);
  bool IsUnary = all_of(NewMask, [&](int M) { return M < (int)NumSrcElts; });
      nullptr, {NewX, NewY});
    NewCost += InnerCost0;
    NewCost += InnerCost1;
             << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
  replaceValue(I, *Shuf);
bool VectorCombine::foldShuffleOfIntrinsics(Instruction &I) {
  ArrayRef<int> OldMask;
  if (IID != II1->getIntrinsicID())
  if (!ShuffleDstTy || !II0Ty)
  for (unsigned I = 0, E = II0->arg_size(); I != E; ++I)
        II0->getArgOperand(I) != II1->getArgOperand(I))
      II0Ty, OldMask, CostKind, 0, nullptr, {II0, II1}, &I);
  for (unsigned I = 0, E = II0->arg_size(); I != E; ++I) {
      NewArgsTy.push_back(II0->getArgOperand(I)->getType());
                                       ShuffleDstTy->getNumElements());
  IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);
             << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
  for (unsigned I = 0, E = II0->arg_size(); I != E; ++I)
          II1->getArgOperand(I), OldMask);
    NewInst->copyIRFlags(II0);
    NewInst->andIRFlags(II1);
  replaceValue(I, *NewIntrinsic);
  int M = SV->getMaskValue(Lane);
  if (static_cast<unsigned>(M) < NumElts) {
    U = &SV->getOperandUse(0);
    U = &SV->getOperandUse(1);
    auto [U, Lane] = IL;

  unsigned NumElts = Ty->getNumElements();
  if (Item.size() == NumElts || NumElts == 1 || Item.size() % NumElts != 0)
  std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
  unsigned NumSlices = Item.size() / NumElts;
  for (unsigned Slice = 0; Slice < NumSlices; ++Slice) {
    Use *SliceV = Item[Slice * NumElts].first;
    if (!SliceV || SliceV->get()->getType() != Ty)
    for (unsigned Elt = 0; Elt < NumElts; ++Elt) {
      auto [V, Lane] = Item[Slice * NumElts + Elt];
      if (Lane != static_cast<int>(Elt) || SliceV->get() != V->get())

  auto [FrontU, FrontLane] = Item.front();
  if (IdentityLeafs.contains(FrontU)) {
    return FrontU->get();
    return Builder.CreateShuffleVector(FrontU->get(), Mask);
  if (ConcatLeafs.contains(FrontU)) {
    for (unsigned S = 0; S < Values.size(); ++S)
      Values[S] = Item[S * NumElts].first->get();
    while (Values.size() > 1) {
      std::iota(Mask.begin(), Mask.end(), 0);
      for (unsigned S = 0; S < NewValues.size(); ++S)
            Builder.CreateShuffleVector(Values[S * 2], Values[S * 2 + 1], Mask);
  unsigned NumOps = I->getNumOperands() - (II ? 1 : 0);
  for (unsigned Idx = 0; Idx < NumOps; Idx++) {
      Ops[Idx] = II->getOperand(Idx);
          Ty, IdentityLeafs, SplatLeafs, ConcatLeafs,
  for (const auto &Lane : Item)
    auto *Value = Builder.CreateCmp(CI->getPredicate(), Ops[0], Ops[1]);
    auto *Value = Builder.CreateCast(CI->getOpcode(), Ops[0], DstTy);
    auto *Value = Builder.CreateIntrinsic(DstTy, II->getIntrinsicID(), Ops);
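// Starting from a shuffle, track each lane back through shuffles and lanewise
// operations; if every lane comes from the matching lane of identity, splat or
// concat sources, rebuild the operation tree directly on those sources.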
bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
  if (!Ty || I.use_empty())
  for (unsigned M = 0, E = Ty->getNumElements(); M < E; ++M)
  SmallPtrSet<Use *, 4> IdentityLeafs, SplatLeafs, ConcatLeafs;
  unsigned NumVisited = 0;
  while (!Worklist.empty()) {
    auto [FrontU, FrontLane] = Item.front();
      return X->getType() == Y->getType() &&
    if (FrontLane == 0 &&
            Ty->getNumElements() &&
          return !E.value().first || (IsEquiv(E.value().first->get(), FrontV) &&
                                      E.value().second == (int)E.index());
      IdentityLeafs.insert(FrontU);
        C && C->getSplatValue() &&
      SplatLeafs.insert(FrontU);
      auto [FrontU, FrontLane] = Item.front();
      auto [U, Lane] = IL;
      return !U || (U->get() == FrontU->get() && Lane == FrontLane);
      SplatLeafs.insert(FrontU);
    auto CheckLaneIsEquivalentToFirst = [Item](InstLane IL) {
      Value *V = IL.first->get();
        if (CI->getPredicate() != cast<CmpInst>(FrontV)->getPredicate())
        if (CI->getSrcTy()->getScalarType() !=
            SI->getOperand(0)->getType() !=
           II->getIntrinsicID() ==
           !II->hasOperandBundles());
        BO && BO->isIntDivRem())
    } else if (isa<UnaryOperator, TruncInst, ZExtInst, SExtInst, FPToSIInst,
                   FPToUIInst, SIToFPInst, UIToFPInst>(FrontU)) {
      if (DstTy && SrcTy &&
          SrcTy->getNumElements() == DstTy->getNumElements()) {
        !II->hasOperandBundles()) {
      for (unsigned Op = 0, E = II->getNumOperands() - 1; Op < E; Op++) {
      ConcatLeafs.insert(FrontU);
  if (NumVisited <= 1)
  LLVM_DEBUG(dbgs() << "Found a superfluous identity shuffle: " << I << "\n");
                                 ConcatLeafs, Builder, &TTI);
  replaceValue(I, *V);
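// For a vector reduction fed (possibly through binops) by a single shuffle
// that only reorders lanes, try to replace that shuffle with a cheaper mask,
// since lane order does not affect the reduction result.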
bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
  switch (II->getIntrinsicID()) {
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_and:
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_xor:
  case Intrinsic::vector_reduce_smin:
  case Intrinsic::vector_reduce_smax:
  case Intrinsic::vector_reduce_umin:
  case Intrinsic::vector_reduce_umax:
  std::queue<Value *> Worklist;
  SmallPtrSet<Value *, 4> Visited;
  ShuffleVectorInst *Shuffle = nullptr;
  while (!Worklist.empty()) {
    Value *CV = Worklist.front();
      if (CI->isBinaryOp()) {
        for (auto *Op : CI->operand_values())
        if (Shuffle && Shuffle != SV)
  for (auto *V : Visited)
    for (auto *U : V->users())
      if (!Visited.contains(U) && U != &I)
  FixedVectorType *VecType =
  FixedVectorType *ShuffleInputType =
  if (!ShuffleInputType)
  SmallVector<int> ConcatMask;
  sort(ConcatMask, [](int X, int Y) { return (unsigned)X < (unsigned)Y; });
  bool UsesSecondVec =
      any_of(ConcatMask, [&](int M) { return M >= (int)NumInputElts; });
      ShuffleInputType, ConcatMask, CostKind);
  LLVM_DEBUG(dbgs() << "Found a reduction feeding from a shuffle: " << *Shuffle
  LLVM_DEBUG(dbgs() << " OldCost: " << OldCost << " vs NewCost: " << NewCost
  bool MadeChanges = false;
  if (NewCost < OldCost) {
    LLVM_DEBUG(dbgs() << "Created new shuffle: " << *NewShuffle << "\n");
    replaceValue(*Shuffle, *NewShuffle);
  MadeChanges |= foldSelectShuffle(*Shuffle, true);
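// Try to recognise a log2(N)-deep chain of shuffles with min/max intrinsics or
// binops that pairwise-reduces a vector, and replace the whole chain with one
// vector_reduce_* intrinsic when that is cheaper.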
bool VectorCombine::foldShuffleChainsToReduce(Instruction &I) {
  std::queue<Value *> InstWorklist;
  std::optional<unsigned int> CommonCallOp = std::nullopt;
  std::optional<Instruction::BinaryOps> CommonBinOp = std::nullopt;
  bool IsFirstCallOrBinInst = true;
  bool ShouldBeCallOrBinInst = true;
  SmallVector<Value *, 2> PrevVecV(2, nullptr);
  int64_t VecSize = FVT->getNumElements();
  unsigned int NumLevels = Log2_64_Ceil(VecSize), VisitedCnt = 0;
  int64_t ShuffleMaskHalf = 1, ExpectedParityMask = 0;
  for (int Cur = VecSize, Mask = NumLevels - 1; Cur > 1;
       Cur = (Cur + 1) / 2, --Mask) {
      ExpectedParityMask |= (1ll << Mask);
  InstWorklist.push(VecOpEE);
  while (!InstWorklist.empty()) {
    Value *CI = InstWorklist.front();
      if (!ShouldBeCallOrBinInst)
      if (!IsFirstCallOrBinInst &&
          any_of(PrevVecV, [](Value *VecV) { return VecV == nullptr; }))
      if (II != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
      IsFirstCallOrBinInst = false;
        CommonCallOp = II->getIntrinsicID();
      if (II->getIntrinsicID() != *CommonCallOp)
      switch (II->getIntrinsicID()) {
      case Intrinsic::umin:
      case Intrinsic::umax:
      case Intrinsic::smin:
      case Intrinsic::smax: {
        auto *Op0 = II->getOperand(0);
        auto *Op1 = II->getOperand(1);
      ShouldBeCallOrBinInst ^= 1;
      IntrinsicCostAttributes ICA(
          *CommonCallOp, II->getType(),
          {PrevVecV[0]->getType(), PrevVecV[1]->getType()});
      InstWorklist.push(PrevVecV[1]);
      InstWorklist.push(PrevVecV[0]);
      if (!ShouldBeCallOrBinInst)
      if (!IsFirstCallOrBinInst &&
          any_of(PrevVecV, [](Value *VecV) { return VecV == nullptr; }))
      if (BinOp != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
      IsFirstCallOrBinInst = false;
      switch (*CommonBinOp) {
      case BinaryOperator::Add:
      case BinaryOperator::Mul:
      case BinaryOperator::Or:
      case BinaryOperator::And:
      case BinaryOperator::Xor: {
      ShouldBeCallOrBinInst ^= 1;
      InstWorklist.push(PrevVecV[1]);
      InstWorklist.push(PrevVecV[0]);
      if (ShouldBeCallOrBinInst ||
          any_of(PrevVecV, [](Value *VecV) { return VecV == nullptr; }))
      if (SVInst != PrevVecV[1])
      ArrayRef<int> CurMask;
      for (int Mask = 0, MaskSize = CurMask.size(); Mask != MaskSize; ++Mask) {
        if (Mask < ShuffleMaskHalf &&
            CurMask[Mask] != ShuffleMaskHalf + Mask - (ExpectedParityMask & 1))
        if (Mask >= ShuffleMaskHalf && CurMask[Mask] != -1)
      ShuffleMaskHalf *= 2;
      ShuffleMaskHalf -= (ExpectedParityMask & 1);
      ExpectedParityMask >>= 1;
          SVInst->getType(), SVInst->getType(),
      if (!ExpectedParityMask && VisitedCnt == NumLevels)
      ShouldBeCallOrBinInst ^= 1;
  if (ShouldBeCallOrBinInst)
  assert(VecSize != -1 && "Expected Match for Vector Size");
  Value *FinalVecV = PrevVecV[0];
  IntrinsicCostAttributes ICA(ReducedOp, FinalVecVTy, {FinalVecV});
  if (NewCost >= OrigCost)
  auto *ReducedResult =
  replaceValue(I, *ReducedResult);
bool VectorCombine::foldCastFromReductions(Instruction &I) {
  bool TruncOnly = false;
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_and:
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_xor:
  Value *ReductionSrc = I.getOperand(0);
  Type *ResultTy = I.getType();
      ReductionOpc, ReductionSrcTy, std::nullopt, CostKind);
  if (OldCost <= NewCost || !NewCost.isValid())
      II->getIntrinsicID(), {Src});
  replaceValue(I, *NewCast);
  constexpr unsigned MaxVisited = 32;
  bool FoundReduction = false;
  while (!WorkList.empty()) {
    for (User *U : I->users()) {
      if (!UI || !Visited.insert(UI).second)
      if (Visited.size() > MaxVisited)
      switch (II->getIntrinsicID()) {
      case Intrinsic::vector_reduce_add:
      case Intrinsic::vector_reduce_mul:
      case Intrinsic::vector_reduce_and:
      case Intrinsic::vector_reduce_or:
      case Intrinsic::vector_reduce_xor:
      case Intrinsic::vector_reduce_smin:
      case Intrinsic::vector_reduce_smax:
      case Intrinsic::vector_reduce_umin:
      case Intrinsic::vector_reduce_umax:
        FoundReduction = true;
  return FoundReduction;
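// Look for two binops whose operands are select-like shuffles of the same two
// vectors and whose results are shuffled back together; try to rearrange the
// masks so each binop works on contiguous lanes, making the shuffles cheaper.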
bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
  if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
  SmallPtrSet<Instruction *, 4> InputShuffles({SVI0A, SVI0B, SVI1A, SVI1B});
    if (!I || I->getOperand(0)->getType() != VT)
    return any_of(I->users(), [&](User *U) {
      return U != Op0 && U != Op1 &&
             !(isa<ShuffleVectorInst>(U) &&
               (InputShuffles.contains(cast<Instruction>(U)) ||
                isInstructionTriviallyDead(cast<Instruction>(U))));
  if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
      checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
    for (auto *U : I->users()) {
      if (!SV || SV->getType() != VT)
      if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) ||
          (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1))
  if (!collectShuffles(Op0) || !collectShuffles(Op1))
  if (FromReduction && Shuffles.size() > 1)
  if (!FromReduction) {
    for (ShuffleVectorInst *SV : Shuffles) {
      for (auto *U : SV->users()) {
          Shuffles.push_back(SSV);
  int MaxV1Elt = 0, MaxV2Elt = 0;
  unsigned NumElts = VT->getNumElements();
  for (ShuffleVectorInst *SVN : Shuffles) {
    SmallVector<int> Mask;
    SVN->getShuffleMask(Mask);
    Value *SVOp0 = SVN->getOperand(0);
    Value *SVOp1 = SVN->getOperand(1);
      for (int &Elem : Mask) {
    if (SVOp0 == Op1 && SVOp1 == Op0) {
    if (SVOp0 != Op0 || SVOp1 != Op1)
    SmallVector<int> ReconstructMask;
    for (unsigned I = 0; I < Mask.size(); I++) {
      } else if (Mask[I] < static_cast<int>(NumElts)) {
        MaxV1Elt = std::max(MaxV1Elt, Mask[I]);
        auto It = find_if(V1, [&](const std::pair<int, int> &A) {
          return Mask[I] == A.first;
        MaxV2Elt = std::max<int>(MaxV2Elt, Mask[I] - NumElts);
        auto It = find_if(V2, [&](const std::pair<int, int> &A) {
          return Mask[I] - static_cast<int>(NumElts) == A.first;
      sort(ReconstructMask);
    OrigReconstructMasks.push_back(std::move(ReconstructMask));
      (MaxV1Elt == static_cast<int>(V1.size()) - 1 &&
       MaxV2Elt == static_cast<int>(V2.size()) - 1))
    if (InputShuffles.contains(SSV))
    return SV->getMaskValue(M);
                      std::pair<int, int> Y) {
    int MXA = GetBaseMaskValue(A, X.first);
    int MYA = GetBaseMaskValue(A, Y.first);
  stable_sort(V1, [&](std::pair<int, int> A, std::pair<int, int> B) {
    return SortBase(SVI0A, A, B);
  stable_sort(V2, [&](std::pair<int, int> A, std::pair<int, int> B) {
    return SortBase(SVI1A, A, B);
  for (const auto &Mask : OrigReconstructMasks) {
    SmallVector<int> ReconstructMask;
    for (int M : Mask) {
        auto It = find_if(V, [M](auto A) { return A.second == M; });
        assert(It != V.end() && "Expected all entries in Mask");
        return std::distance(V.begin(), It);
      else if (M < static_cast<int>(NumElts)) {
        ReconstructMask.push_back(FindIndex(V1, M));
        ReconstructMask.push_back(NumElts + FindIndex(V2, M));
    ReconstructMasks.push_back(std::move(ReconstructMask));
  SmallVector<int> V1A, V1B, V2A, V2B;
  for (unsigned I = 0; I < V1.size(); I++) {
    V1A.push_back(GetBaseMaskValue(SVI0A, V1[I].first));
    V1B.push_back(GetBaseMaskValue(SVI0B, V1[I].first));
  for (unsigned I = 0; I < V2.size(); I++) {
    V2A.push_back(GetBaseMaskValue(SVI1A, V2[I].first));
    V2B.push_back(GetBaseMaskValue(SVI1B, V2[I].first));
  while (V1A.size() < NumElts) {
  while (V2A.size() < NumElts) {
        VT, VT, SV->getShuffleMask(), CostKind);
  unsigned ElementSize = VT->getElementType()->getPrimitiveSizeInBits();
  unsigned MaxVectorSize =
  unsigned MaxElementsInVector = MaxVectorSize / ElementSize;
  if (MaxElementsInVector == 0)
  std::set<SmallVector<int, 4>> UniqueShuffles;
    unsigned NumFullVectors = Mask.size() / MaxElementsInVector;
    if (NumFullVectors < 2)
      return C + ShuffleCost;
    SmallVector<int, 4> SubShuffle(MaxElementsInVector);
    unsigned NumUniqueGroups = 0;
    unsigned NumGroups = Mask.size() / MaxElementsInVector;
    for (unsigned I = 0; I < NumFullVectors; ++I) {
      for (unsigned J = 0; J < MaxElementsInVector; ++J)
        SubShuffle[J] = Mask[MaxElementsInVector * I + J];
      if (UniqueShuffles.insert(SubShuffle).second)
        NumUniqueGroups += 1;
    return C + ShuffleCost * NumUniqueGroups / NumGroups;
    SmallVector<int, 16> Mask;
    SV->getShuffleMask(Mask);
    return AddShuffleMaskAdjustedCost(C, Mask);
  auto AllShufflesHaveSameOperands =
      [](SmallPtrSetImpl<Instruction *> &InputShuffles) {
        if (InputShuffles.size() < 2)
        ShuffleVectorInst *FirstSV =
            std::next(InputShuffles.begin()), InputShuffles.end(),
            [&](Instruction *I) {
              ShuffleVectorInst *SV = dyn_cast<ShuffleVectorInst>(I);
              return SV && SV->getOperand(0) == In0 && SV->getOperand(1) == In1;
  CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
  if (AllShufflesHaveSameOperands(InputShuffles)) {
    UniqueShuffles.clear();
    CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
    CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
  FixedVectorType *Op0SmallVT =
  FixedVectorType *Op1SmallVT =
  UniqueShuffles.clear();
  CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
  std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
      std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
  LLVM_DEBUG(dbgs() << "Found a binop select shuffle pattern: " << I << "\n");
                    << " vs CostAfter: " << CostAfter << "\n");
  if (CostBefore < CostAfter ||
    if (InputShuffles.contains(SSV))
    return SV->getOperand(Op);
                                   GetShuffleOperand(SVI0A, 1), V1A);
                                   GetShuffleOperand(SVI0B, 1), V1B);
                                   GetShuffleOperand(SVI1A, 1), V2A);
                                   GetShuffleOperand(SVI1B, 1), V2B);
    I->copyIRFlags(Op0, true);
    I->copyIRFlags(Op1, true);
  for (int S = 0, E = ReconstructMasks.size(); S != E; S++) {
    replaceValue(*Shuffles[S], *NSV, false);
  Worklist.pushValue(NSV0A);
  Worklist.pushValue(NSV0B);
  Worklist.pushValue(NSV1A);
  Worklist.pushValue(NSV1B);
bool VectorCombine::shrinkType(Instruction &I) {
  Value *ZExted, *OtherOperand;
  Value *ZExtOperand = I.getOperand(I.getOperand(0) == OtherOperand ? 1 : 0);
  unsigned BW = SmallTy->getElementType()->getPrimitiveSizeInBits();
  if (I.getOpcode() == Instruction::LShr) {
      Instruction::ZExt, BigTy, SmallTy,
      TargetTransformInfo::CastContextHint::None, CostKind);
  for (User *U : ZExtOperand->users()) {
      ShrinkCost += ZExtCost;
    ShrinkCost += ZExtCost;
      Instruction::Trunc, SmallTy, BigTy,
      TargetTransformInfo::CastContextHint::None, CostKind);
  if (ShrinkCost > CurrentCost)
  Value *Op0 = ZExted;
  if (I.getOperand(0) == OtherOperand)
  replaceValue(I, *NewZExtr);
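// Try to fold insertelement(DstVec, extractelement(SrcVec, ExtIdx), InsIdx)
// into a shuffle of DstVec and SrcVec, with an extra length-changing shuffle
// when the two vectors have different element counts.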
bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
  Value *DstVec, *SrcVec;
  uint64_t ExtIdx, InsIdx;
  if (!DstVecTy || !SrcVecTy ||
      SrcVecTy->getElementType() != DstVecTy->getElementType())
  unsigned NumDstElts = DstVecTy->getNumElements();
  unsigned NumSrcElts = SrcVecTy->getNumElements();
  if (InsIdx >= NumDstElts || ExtIdx >= NumSrcElts || NumDstElts == 1)
  bool NeedExpOrNarrow = NumSrcElts != NumDstElts;
  bool IsExtIdxInBounds = ExtIdx < NumDstElts;
  if (NeedDstSrcSwap) {
    if (!IsExtIdxInBounds && NeedExpOrNarrow)
      Mask[InsIdx] = ExtIdx;
    std::iota(Mask.begin(), Mask.end(), 0);
    if (!IsExtIdxInBounds && NeedExpOrNarrow)
      Mask[InsIdx] = NumDstElts;
      Mask[InsIdx] = ExtIdx + NumDstElts;
  SmallVector<int> ExtToVecMask;
  if (!NeedExpOrNarrow) {
        nullptr, {DstVec, SrcVec});
    if (IsExtIdxInBounds)
      ExtToVecMask[ExtIdx] = ExtIdx;
      ExtToVecMask[0] = ExtIdx;
        DstVecTy, SrcVecTy, ExtToVecMask, CostKind);
  if (!Ext->hasOneUse())
  LLVM_DEBUG(dbgs() << "Found a insert/extract shuffle-like pair: " << I
                    << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (OldCost < NewCost)
  if (NeedExpOrNarrow) {
    if (!NeedDstSrcSwap)
  replaceValue(I, *Shuf);
bool VectorCombine::foldInterleaveIntrinsics(Instruction &I) {
  const APInt *SplatVal0, *SplatVal1;
  auto *ExtVTy = VectorType::getExtendedElementVectorType(VTy);
  unsigned Width = VTy->getElementType()->getIntegerBitWidth();
    LLVM_DEBUG(dbgs() << "VC: The cost to cast from " << *ExtVTy << " to "
                      << *I.getType() << " is too high.\n");
  APInt NewSplatVal = SplatVal1->zext(Width * 2);
  NewSplatVal <<= Width;
  NewSplatVal |= SplatVal0->zext(Width * 2);
      ExtVTy->getElementCount(), ConstantInt::get(F.getContext(), NewSplatVal));
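// If a vector load is only used by shufflevectors that read a leading subset
// of its lanes, try to shrink the load to cover just those lanes and rewrite
// the shuffle masks.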
bool VectorCombine::shrinkLoadForShuffles(Instruction &I) {
  if (!OldLoad || !OldLoad->isSimple())
  unsigned const OldNumElements = OldLoadTy->getNumElements();
  using IndexRange = std::pair<int, int>;
  auto GetIndexRangeInShuffles = [&]() -> std::optional<IndexRange> {
    IndexRange OutputRange = IndexRange(OldNumElements, -1);
    for (llvm::Use &Use : I.uses()) {
      User *Shuffle = Use.getUser();
        return std::nullopt;
      for (int Index : Mask) {
        if (Index >= 0 && Index < static_cast<int>(OldNumElements)) {
          OutputRange.first = std::min(Index, OutputRange.first);
          OutputRange.second = std::max(Index, OutputRange.second);
    if (OutputRange.second < OutputRange.first)
      return std::nullopt;
  if (std::optional<IndexRange> Indices = GetIndexRangeInShuffles()) {
    unsigned const NewNumElements = Indices->second + 1u;
    if (NewNumElements < OldNumElements) {
      Type *ElemTy = OldLoadTy->getElementType();
      Value *PtrOp = OldLoad->getPointerOperand();
          Instruction::Load, OldLoad->getType(), OldLoad->getAlign(),
          OldLoad->getPointerAddressSpace(), CostKind);
          OldLoad->getPointerAddressSpace(), CostKind);
      using UseEntry = std::pair<ShuffleVectorInst *, std::vector<int>>;
      unsigned const MaxIndex = NewNumElements * 2u;
      for (llvm::Use &Use : I.uses()) {
        ArrayRef<int> OldMask = Shuffle->getShuffleMask();
        for (int Index : OldMask) {
          if (Index >= static_cast<int>(MaxIndex))
          dbgs() << "Found a load used only by shufflevector instructions: "
                 << I << "\n OldCost: " << OldCost
                 << " vs NewCost: " << NewCost << "\n");
      if (OldCost < NewCost || !NewCost.isValid())
      NewLoad->copyMetadata(I);
      for (UseEntry &Use : NewUses) {
        ShuffleVectorInst *Shuffle = Use.first;
        std::vector<int> &NewMask = Use.second;
        replaceValue(*Shuffle, *NewShuffle, false);
bool VectorCombine::shrinkPhiOfShuffles(Instruction &I) {
  if (!Phi || Phi->getNumIncomingValues() != 2u)
  ArrayRef<int> Mask0;
  ArrayRef<int> Mask1;
  auto const InputNumElements = InputVT->getNumElements();
  if (InputNumElements >= ResultVT->getNumElements())
  SmallVector<int, 16> NewMask;
  for (auto [M0, M1] : zip(Mask0, Mask1)) {
    if (M0 >= 0 && M1 >= 0)
    else if (M0 == -1 && M1 == -1)
  int MaskOffset = NewMask[0u];
  unsigned Index = (InputNumElements + MaskOffset) % InputNumElements;
  for (unsigned I = 0u; I < InputNumElements; ++I) {
             << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
  auto *NewPhi = Builder.CreatePHI(NewShuf0->getType(), 2u);
  NewPhi->addIncoming(Op, Phi->getIncomingBlock(1u));
  replaceValue(*Phi, *NewShuf1);
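// Pass driver: visit each instruction, run the cheap early folds (and, unless
// TryEarlyFoldsOnly is set, the opcode-specific folds), then keep folding
// instructions pushed onto the worklist until it is empty.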
bool VectorCombine::run() {
    auto Opcode = I.getOpcode();
    if (IsFixedVectorType) {
      case Instruction::InsertElement:
        if (vectorizeLoadInsert(I))
      case Instruction::ShuffleVector:
        if (widenSubvectorLoad(I))
      if (scalarizeOpOrCmp(I))
      if (scalarizeLoadExtract(I))
      if (scalarizeExtExtract(I))
      if (scalarizeVPIntrinsic(I))
      if (foldInterleaveIntrinsics(I))
    if (Opcode == Instruction::Store)
      if (foldSingleElementStore(I))
    if (TryEarlyFoldsOnly)
    if (IsFixedVectorType) {
      case Instruction::InsertElement:
        if (foldInsExtFNeg(I))
        if (foldInsExtBinop(I))
        if (foldInsExtVectorToShuffle(I))
      case Instruction::ShuffleVector:
        if (foldPermuteOfBinops(I))
        if (foldShuffleOfBinops(I))
        if (foldShuffleOfSelects(I))
        if (foldShuffleOfCastops(I))
        if (foldShuffleOfShuffles(I))
        if (foldShuffleOfIntrinsics(I))
        if (foldSelectShuffle(I))
        if (foldShuffleToIdentity(I))
      case Instruction::Load:
        if (shrinkLoadForShuffles(I))
      case Instruction::BitCast:
        if (foldBitcastShuffle(I))
      case Instruction::And:
      case Instruction::Or:
      case Instruction::Xor:
        if (foldBitOpOfCastops(I))
        if (foldBitOpOfCastConstant(I))
      case Instruction::PHI:
        if (shrinkPhiOfShuffles(I))
      case Instruction::Call:
        if (foldShuffleFromReductions(I))
        if (foldCastFromReductions(I))
      case Instruction::ExtractElement:
        if (foldShuffleChainsToReduce(I))
      case Instruction::ICmp:
      case Instruction::FCmp:
        if (foldExtractExtract(I))
      case Instruction::Or:
        if (foldConcatOfBoolMasks(I))
      if (foldExtractExtract(I))
      if (foldExtractedCmps(I))
      if (foldBinopOfReductions(I))

  bool MadeChange = false;
  for (BasicBlock &BB : F) {
      if (!I->isDebugOrPseudoInst())
        MadeChange |= FoldInst(*I);
  while (!Worklist.isEmpty()) {
    MadeChange |= FoldInst(*I);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< unsigned > MaxInstrsToScan("aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden, cl::desc("Max number of instructions to scan for aggressive instcombine."))
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines the DenseMap class.
This is the interface for a simple mod/ref and alias analysis over globals.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
static Value * generateNewInstTree(ArrayRef< InstLane > Item, FixedVectorType *Ty, const SmallPtrSet< Use *, 4 > &IdentityLeafs, const SmallPtrSet< Use *, 4 > &SplatLeafs, const SmallPtrSet< Use *, 4 > &ConcatLeafs, IRBuilderBase &Builder, const TargetTransformInfo *TTI)
static bool isFreeConcat(ArrayRef< InstLane > Item, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI)
Detect concat of multiple values into a vector.
static void analyzeCostOfVecReduction(const IntrinsicInst &II, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI, InstructionCost &CostBeforeReduction, InstructionCost &CostAfterReduction)
static SmallVector< InstLane > generateInstLaneVectorFromOperand(ArrayRef< InstLane > Item, int Op)
static Value * createShiftShuffle(Value *Vec, unsigned OldIndex, unsigned NewIndex, IRBuilderBase &Builder)
Create a shuffle that translates (shifts) 1 element from the input vector to a new element location.
static Align computeAlignmentAfterScalarization(Align VectorAlignment, Type *ScalarType, Value *Idx, const DataLayout &DL)
The memory operation on a vector of ScalarType had alignment of VectorAlignment.
static bool feedsIntoVectorReduction(ShuffleVectorInst *SVI)
Returns true if this ShuffleVectorInst eventually feeds into a vector reduction intrinsic (e....
static ScalarizationResult canScalarizeAccess(VectorType *VecTy, Value *Idx, Instruction *CtxI, AssumptionCache &AC, const DominatorTree &DT)
Check if it is legal to scalarize a memory access to VecTy at index Idx.
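A simplified sketch of the in-bounds reasoning only (an assumption about the shape of the check; the real helper also distinguishes "safe after freezing Idx" and consults the context instruction, assumption cache, and dominator tree):

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
using namespace llvm;

// A scalarized access at lane Idx is safe when Idx is provably < NumElts.
static bool laneKnownInBounds(Value *Idx, unsigned NumElts) {
  if (auto *C = dyn_cast<ConstantInt>(Idx))
    return C->getValue().ult(NumElts);
  ConstantRange CR = computeConstantRange(Idx, /*ForSigned=*/false);
  return CR.getUnsignedMax().ult(NumElts);
}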
static cl::opt< bool > DisableVectorCombine("disable-vector-combine", cl::init(false), cl::Hidden, cl::desc("Disable all vector combine transforms"))
static InstLane lookThroughShuffles(Use *U, int Lane)
static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI)
static const unsigned InvalidIndex
std::pair< Use *, int > InstLane
static Value * translateExtract(ExtractElementInst *ExtElt, unsigned NewIndex, IRBuilderBase &Builder)
Given an extract element instruction with constant index operand, shuffle the source vector (shift the scalar element) to a NewIndex for extraction.
static cl::opt< unsigned > MaxInstrsToScan("vector-combine-max-scan-instrs", cl::init(30), cl::Hidden, cl::desc("Max number of instructions to scan for vector combining."))
static cl::opt< bool > DisableBinopExtractShuffle("disable-binop-extract-shuffle", cl::init(false), cl::Hidden, cl::desc("Disable binop extract to shuffle transforms"))
static bool isMemModifiedBetween(BasicBlock::iterator Begin, BasicBlock::iterator End, const MemoryLocation &Loc, AAResults &AA)
static constexpr int Concat[]
A manager for alias analyses.
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
InstListType::iterator iterator
Instruction iterators...
BinaryOps getOpcode() const
Represents analyses that only rely on functions' control flow.
Value * getArgOperand(unsigned i) const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's constructor.
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
bool isFPPredicate() const
static LLVM_ABI std::optional< CmpPredicate > getMatching(CmpPredicate A, CmpPredicate B)
Compares two CmpPredicates taking samesign into account and returns the canonicalized CmpPredicate if they match, or std::nullopt otherwise.
static LLVM_ABI Constant * getExtractElement(Constant *Vec, Constant *Idx, Type *OnlyIfReducedTy=nullptr)
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents a range of values.
LLVM_ABI ConstantRange urem(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an unsigned remainder operation of a value in this range and a value in Other.
LLVM_ABI ConstantRange binaryAnd(const ConstantRange &Other) const
Return a new range representing the possible values resulting from a binary-and of a value in this range by a value in Other.
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
A parsed version of the target data layout string in and methods for querying it.
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Convenience struct for specifying and reasoning about fast-math flags.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * getDoubleElementsVectorType(FixedVectorType *VTy)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Common base class shared among various IRBuilders.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcasted to NumElts elements.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateFreeze(Value *V, const Twine &Name="")
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Value * CreatePointerBitCastOrAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool' for the isVolatile parameter.
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
LLVM_ABI Value * CreateNAryOp(unsigned Opc, ArrayRef< Value * > Ops, const Twine &Name="", MDNode *FPMathTag=nullptr)
Create either a UnaryOperator or BinaryOperator depending on Opc.
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateFNegFMF(Value *V, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
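As a small illustration of how these builder calls compose in this kind of transform, here is a hedged sketch of scalarizing one lane of a vector binop; the helper name and the choice to insert back into the first operand are illustrative, not taken from this file:

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Extract the lane from both operands, apply the scalar operation,
// and insert the result back into the first vector at the same lane.
static Value *scalarizeOneLane(IRBuilderBase &Builder,
                               Instruction::BinaryOps Opc,
                               Value *VecA, Value *VecB, uint64_t Lane) {
  Value *A = Builder.CreateExtractElement(VecA, Lane);
  Value *B = Builder.CreateExtractElement(VecB, Lane);
  Value *Scalar = Builder.CreateBinOp(Opc, A, B);
  return Builder.CreateInsertElement(VecA, Scalar, Lane);
}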
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
void push(Instruction *I)
Push the instruction onto the worklist stack.
LLVM_ABI void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
LLVM_ABI void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this instruction.
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
LLVM_ABI void setNonNeg(bool b=true)
Set or clear the nneg flag on this instruction, which must be a zext instruction.
LLVM_ABI bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instruction comes before Other.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
This instruction constructs a fixed permutation of two input vectors.
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
static LLVM_ABI void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossings.
static void commuteShuffleMask(MutableArrayRef< int > Mask, unsigned InVecNumElts)
Change values in a shuffle permute mask assuming the two vector operands of length InVecNumElts have swapped position.
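For example (illustrative values), commuting a 4-wide shuffle's operands just toggles which source half each mask index points into:

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

void demoCommuteShuffleMask() {
  // Selects A[0], B[1], poison, A[3] for 4-element sources A and B.
  SmallVector<int> Mask = {0, 5, PoisonMaskElem, 3};
  ShuffleVectorInst::commuteShuffleMask(Mask, /*InVecNumElts=*/4);
  // Mask is now {4, 1, PoisonMaskElem, 7}: same lanes, sources swapped.
}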
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
void setAlignment(Align Align)
Analysis pass providing the TargetTransformInfo.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isPointerTy() const
True if this is an instance of PointerType.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
static LLVM_ABI bool isVPBinOp(Intrinsic::ID ID)
std::optional< unsigned > getFunctionalIntrinsicID() const
std::optional< unsigned > getFunctionalOpcode() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
unsigned getValueID() const
Return an ID for the concrete type of this object.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
std::pair< iterator, bool > insert(const ValueT &V)
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
DisjointOr_match< LHS, RHS > m_DisjointOr(const LHS &L, const RHS &R)
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
BinOpPred_match< LHS, RHS, is_bitwiselogic_op, true > m_c_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations in either order.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)
Match either "sext" or "zext nneg".
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
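A hedged sketch of how these matchers compose for the kind of insert-of-extract pattern this pass looks for; the helper and captured names are illustrative only:

#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Recognize: insertelement %Dst, (extractelement %Src, SrcLane), DstLane
// with both lane indices constant, capturing operands and indices.
static bool matchInsertOfExtract(Instruction &I, Value *&Dst, Value *&Src,
                                 uint64_t &DstLane, uint64_t &SrcLane) {
  return match(&I, m_InsertElt(m_Value(Dst),
                               m_ExtractElt(m_Value(Src),
                                            m_ConstantInt(SrcLane)),
                               m_ConstantInt(DstLane)));
}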
initializer< Ty > init(const Ty &Val)
friend class Instruction
Iterator for Instructions in a BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
FunctionAddr VTableAddr Value
void stable_sort(R &&Range)
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
unsigned Log2_64_Ceil(uint64_t Value)
Return the ceil log base 2 of the specified value, 64 if the value is zero.
LLVM_ABI Value * simplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q)
Given operand for a UnaryOperator, fold the result or return null.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
LLVM_ABI Value * simplifyCall(CallBase *Call, Value *Callee, ArrayRef< Value * > Args, const SimplifyQuery &Q)
Given a callsite, callee, and arguments, fold the result or return null.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
LLVM_ABI bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with an active sanitizer.
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of widened elements.
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not have undefined behavior.
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned element.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool isModSet(const ModRefInfo MRI)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOne bit sets.
LLVM_ABI bool programUndefinedIfPoison(const Instruction *Inst)
LLVM_ABI bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, Instruction *ScanFrom, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI void propagateIRFlags(Value *I, ArrayRef< Value * > VL, Value *OpValue=nullptr, bool IncludeWrapFlags=true)
Get the intersection (logical and) of all of the potential IR flags of each scalar operation (VL) tha...
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr int PoisonMaskElem
LLVM_ABI bool isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, const Instruction *Inst, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
This returns the same result as isSafeToSpeculativelyExecute if Opcode is the actual opcode of Inst.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed elements.
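Illustrative values for the two scaling directions (the widening direction can fail when index groups are not contiguous, so it returns bool):

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

void demoMaskScaling() {
  SmallVector<int> Narrowed, Widened;
  // Narrowing by 2 turns each index i into the pair {2*i, 2*i+1}.
  narrowShuffleMaskElts(2, /*Mask=*/{1, 0}, Narrowed);  // -> {2, 3, 0, 1}
  // Widening by 2 succeeds here because every pair is contiguous and aligned.
  bool Ok = widenShuffleMaskElts(2, Narrowed, Widened);  // -> true, {1, 0}
  (void)Ok;
}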
LLVM_ABI Intrinsic::ID getReductionForBinop(Instruction::BinaryOps Opc)
Returns the reduction intrinsic id corresponding to the binary operation.
@ And
Bitwise or logical AND of integers.
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
constexpr unsigned BitWidth
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one of its successors.
LLVM_ABI Constant * getLosslessInvCast(Constant *C, Type *InvCastTo, unsigned CastOp, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
Try to cast C to InvC losslessly, satisfying CastOp(InvC) equals C, or CastOp(InvC) is a refined valu...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
LLVM_ABI Value * simplifyCmpInst(CmpPredicate Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a CmpInst, fold the result or return null.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicID(Intrinsic::ID IID)
Returns the llvm.vector.reduce min/max intrinsic that corresponds to the intrinsic op.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
LLVM_ABI AAMDNodes adjustForAccess(unsigned AccessSize)
Create a new AAMDNode for accessing AccessSize bytes of this AAMDNode.
This struct is a compact representation of a valid (non-zero power of two) alignment.
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known bits.
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
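A small, self-contained illustration (assumed values) of how these KnownBits queries bound a variable lane index so that an access can still be proven in bounds:

#include "llvm/Support/KnownBits.h"
using namespace llvm;

void demoKnownBitsBound() {
  KnownBits Known(64);
  Known.Zero.setBitsFrom(3);  // bits [3, 64) known to be zero
  // The index is at most 7 and needs at most 3 active bits, so indexing an
  // 8-lane vector with it cannot go out of bounds.
  uint64_t MaxIdx = Known.getMaxValue().getZExtValue();  // == 7
  unsigned Bits = Known.countMaxActiveBits();            // == 3
  (void)MaxIdx;
  (void)Bits;
}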