26#include "llvm/IR/IntrinsicsARM.h"
45#define DEBUG_TYPE "armtti"
49 cl::desc(
"Enable the generation of masked loads and stores"));
53 cl::desc(
"Disable the generation of low-overhead loops"));
57 cl::desc(
"Enable the generation of WLS loops"));
61 cl::desc(
"Enable the widening of global strings to alignment boundaries"));
72 "Threshold for forced unrolling of small loops in Arm architecture"));
84 unsigned Alignment = IntrAlign->getLimitedValue() < MemAlign
86 : IntrAlign->getLimitedValue();
91 return Builder.CreateAlignedLoad(
II.getType(),
II.getArgOperand(0),
98 if (ST->hasMVEIntegerOps())
101 if (L->getHeader()->getParent()->hasOptSize())
104 if (ST->isMClass() && ST->isThumb2() &&
105 L->getNumBlocks() == 1)
111std::optional<Instruction *>
118 case Intrinsic::arm_neon_vld1: {
128 case Intrinsic::arm_neon_vld2:
129 case Intrinsic::arm_neon_vld3:
130 case Intrinsic::arm_neon_vld4:
131 case Intrinsic::arm_neon_vld2lane:
132 case Intrinsic::arm_neon_vld3lane:
133 case Intrinsic::arm_neon_vld4lane:
134 case Intrinsic::arm_neon_vst1:
135 case Intrinsic::arm_neon_vst2:
136 case Intrinsic::arm_neon_vst3:
137 case Intrinsic::arm_neon_vst4:
138 case Intrinsic::arm_neon_vst2lane:
139 case Intrinsic::arm_neon_vst3lane:
140 case Intrinsic::arm_neon_vst4lane: {
144 unsigned AlignArg =
II.arg_size() - 1;
145 Value *AlignArgOp =
II.getArgOperand(AlignArg);
156 case Intrinsic::arm_neon_vld1x2:
157 case Intrinsic::arm_neon_vld1x3:
158 case Intrinsic::arm_neon_vld1x4:
159 case Intrinsic::arm_neon_vst1x2:
160 case Intrinsic::arm_neon_vst1x3:
161 case Intrinsic::arm_neon_vst1x4: {
165 Align OldAlign =
II.getParamAlign(0).valueOrOne();
166 if (NewAlign > OldAlign)
172 case Intrinsic::arm_mve_pred_i2v: {
173 Value *Arg =
II.getArgOperand(0);
186 if (CI->getValue().trunc(16).isAllOnes()) {
201 case Intrinsic::arm_mve_pred_v2i: {
202 Value *Arg =
II.getArgOperand(0);
209 if (
II.getMetadata(LLVMContext::MD_range))
214 if (
auto CurrentRange =
II.getRange()) {
216 if (
Range == CurrentRange)
221 II.addRetAttr(Attribute::NoUndef);
224 case Intrinsic::arm_mve_vadc:
225 case Intrinsic::arm_mve_vadc_predicated: {
227 (
II.getIntrinsicID() == Intrinsic::arm_mve_vadc_predicated) ? 3 : 2;
228 assert(
II.getArgOperand(CarryOp)->getType()->getScalarSizeInBits() == 32 &&
229 "Bad type for intrinsic!");
238 case Intrinsic::arm_mve_vmldava: {
240 if (
I->hasOneUse()) {
245 Value *OpX =
I->getOperand(4);
246 Value *OpY =
I->getOperand(5);
252 {
I->getOperand(0),
I->getOperand(1),
253 I->getOperand(2), OpZ, OpX, OpY});
269 SimplifyAndSetOp)
const {
274 auto SimplifyNarrowInstrTopBottom =[&](
unsigned TopOpc) {
283 SimplifyAndSetOp(&
II, 0, OrigDemandedElts & DemandedElts, UndefElts);
290 switch (
II.getIntrinsicID()) {
293 case Intrinsic::arm_mve_vcvt_narrow:
294 SimplifyNarrowInstrTopBottom(2);
296 case Intrinsic::arm_mve_vqmovn:
297 SimplifyNarrowInstrTopBottom(4);
299 case Intrinsic::arm_mve_vshrn:
300 SimplifyNarrowInstrTopBottom(7);
309 assert(Ty->isIntegerTy());
311 unsigned Bits = Ty->getPrimitiveSizeInBits();
312 if (Bits == 0 || Imm.getActiveBits() >= 64)
315 int64_t SImmVal = Imm.getSExtValue();
316 uint64_t ZImmVal = Imm.getZExtValue();
317 if (!ST->isThumb()) {
318 if ((SImmVal >= 0 && SImmVal < 65536) ||
322 return ST->hasV6T2Ops() ? 2 : 3;
324 if (ST->isThumb2()) {
325 if ((SImmVal >= 0 && SImmVal < 65536) ||
329 return ST->hasV6T2Ops() ? 2 : 3;
332 if (Bits == 8 || (SImmVal >= 0 && SImmVal < 256))
345 if (Imm.isNonNegative() && Imm.getLimitedValue() < 256)
361 C->getValue() == Imm && Imm.isNegative() && Imm.isNegatedPowerOf2()) {
363 auto isSSatMin = [&](
Value *MinInst) {
365 Value *MinLHS, *MinRHS;
389 if (Imm.getBitWidth() != 64 ||
408 if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
409 Opcode == Instruction::SRem || Opcode == Instruction::URem) &&
415 if (Opcode == Instruction::GetElementPtr && Idx != 0)
418 if (Opcode == Instruction::And) {
420 if (Imm == 255 || Imm == 65535)
427 if (Opcode == Instruction::Add)
432 if (Opcode == Instruction::ICmp && Imm.isNegative() &&
433 Ty->getIntegerBitWidth() == 32) {
434 int64_t NegImm = -Imm.getSExtValue();
435 if (ST->isThumb2() && NegImm < 1<<12)
438 if (ST->isThumb() && NegImm < 1<<8)
444 if (Opcode == Instruction::Xor && Imm.isAllOnes())
449 if (Inst && ((ST->hasV6Ops() && !ST->isThumb()) || ST->isThumb2()) &&
450 Ty->getIntegerBitWidth() <= 32) {
461 if (Inst && Opcode == Instruction::ICmp && Idx == 1 && Imm.isAllOnes()) {
475 (ST->hasNEON() || ST->hasMVEIntegerOps())) {
490 int ISD = TLI->InstructionOpcodeToISD(Opcode);
496 return Cost == 0 ? 0 : 1;
499 auto IsLegalFPType = [
this](
EVT VT) {
501 return (EltVT == MVT::f32 && ST->hasVFP2Base()) ||
502 (EltVT == MVT::f64 && ST->hasFP64()) ||
503 (EltVT == MVT::f16 && ST->hasFullFP16());
506 EVT SrcTy = TLI->getValueType(
DL, Src);
507 EVT DstTy = TLI->getValueType(
DL, Dst);
509 if (!SrcTy.isSimple() || !DstTy.
isSimple())
516 if ((ST->hasMVEIntegerOps() &&
517 (Opcode == Instruction::Trunc || Opcode == Instruction::ZExt ||
518 Opcode == Instruction::SExt)) ||
519 (ST->hasMVEFloatOps() &&
520 (Opcode == Instruction::FPExt || Opcode == Instruction::FPTrunc) &&
521 IsLegalFPType(SrcTy) && IsLegalFPType(DstTy)))
524 ST->getMVEVectorCostFactor(
CostKind);
544 LoadConversionTbl,
ISD, DstTy.
getSimpleVT(), SrcTy.getSimpleVT()))
545 return AdjustCost(Entry->Cost);
564 if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
565 if (
const auto *Entry =
568 return Entry->Cost * ST->getMVEVectorCostFactor(
CostKind);
576 if (SrcTy.isVector() && ST->hasMVEFloatOps()) {
577 if (
const auto *Entry =
580 return Entry->Cost * ST->getMVEVectorCostFactor(
CostKind);
593 if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
594 if (
const auto *Entry =
597 return Entry->Cost * ST->getMVEVectorCostFactor(
CostKind);
604 if (SrcTy.isVector() && ST->hasMVEFloatOps()) {
605 if (
const auto *Entry =
608 return Entry->Cost * ST->getMVEVectorCostFactor(
CostKind);
614 I &&
I->hasOneUse() && ST->hasNEON() && SrcTy.isVector()) {
617 {
ISD::ADD, MVT::v4i32, MVT::v4i16, 0 },
618 {
ISD::ADD, MVT::v8i16, MVT::v8i8, 0 },
620 {
ISD::SUB, MVT::v4i32, MVT::v4i16, 0 },
621 {
ISD::SUB, MVT::v8i16, MVT::v8i8, 0 },
623 {
ISD::MUL, MVT::v4i32, MVT::v4i16, 0 },
624 {
ISD::MUL, MVT::v8i16, MVT::v8i8, 0 },
626 {
ISD::SHL, MVT::v4i32, MVT::v4i16, 0 },
627 {
ISD::SHL, MVT::v8i16, MVT::v8i8, 0 },
631 int UserISD = TLI->InstructionOpcodeToISD(
User->getOpcode());
634 SrcTy.getSimpleVT())) {
635 return AdjustCost(Entry->Cost);
640 if (Src->isVectorTy() && ST->hasNEON() &&
653 return AdjustCost(LT.first * Entry->Cost);
742 if (SrcTy.isVector() && ST->hasNEON()) {
745 SrcTy.getSimpleVT()))
746 return AdjustCost(Entry->Cost);
772 if (SrcTy.isFloatingPoint() && ST->hasNEON()) {
775 SrcTy.getSimpleVT()))
776 return AdjustCost(Entry->Cost);
803 if (SrcTy.isInteger() && ST->hasNEON()) {
806 SrcTy.getSimpleVT()))
807 return AdjustCost(Entry->Cost);
828 if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
831 SrcTy.getSimpleVT()))
832 return Entry->Cost * ST->getMVEVectorCostFactor(
CostKind);
842 if (SrcTy.isFixedLengthVector())
843 Lanes = SrcTy.getVectorNumElements();
845 if (IsLegalFPType(SrcTy) && IsLegalFPType(DstTy))
848 return Lanes * CallCost;
852 SrcTy.isFixedLengthVector()) {
855 if ((SrcTy.getScalarType() == MVT::i8 ||
856 SrcTy.getScalarType() == MVT::i16 ||
857 SrcTy.getScalarType() == MVT::i32) &&
858 SrcTy.getSizeInBits() > 128 &&
860 return SrcTy.getVectorNumElements() * 2;
875 if (SrcTy.isInteger()) {
878 SrcTy.getSimpleVT()))
879 return AdjustCost(Entry->Cost);
882 int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
883 ? ST->getMVEVectorCostFactor(
CostKind)
894 if (ST->hasSlowLoadDSubregister() && Opcode == Instruction::InsertElement &&
898 if (ST->hasNEON() && (Opcode == Instruction::InsertElement ||
899 Opcode == Instruction::ExtractElement)) {
909 return std::max<InstructionCost>(
915 if (ST->hasMVEIntegerOps() && (Opcode == Instruction::InsertElement ||
916 Opcode == Instruction::ExtractElement)) {
920 std::pair<InstructionCost, MVT> LT =
922 return LT.first * (ValTy->
getScalarType()->isIntegerTy() ? 4 : 1);
933 int ISD = TLI->InstructionOpcodeToISD(Opcode);
939 if (TLI->getValueType(
DL, ValTy,
true) == MVT::Other)
963 if ((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) && Sel &&
968 const Value *LHS, *RHS;
973 IID = Intrinsic::abs;
976 IID = Intrinsic::smin;
979 IID = Intrinsic::smax;
982 IID = Intrinsic::umin;
985 IID = Intrinsic::umax;
988 IID = Intrinsic::minnum;
991 IID = Intrinsic::maxnum;
1009 {
ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 },
1014 EVT SelCondTy = TLI->getValueType(
DL, CondTy);
1015 EVT SelValTy = TLI->getValueType(
DL, ValTy);
1027 if (ST->hasMVEIntegerOps() && ValTy->
isVectorTy() &&
1028 (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
1036 if (Opcode == Instruction::FCmp && !ST->hasMVEFloatOps()) {
1050 int BaseCost = ST->getMVEVectorCostFactor(
CostKind);
1056 if (LT.second.isVector() && LT.second.getVectorNumElements() > 2) {
1058 return LT.first * BaseCost +
1068 if (ST->hasMVEIntegerOps() && ValTy->
isVectorTy())
1069 BaseCost = ST->getMVEVectorCostFactor(
CostKind);
1083 unsigned NumVectorInstToHideOverhead = 10;
1084 int MaxMergeDistance = 64;
1086 if (ST->hasNEON()) {
1089 return NumVectorInstToHideOverhead;
1102 switch (
II->getIntrinsicID()) {
1103 case Intrinsic::arm_mve_vctp8:
1104 case Intrinsic::arm_mve_vctp16:
1105 case Intrinsic::arm_mve_vctp32:
1106 case Intrinsic::arm_mve_vctp64:
1123 if (VecTy->getNumElements() == 2)
1128 if (VecWidth != 128 && VecTy->getElementType()->isFloatingPointTy())
1133 return (EltWidth == 32 && Alignment >= 4) ||
1134 (EltWidth == 16 && Alignment >= 2) || (EltWidth == 8);
1141 unsigned EltWidth = Ty->getScalarSizeInBits();
1142 return ((EltWidth == 32 && Alignment >= 4) ||
1143 (EltWidth == 16 && Alignment >= 2) || EltWidth == 8);
1151 unsigned DstAddrSpace = ~0u;
1152 unsigned SrcAddrSpace = ~0u;
1153 const Function *
F =
I->getParent()->getParent();
1161 const unsigned Size =
C->getValue().getZExtValue();
1162 const Align DstAlign = MC->getDestAlign().valueOrOne();
1163 const Align SrcAlign = MC->getSourceAlign().valueOrOne();
1167 DstAddrSpace = MC->getDestAddressSpace();
1168 SrcAddrSpace = MC->getSourceAddressSpace();
1176 const unsigned Size =
C->getValue().getZExtValue();
1177 const Align DstAlign = MS->getDestAlign().valueOrOne();
1181 DstAddrSpace = MS->getDestAddressSpace();
1186 unsigned Limit, Factor = 2;
1187 switch(
I->getIntrinsicID()) {
1188 case Intrinsic::memcpy:
1189 Limit = TLI->getMaxStoresPerMemcpy(
F->hasMinSize());
1191 case Intrinsic::memmove:
1192 Limit = TLI->getMaxStoresPerMemmove(
F->hasMinSize());
1194 case Intrinsic::memset:
1195 Limit = TLI->getMaxStoresPerMemset(
F->hasMinSize());
1205 std::vector<EVT> MemOps;
1207 if (getTLI()->findOptimalMemOpLowering(
C, MemOps, Limit, MOp, DstAddrSpace,
1208 SrcAddrSpace,
F->getAttributes(),
1210 return MemOps.size() * Factor;
1235 "Expected the Mask to match the return size if given");
1237 "Expected the same scalar types");
1242 if (IsExtractSubvector)
1244 if (ST->hasNEON()) {
1261 if (
const auto *Entry =
1263 return LT.first * Entry->Cost;
1282 if (
const auto *Entry =
1284 return LT.first * Entry->Cost;
1308 return LT.first * Entry->Cost;
1311 if (ST->hasMVEIntegerOps()) {
1324 return LT.first * Entry->Cost * ST->getMVEVectorCostFactor(
CostKind);
1327 if (!Mask.empty()) {
1334 (LT.second.getScalarSizeInBits() == 8 ||
1335 LT.second.getScalarSizeInBits() == 16 ||
1336 LT.second.getScalarSizeInBits() == 32) &&
1337 LT.second.getSizeInBits() == 128 &&
1338 ((TLI->getMaxSupportedInterleaveFactor() >= 2 &&
1340 (TLI->getMaxSupportedInterleaveFactor() == 4 &&
1342 return ST->getMVEVectorCostFactor(
CostKind) *
1343 std::max<InstructionCost>(1, LT.first / 4);
1350 (LT.second.getScalarSizeInBits() == 8 ||
1351 LT.second.getScalarSizeInBits() == 16 ||
1352 LT.second.getScalarSizeInBits() == 32) &&
1353 LT.second.getSizeInBits() == 128 &&
1354 ((TLI->getMaxSupportedInterleaveFactor() >= 2 &&
1356 Mask, 2, SrcTy->getElementCount().getKnownMinValue() * 2)) ||
1357 (TLI->getMaxSupportedInterleaveFactor() == 4 &&
1359 Mask, 4, SrcTy->getElementCount().getKnownMinValue() * 2))))
1360 return ST->getMVEVectorCostFactor(
CostKind) * LT.first;
1362 if (LT.second.isVector() &&
1363 Mask.size() <= LT.second.getVectorNumElements() &&
1366 return ST->getMVEVectorCostFactor(
CostKind) * LT.first;
1371 if (IsExtractSubvector)
1373 int BaseCost = ST->hasMVEIntegerOps() && SrcTy->isVectorTy()
1374 ? ST->getMVEVectorCostFactor(
CostKind)
1384 int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
1389 switch (ISDOpcode) {
1402 if (ST->hasNEON()) {
1403 const unsigned FunctionCallDivCost = 20;
1404 const unsigned ReciprocalDivCost = 10;
1410 {
ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
1411 {
ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
1412 {
ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
1413 {
ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
1414 {
ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
1415 {
ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
1416 {
ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
1417 {
ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
1418 {
ISD::SDIV, MVT::v4i16, ReciprocalDivCost},
1419 {
ISD::UDIV, MVT::v4i16, ReciprocalDivCost},
1420 {
ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
1421 {
ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
1422 {
ISD::SDIV, MVT::v8i8, ReciprocalDivCost},
1423 {
ISD::UDIV, MVT::v8i8, ReciprocalDivCost},
1424 {
ISD::SREM, MVT::v8i8, 8 * FunctionCallDivCost},
1425 {
ISD::UREM, MVT::v8i8, 8 * FunctionCallDivCost},
1427 {
ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
1428 {
ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
1429 {
ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
1430 {
ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
1431 {
ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
1432 {
ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
1433 {
ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
1434 {
ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
1435 {
ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
1436 {
ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
1437 {
ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
1438 {
ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
1439 {
ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
1440 {
ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
1441 {
ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
1442 {
ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},
1446 if (
const auto *Entry =
CostTableLookup(CostTbl, ISDOpcode, LT.second))
1447 return LT.first * Entry->Cost;
1450 Opcode, Ty,
CostKind, Op1Info, Op2Info);
1467 auto LooksLikeAFreeShift = [&]() {
1468 if (ST->isThumb1Only() || Ty->isVectorTy())
1478 case Instruction::Add:
1479 case Instruction::Sub:
1480 case Instruction::And:
1481 case Instruction::Xor:
1482 case Instruction::Or:
1483 case Instruction::ICmp:
1489 if (LooksLikeAFreeShift())
1499 auto MulInDSPMLALPattern = [&](
const Instruction *
I,
unsigned Opcode,
1507 if (Opcode != Instruction::Mul)
1510 if (Ty->isVectorTy())
1513 auto ValueOpcodesEqual = [](
const Value *LHS,
const Value *RHS) ->
bool {
1517 auto IsExtInst = [](
const Value *V) ->
bool {
1520 auto IsExtensionFromHalf = [](
const Value *V) ->
bool {
1528 Value *Op0 = BinOp->getOperand(0);
1529 Value *Op1 = BinOp->getOperand(1);
1530 if (IsExtInst(Op0) && IsExtInst(Op1) && ValueOpcodesEqual(Op0, Op1)) {
1532 if (!
I->getType()->isIntegerTy(32) || !IsExtensionFromHalf(Op0) ||
1533 !IsExtensionFromHalf(Op1))
1537 for (
auto *U :
I->users())
1546 if (MulInDSPMLALPattern(CxtI, Opcode, Ty))
1552 if (ST->hasMVEIntegerOps() && Ty->isVectorTy())
1553 BaseCost = ST->getMVEVectorCostFactor(
CostKind);
1559 if (TLI->isOperationLegalOrCustomOrPromote(ISDOpcode, LT.second))
1560 return LT.first * BaseCost;
1564 unsigned Num = VTy->getNumElements();
1593 if (TLI->getValueType(
DL, Src,
true) == MVT::Other)
1597 if (ST->hasNEON() && Src->isVectorTy() && Alignment !=
Align(16) &&
1602 return LT.first * 4;
1608 ((Opcode == Instruction::Load &&
I->hasOneUse() &&
1613 Opcode == Instruction::Load
1614 ? (*
I->user_begin())->getType()
1618 return ST->getMVEVectorCostFactor(
CostKind);
1621 int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
1622 ? ST->getMVEVectorCostFactor(
CostKind)
1631 switch (MICA.
getID()) {
1632 case Intrinsic::masked_scatter:
1633 case Intrinsic::masked_gather:
1635 case Intrinsic::masked_load:
1636 case Intrinsic::masked_store:
1645 unsigned IID = MICA.
getID();
1649 if (ST->hasMVEIntegerOps()) {
1650 if (IID == Intrinsic::masked_load &&
1652 return ST->getMVEVectorCostFactor(
CostKind);
1653 if (IID == Intrinsic::masked_store &&
1655 return ST->getMVEVectorCostFactor(
CostKind);
1667 bool UseMaskForCond,
bool UseMaskForGaps)
const {
1668 assert(Factor >= 2 &&
"Invalid interleave factor");
1672 bool EltIs64Bits =
DL.getTypeSizeInBits(VecTy->
getScalarType()) == 64;
1674 if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits &&
1675 !UseMaskForCond && !UseMaskForGaps) {
1684 ST->hasMVEIntegerOps() ? ST->getMVEVectorCostFactor(
CostKind) : 1;
1685 if (NumElts % Factor == 0 &&
1686 TLI->isLegalInterleavedAccessType(Factor, SubVecTy, Alignment,
DL))
1687 return Factor * BaseCost * TLI->getNumInterleavedAccesses(SubVecTy,
DL);
1694 if (ST->hasMVEIntegerOps() && Factor == 2 && NumElts / Factor > 2 &&
1696 DL.getTypeSizeInBits(SubVecTy).getFixedValue() <= 64)
1697 return 2 * BaseCost;
1702 UseMaskForCond, UseMaskForGaps);
1724 unsigned NumElems = VTy->getNumElements();
1725 unsigned EltSize = VTy->getScalarSizeInBits();
1734 NumElems * LT.first * ST->getMVEVectorCostFactor(
CostKind);
1740 NumElems * LT.first + (VariableMask ? NumElems * 5 : 0) +
1746 if (EltSize < 8 || Alignment < EltSize / 8)
1749 unsigned ExtSize = EltSize;
1755 if ((
I->getOpcode() == Instruction::Load ||
1758 const User *Us = *
I->users().begin();
1763 if (((
TypeSize == 32 && (EltSize == 8 || EltSize == 16)) ||
1764 (
TypeSize == 16 && EltSize == 8)) &&
1772 if ((
I->getOpcode() == Instruction::Store ||
1776 unsigned TypeSize =
T->getOperand(0)->getType()->getScalarSizeInBits();
1777 if (((EltSize == 16 &&
TypeSize == 32) ||
1784 if (ExtSize * NumElems != 128 || NumElems < 4)
1793 if (ExtSize != 8 && ExtSize != 16)
1797 Ptr = BC->getOperand(0);
1799 if (
GEP->getNumOperands() != 2)
1801 unsigned Scale =
DL.getTypeAllocSize(
GEP->getResultElementType());
1803 if (Scale != 1 && Scale * 8 != ExtSize)
1807 if (ZExt->getOperand(0)->getType()->getScalarSizeInBits() <= ExtSize)
1817 std::optional<FastMathFlags> FMF,
1820 EVT ValVT = TLI->getValueType(
DL, ValTy);
1821 int ISD = TLI->InstructionOpcodeToISD(Opcode);
1828 ((EltSize == 32 && ST->hasVFP2Base()) ||
1829 (EltSize == 64 && ST->hasFP64()) ||
1830 (EltSize == 16 && ST->hasFullFP16()))) {
1832 unsigned VecLimit = ST->hasMVEFloatOps() ? 128 : (ST->hasNEON() ? 64 : -1);
1835 NumElts * EltSize > VecLimit) {
1846 VecCost += ST->getMVEVectorCostFactor(
CostKind) * 2;
1849 ExtractCost = NumElts / 2;
1851 return VecCost + ExtractCost +
1857 (EltSize == 64 || EltSize == 32 || EltSize == 16 || EltSize == 8)) {
1860 ST->hasMVEIntegerOps() ? 128 : (ST->hasNEON() ? 64 : -1);
1862 while (
isPowerOf2_32(NumElts) && NumElts * EltSize > VecLimit) {
1870 NumElts * EltSize == 64) {
1872 VecCost += ST->getMVEVectorCostFactor(
CostKind) +
1879 return VecCost + ExtractCost +
1896 return Entry->Cost * ST->getMVEVectorCostFactor(
CostKind) * LT.first;
1902 unsigned Opcode,
bool IsUnsigned,
Type *ResTy,
VectorType *ValTy,
1904 EVT ValVT = TLI->getValueType(
DL, ValTy);
1905 EVT ResVT = TLI->getValueType(
DL, ResTy);
1907 int ISD = TLI->InstructionOpcodeToISD(Opcode);
1922 ((LT.second == MVT::v16i8 && RevVTSize <= 32) ||
1923 (LT.second == MVT::v8i16 && RevVTSize <= 32) ||
1924 (LT.second == MVT::v4i32 && RevVTSize <= 64)))
1925 return ST->getMVEVectorCostFactor(
CostKind) * LT.first;
1939 if (RedOpcode != Instruction::Add)
1941 EVT ValVT = TLI->getValueType(
DL, ValTy);
1942 EVT ResVT = TLI->getValueType(
DL, ResTy);
1955 ((LT.second == MVT::v16i8 && RevVTSize <= 32) ||
1956 (LT.second == MVT::v8i16 && RevVTSize <= 64) ||
1957 (LT.second == MVT::v4i32 && RevVTSize <= 64)))
1958 return ST->getMVEVectorCostFactor(
CostKind) * LT.first;
1969 EVT ValVT = TLI->getValueType(
DL, Ty);
1974 if ((IID == Intrinsic::minnum || IID == Intrinsic::maxnum) &&
1980 unsigned VecLimit = ST->hasMVEFloatOps() ? 128 : (ST->hasNEON() ? 64 : -1);
1982 while (
isPowerOf2_32(NumElts) && NumElts * EltSize > VecLimit) {
1994 VecCost += ST->getMVEVectorCostFactor(
CostKind) * 2;
2000 {Ty->getElementType(), Ty->getElementType()},
2002 return VecCost + ExtractCost +
2006 if (IID == Intrinsic::smin || IID == Intrinsic::smax ||
2007 IID == Intrinsic::umin || IID == Intrinsic::umax) {
2019 return Entry->Cost * ST->getMVEVectorCostFactor(
CostKind) * LT.first;
2030 case Intrinsic::get_active_lane_mask:
2038 if (ST->hasMVEIntegerOps())
2041 case Intrinsic::sadd_sat:
2042 case Intrinsic::ssub_sat:
2043 case Intrinsic::uadd_sat:
2044 case Intrinsic::usub_sat: {
2045 bool IsAdd = (
Opc == Intrinsic::sadd_sat ||
Opc == Intrinsic::ssub_sat);
2046 bool IsSigned = (
Opc == Intrinsic::sadd_sat ||
Opc == Intrinsic::ssub_sat);
2050 if (IsSigned && ST->hasDSP() && ITy->getBitWidth() == 32)
2052 if (ST->hasDSP() && (ITy->getBitWidth() == 8 || ITy->getBitWidth() == 16))
2066 if (!ST->hasMVEIntegerOps())
2070 if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
2071 LT.second == MVT::v16i8) {
2077 return LT.first * ST->getMVEVectorCostFactor(
CostKind) * Instrs;
2081 case Intrinsic::abs:
2082 case Intrinsic::smin:
2083 case Intrinsic::smax:
2084 case Intrinsic::umin:
2085 case Intrinsic::umax: {
2086 if (!ST->hasMVEIntegerOps())
2091 if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
2092 LT.second == MVT::v16i8)
2093 return LT.first * ST->getMVEVectorCostFactor(
CostKind);
2096 case Intrinsic::minnum:
2097 case Intrinsic::maxnum: {
2098 if (!ST->hasMVEFloatOps())
2102 if (LT.second == MVT::v4f32 || LT.second == MVT::v8f16)
2103 return LT.first * ST->getMVEVectorCostFactor(
CostKind);
2106 case Intrinsic::fptosi_sat:
2107 case Intrinsic::fptoui_sat: {
2110 bool IsSigned =
Opc == Intrinsic::fptosi_sat;
2114 if ((ST->hasVFP2Base() && LT.second == MVT::f32 && MTy == MVT::i32) ||
2115 (ST->hasFP64() && LT.second == MVT::f64 && MTy == MVT::i32) ||
2116 (ST->hasFullFP16() && LT.second == MVT::f16 && MTy == MVT::i32))
2120 if (ST->hasMVEFloatOps() &&
2121 (LT.second == MVT::v4f32 || LT.second == MVT::v8f16) &&
2123 return LT.first * ST->getMVEVectorCostFactor(
CostKind);
2126 if (((ST->hasVFP2Base() && LT.second == MVT::f32) ||
2127 (ST->hasFP64() && LT.second == MVT::f64) ||
2128 (ST->hasFullFP16() && LT.second == MVT::f16) ||
2129 (ST->hasMVEFloatOps() &&
2130 (LT.second == MVT::v4f32 || LT.second == MVT::v8f16))) &&
2133 LT.second.getScalarSizeInBits());
2135 LT.second.isVector() ? ST->getMVEVectorCostFactor(
CostKind) : 1;
2138 LegalTy, {LegalTy, LegalTy});
2142 LegalTy, {LegalTy, LegalTy});
2144 return LT.first *
Cost;
2172 if (!
F->isIntrinsic())
2176 if (
F->getName().starts_with(
"llvm.arm"))
2179 switch (
F->getIntrinsicID()) {
2181 case Intrinsic::powi:
2182 case Intrinsic::sin:
2183 case Intrinsic::cos:
2184 case Intrinsic::sincos:
2185 case Intrinsic::pow:
2186 case Intrinsic::log:
2187 case Intrinsic::log10:
2188 case Intrinsic::log2:
2189 case Intrinsic::exp:
2190 case Intrinsic::exp2:
2192 case Intrinsic::sqrt:
2193 case Intrinsic::fabs:
2194 case Intrinsic::copysign:
2195 case Intrinsic::floor:
2196 case Intrinsic::ceil:
2197 case Intrinsic::trunc:
2198 case Intrinsic::rint:
2199 case Intrinsic::nearbyint:
2200 case Intrinsic::round:
2201 case Intrinsic::canonicalize:
2202 case Intrinsic::lround:
2203 case Intrinsic::llround:
2204 case Intrinsic::lrint:
2205 case Intrinsic::llrint:
2206 if (
F->getReturnType()->isDoubleTy() && !ST->hasFP64())
2208 if (
F->getReturnType()->isHalfTy() && !ST->hasFullFP16())
2213 return !ST->hasFPARMv8Base() && !ST->hasVFP2Base();
2214 case Intrinsic::masked_store:
2215 case Intrinsic::masked_load:
2216 case Intrinsic::masked_gather:
2217 case Intrinsic::masked_scatter:
2218 return !ST->hasMVEIntegerOps();
2219 case Intrinsic::sadd_with_overflow:
2220 case Intrinsic::uadd_with_overflow:
2221 case Intrinsic::ssub_with_overflow:
2222 case Intrinsic::usub_with_overflow:
2223 case Intrinsic::sadd_sat:
2224 case Intrinsic::uadd_sat:
2225 case Intrinsic::ssub_sat:
2226 case Intrinsic::usub_sat:
2234 unsigned ISD = TLI->InstructionOpcodeToISD(
I.getOpcode());
2235 EVT VT = TLI->getValueType(
DL,
I.getType(),
true);
2243 switch(
II->getIntrinsicID()) {
2244 case Intrinsic::memcpy:
2245 case Intrinsic::memset:
2246 case Intrinsic::memmove:
2258 switch (
I.getOpcode()) {
2261 case Instruction::FPToSI:
2262 case Instruction::FPToUI:
2263 case Instruction::SIToFP:
2264 case Instruction::UIToFP:
2265 case Instruction::FPTrunc:
2266 case Instruction::FPExt:
2267 return !ST->hasFPARMv8Base();
2295 if (TLI->useSoftFloat()) {
2296 switch (
I.getOpcode()) {
2299 case Instruction::Alloca:
2300 case Instruction::Load:
2301 case Instruction::Store:
2302 case Instruction::Select:
2303 case Instruction::PHI:
2310 if (
I.getType()->isDoubleTy() && !ST->hasFP64())
2314 if (
I.getType()->isHalfTy() && !ST->hasFullFP16())
2342 const SCEV *TripCountSCEV =
2348 LLVM_DEBUG(
dbgs() <<
"ARMHWLoops: Trip count does not fit into 32bits\n");
2357 switch (
Call->getIntrinsicID()) {
2360 case Intrinsic::start_loop_iterations:
2361 case Intrinsic::test_start_loop_iterations:
2362 case Intrinsic::loop_decrement:
2363 case Intrinsic::loop_decrement_reg:
2373 bool IsTailPredLoop =
false;
2374 auto ScanLoop = [&](
Loop *L) {
2375 for (
auto *BB : L->getBlocks()) {
2376 for (
auto &
I : *BB) {
2384 II->getIntrinsicID() == Intrinsic::get_active_lane_mask ||
2385 II->getIntrinsicID() == Intrinsic::arm_mve_vctp8 ||
2386 II->getIntrinsicID() == Intrinsic::arm_mve_vctp16 ||
2387 II->getIntrinsicID() == Intrinsic::arm_mve_vctp32 ||
2388 II->getIntrinsicID() == Intrinsic::arm_mve_vctp64;
2395 for (
auto *Inner : *L)
2396 if (!ScanLoop(Inner))
2428 if ((
II->getIntrinsicID() == Intrinsic::smin ||
2429 II->getIntrinsicID() == Intrinsic::smax ||
2430 II->getIntrinsicID() == Intrinsic::umin ||
2431 II->getIntrinsicID() == Intrinsic::umax) &&
2470 LLVM_DEBUG(
dbgs() <<
"Tail-predication: checking allowed instructions\n");
2481 bool ReductionsDisabled =
2485 for (
auto *
I : LiveOuts) {
2486 if (!
I->getType()->isIntegerTy() && !
I->getType()->isFloatTy() &&
2487 !
I->getType()->isHalfTy()) {
2488 LLVM_DEBUG(
dbgs() <<
"Don't tail-predicate loop with non-integer/float "
2489 "live-out value\n");
2492 if (ReductionsDisabled) {
2512 if (
T->getScalarSizeInBits() > 32) {
2519 int64_t NextStride =
2521 if (NextStride == 1) {
2526 }
else if (NextStride == -1 ||
2530 <<
"Consecutive strides of 2 found, vld2/vstr2 can't "
2531 "be tail-predicated\n.");
2541 const SCEV *Step = AR->getStepRecurrence(*PSE.
getSE());
2547 "tail-predicate\n.");
2553 LLVM_DEBUG(
dbgs() <<
"tail-predication: all instructions allowed!\n");
2566 if (!ST->hasMVEIntegerOps())
2573 if (L->getNumBlocks() > 1) {
2574 LLVM_DEBUG(
dbgs() <<
"preferTailFoldingOverEpilogue: not a single block "
2579 assert(L->isInnermost() &&
2580 "preferTailFoldingOverEpilogue: inner-loop expected");
2585 LLVM_DEBUG(
dbgs() <<
"preferTailFoldingOverEpilogue: hardware-loop is not "
2596 LLVM_DEBUG(
dbgs() <<
"preferTailFoldingOverEpilogue: hardware-loop is not "
2603 LLVM_DEBUG(
dbgs() <<
"preferTailFoldingOverEpilogue: hardware-loop is not "
2630 return isa<IntrinsicInst>(I) &&
2631 cast<IntrinsicInst>(I).getIntrinsicID() ==
2632 Intrinsic::get_active_lane_mask;
2636 if (!ST->isMClass())
2642 if (L->getHeader()->getParent()->hasOptSize())
2646 L->getExitingBlocks(ExitingBlocks);
2648 <<
"Blocks: " << L->getNumBlocks() <<
"\n"
2649 <<
"Exit blocks: " << ExitingBlocks.
size() <<
"\n");
2653 if (ExitingBlocks.
size() > 2)
2658 if (ST->hasBranchPredictor() && L->getNumBlocks() > 4)
2668 for (
auto *BB : L->getBlocks()) {
2669 for (
auto &
I : *BB) {
2672 if (
I.getType()->isVectorTy())
2696 if (ST->isThumb1Only()) {
2697 unsigned ExitingValues = 0;
2699 L->getExitBlocks(ExitBlocks);
2700 for (
auto *Exit : ExitBlocks) {
2703 unsigned LiveOuts =
count_if(Exit->phis(), [](
auto &PH) {
2704 return PH.getNumOperands() != 1 ||
2705 !isa<GetElementPtrInst>(PH.getOperand(0));
2707 ExitingValues = ExitingValues < LiveOuts ? LiveOuts : ExitingValues;
2728 auto *Outer = L->getOutermostLoop();
2729 if ((L != Outer && Outer != L->getParentLoop()) ||
2758 if (!ST->hasMVEIntegerOps())
2761 unsigned ScalarBits = Ty->getScalarSizeInBits();
2764 return ScalarBits <= 64;
2771 if (!ST->hasMVEIntegerOps())
2778 bool HasBaseReg, int64_t Scale,
2779 unsigned AddrSpace)
const {
2788 return AM.
Scale < 0 ? 1 : 0;
2798 return ST->hasMVEIntegerOps();
2806 return ST->isThumb2() || ST->hasV8MBaselineOps();
2810 return ST->hasARMOps();
2820 return Ext->getType()->getScalarSizeInBits() ==
2821 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
2840 if (!
I->getType()->isVectorTy())
2843 if (ST->hasNEON()) {
2844 switch (
I->getOpcode()) {
2845 case Instruction::Sub:
2846 case Instruction::Add: {
2849 Ops.push_back(&
I->getOperandUse(0));
2850 Ops.push_back(&
I->getOperandUse(1));
2858 if (!ST->hasMVEIntegerOps())
2862 if (!
I->hasOneUse())
2865 return Sub->getOpcode() == Instruction::FSub &&
Sub->getOperand(1) ==
I;
2875 switch (
I->getOpcode()) {
2876 case Instruction::Add:
2877 case Instruction::Mul:
2878 case Instruction::FAdd:
2879 case Instruction::ICmp:
2880 case Instruction::FCmp:
2882 case Instruction::FMul:
2883 return !IsFMSMul(
I);
2884 case Instruction::Sub:
2885 case Instruction::FSub:
2886 case Instruction::Shl:
2887 case Instruction::LShr:
2888 case Instruction::AShr:
2889 return Operand == 1;
2890 case Instruction::Call:
2892 switch (
II->getIntrinsicID()) {
2893 case Intrinsic::fma:
2895 case Intrinsic::sadd_sat:
2896 case Intrinsic::uadd_sat:
2897 case Intrinsic::arm_mve_add_predicated:
2898 case Intrinsic::arm_mve_mul_predicated:
2899 case Intrinsic::arm_mve_qadd_predicated:
2900 case Intrinsic::arm_mve_vhadd:
2901 case Intrinsic::arm_mve_hadd_predicated:
2902 case Intrinsic::arm_mve_vqdmull:
2903 case Intrinsic::arm_mve_vqdmull_predicated:
2904 case Intrinsic::arm_mve_vqdmulh:
2905 case Intrinsic::arm_mve_qdmulh_predicated:
2906 case Intrinsic::arm_mve_vqrdmulh:
2907 case Intrinsic::arm_mve_qrdmulh_predicated:
2908 case Intrinsic::arm_mve_fma_predicated:
2910 case Intrinsic::ssub_sat:
2911 case Intrinsic::usub_sat:
2912 case Intrinsic::arm_mve_sub_predicated:
2913 case Intrinsic::arm_mve_qsub_predicated:
2914 case Intrinsic::arm_mve_hsub_predicated:
2915 case Intrinsic::arm_mve_vhsub:
2916 return Operand == 1;
2934 if (Shuffle->
getOpcode() == Instruction::BitCast)
2941 if (!IsSinker(
I,
OpIdx.index()))
2946 for (
Use &U :
Op->uses()) {
2948 if (!IsSinker(Insn, U.getOperandNo()))
2954 Ops.push_back(&
Op->getOperandUse(0));
2976 unsigned NumBytesToPad = 4 - (
Size % 4);
2977 unsigned NewSize =
Size + NumBytesToPad;
2983 if (NewSize > MaxMemIntrinsicSize)
2986 return NumBytesToPad;
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
cl::opt< unsigned > MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden, cl::desc("Maximum interleave factor for MVE VLDn to generate."), cl::init(2))
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Cost tables and simple lookup functions.
This file provides the interface for the instcombine pass implementation.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static cl::opt< unsigned > UnrollCount("unroll-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_count pragma values, for testing purposes"))
This file defines the LoopVectorizationLegality class.
static const Function * getCalledFunction(const Value *V)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
This file defines the SmallVector class.
Class for arbitrary precision integers.
unsigned getBitWidth() const
Return the number of bits in the APInt.
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
InstructionCost getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
InstructionCost getAddressComputationCost(Type *Val, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const override
InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getMemcpyCost(const Instruction *I) const override
bool maybeLoweredToCall(Instruction &I) const
bool preferInLoopReduction(RecurKind Kind, Type *Ty) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *ValTy, TTI::TargetCostKind CostKind) const override
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
bool hasArmWideBranch(bool Thumb) const override
bool shouldConsiderVectorizationRegPressure() const override
bool preferTailFoldingOverEpilogue(TailFoldingInfo *TFI) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
int getNumMemOps(const IntrinsicInst *I) const
Given a memcpy/memset/memmove instruction, return the number of memory operations performed,...
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty) const override
bool isLoweredToCall(const Function *F) const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override
bool isLegalMaskedStore(Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::MaskKind MaskKind=TTI::MaskKind::VariableOrConstantMask) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
bool isLegalMaskedLoad(Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::MaskKind MaskKind=TTI::MaskKind::VariableOrConstantMask) const override
TailFoldingStyle getPreferredTailFoldingStyle() const override
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
TTI::AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const override
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
bool preferPredicatedReductionSelect() const override
bool isLegalMaskedGather(Type *Ty, Align Alignment) const override
unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const override
bool isProfitableLSRChainElement(Instruction *I) const override
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const override
getScalingFactorCost - Return the cost of the scaling used in addressing mode represented by AM.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Class to represent array types.
A cache of @llvm.assume calls within a function.
static LLVM_ABI Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
LLVM Basic Block Representation.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *, const SCEV *, TTI::TargetCostKind) const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLE
signed less or equal
@ ICMP_SGT
signed greater than
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents a range of values.
This is an important base class in LLVM.
A parsed version of the target data layout string in and methods for querying it.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Convenience struct for specifying and reasoning about fast-math flags.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcasted to NumElts elements.
ConstantInt * getTrue()
Get the constant value for i1 true.
LLVM_ABI Value * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="", ArrayRef< OperandBundleDef > OpBundles={}, function_ref< void(CallInst *)> SetFn=[](CallInst *) {})
Variant to create a possibly constant-folded intrinsic.
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
The core instruction combiner logic.
const DataLayout & getDataLayout() const
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
DominatorTree & getDominatorTree() const
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
virtual bool SimplifyDemandedBits(Instruction *I, unsigned OpNo, const APInt &DemandedMask, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0)=0
IRBuilder< TargetFolder, IRBuilderInstCombineInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
AssumptionCache & getAssumptionCache() const
static InstructionCost getInvalid(CostType Val=0)
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other instructions.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
Drive the analysis of memory accesses in the loop.
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
LoopInfo * getLoopInfo() const
DominatorTree * getDominatorTree() const
AssumptionCache * getAssumptionCache() const
const LoopAccessInfo * getLAI() const
ScalarEvolution * getScalarEvolution() const
Represents a single loop in the control flow graph.
Information for memory intrinsic cost model.
Align getAlignment() const
unsigned getAddressSpace() const
Type * getDataType() const
const Value * getPointer() const
bool getVariableMask() const
Intrinsic::ID getID() const
const Instruction * getInst() const
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCouldNotCompute object.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool hasLoopInvariantBackedgeTakenCount(const Loop *L)
Return true if the specified loop has an analyzable loop-invariant backedge-taken count.
APInt getUnsignedRangeMax(const SCEV *S)
Determine the max of the unsigned range for a particular SCEV.
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
Provides information about what library functions are available for the current target.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isArrayTy() const
True if this is an instance of ArrayType.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Type * getArrayElementType() const
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old number of lanes.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Type * getElementType() const
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int getSOImmVal(unsigned Arg)
getSOImmVal - Given a 32-bit immediate, if it is something that can fit into an shifter_operand immed...
bool isThumbImmShiftedVal(unsigned V)
isThumbImmShiftedVal - Return true if the specified value can be obtained by left shifting a 8-bit im...
int getT2SOImmVal(unsigned Arg)
getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit into a Thumb-2 shifter_oper...
@ C
The default llvm calling convention, compatible with C.
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types a...
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
@ SIGN_EXTEND
Conversion operators.
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ SHL
Shift and rotation operations.
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
auto m_Value()
Match an arbitrary value and ignore it.
auto m_Constant()
Match an arbitrary Constant and ignore it.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
auto m_Undef()
Match an arbitrary undef constant.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
@ ForceEnabledNoReductions
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
TypeConversionCostTblEntryT< unsigned > TypeConversionCostTblEntry
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
@ Runtime
Detect stack use after return if not disabled runtime with (ASAN_OPTIONS=detect_stack_use_after_retur...
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
auto dyn_cast_or_null(const Y &Val)
Align getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI SmallVector< Instruction *, 8 > findDefsUsedOutsideOfLoop(Loop *L)
Returns the instructions that use values defined in the loop.
SelectPatternFlavor
Specific patterns of select instructions we can match.
@ SPF_ABS
Absolute value.
@ SPF_FMAXNUM
Floating point maxnum.
@ SPF_UMIN
Unsigned minimum.
@ SPF_UMAX
Unsigned maximum.
@ SPF_SMAX
Signed maximum.
@ SPF_FMINNUM
Floating point minnum.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp=nullptr, unsigned Depth=0)
Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind and providing the out param...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
RecurKind
These are the kinds of recurrences that we support.
@ Sub
Subtraction of integers.
DWARFExpression::Operation Op
CostTblEntryT< unsigned > CostTblEntry
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool isVREVMask(ArrayRef< int > M, EVT VT, unsigned BlockSize)
isVREVMask - Check if a vector shuffle corresponds to a VREV instruction with the specified blocksize...
@ DataWithoutLaneMask
Same as Data, but avoids using the get.active.lane.mask intrinsic to calculate the mask and instead i...
@ Data
Use predicate only to mask operations on data in the loop.
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool ShouldCheckWrap=true, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
If the pointer has a constant stride return it in units of the access type size.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
uint64_t getScalarSizeInBits() const
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
bool isInteger() const
Return true if this is an integer or a vector integer type.
Attributes of a target dependent hardware loop.
LLVM_ABI bool canAnalyze(LoopInfo &LI)
LLVM_ABI bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)
SelectPatternFlavor Flavor
LoopVectorizationLegality * LVL
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...