23#include "llvm/IR/IntrinsicsAArch64.h"
35#define DEBUG_TYPE "aarch64tti"
41 "sve-prefer-fixed-over-scalable-if-equal",
cl::Hidden);
59 "Penalty of calling a function that requires a change to PSTATE.SM"));
63 cl::desc(
"Penalty of inlining a call that requires a change to PSTATE.SM"));
74 cl::desc(
"The cost of a histcnt instruction"));
78 cl::desc(
"The number of instructions to search for a redundant dmb"));
82 cl::desc(
"Threshold for forced unrolling of small loops in AArch64"));
85class TailFoldingOption {
100 bool NeedsDefault =
true;
104 void setNeedsDefault(
bool V) { NeedsDefault =
V; }
119 assert((InitialBits == TailFoldingOpts::Disabled || !NeedsDefault) &&
120 "Initial bits should only include one of "
121 "(disabled|all|simple|default)");
122 Bits = NeedsDefault ? DefaultBits : InitialBits;
124 Bits &= ~DisableBits;
130 errs() <<
"invalid argument '" << Opt
131 <<
"' to -sve-tail-folding=; the option should be of the form\n"
132 " (disabled|all|default|simple)[+(reductions|recurrences"
133 "|reverse|noreductions|norecurrences|noreverse)]\n";
139 void operator=(
const std::string &Val) {
148 setNeedsDefault(
false);
151 StringRef(Val).split(TailFoldTypes,
'+', -1,
false);
153 unsigned StartIdx = 1;
154 if (TailFoldTypes[0] ==
"disabled")
155 setInitialBits(TailFoldingOpts::Disabled);
156 else if (TailFoldTypes[0] ==
"all")
157 setInitialBits(TailFoldingOpts::All);
158 else if (TailFoldTypes[0] ==
"default")
159 setNeedsDefault(
true);
160 else if (TailFoldTypes[0] ==
"simple")
161 setInitialBits(TailFoldingOpts::Simple);
164 setInitialBits(TailFoldingOpts::Disabled);
167 for (
unsigned I = StartIdx;
I < TailFoldTypes.
size();
I++) {
168 if (TailFoldTypes[
I] ==
"reductions")
169 setEnableBit(TailFoldingOpts::Reductions);
170 else if (TailFoldTypes[
I] ==
"recurrences")
171 setEnableBit(TailFoldingOpts::Recurrences);
172 else if (TailFoldTypes[
I] ==
"reverse")
173 setEnableBit(TailFoldingOpts::Reverse);
174 else if (TailFoldTypes[
I] ==
"noreductions")
175 setDisableBit(TailFoldingOpts::Reductions);
176 else if (TailFoldTypes[
I] ==
"norecurrences")
177 setDisableBit(TailFoldingOpts::Recurrences);
178 else if (TailFoldTypes[
I] ==
"noreverse")
179 setDisableBit(TailFoldingOpts::Reverse);
196 "Control the use of vectorisation using tail-folding for SVE where the"
197 " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:"
198 "\ndisabled (Initial) No loop types will vectorize using "
200 "\ndefault (Initial) Uses the default tail-folding settings for "
202 "\nall (Initial) All legal loop types will vectorize using "
204 "\nsimple (Initial) Use tail-folding for simple loops (not "
205 "reductions or recurrences)"
206 "\nreductions Use tail-folding for loops containing reductions"
207 "\nnoreductions Inverse of above"
208 "\nrecurrences Use tail-folding for loops containing fixed order "
210 "\nnorecurrences Inverse of above"
211 "\nreverse Use tail-folding for loops requiring reversed "
213 "\nnoreverse Inverse of above"),
258 TTI->isMultiversionedFunction(
F) ?
"fmv-features" :
"target-features";
259 StringRef FeatureStr =
F.getFnAttribute(AttributeStr).getValueAsString();
260 FeatureStr.
split(Features,
",");
276 return F.hasFnAttribute(
"fmv-features");
325 auto FVTy = dyn_cast<FixedVectorType>(Ty);
327 FVTy->getScalarSizeInBits() * FVTy->getNumElements() > 128;
336 unsigned DefaultCallPenalty)
const {
361 if (
F ==
Call.getCaller())
367 return DefaultCallPenalty;
378 ST->isSVEorStreamingSVEAvailable() &&
379 !ST->disableMaximizeScalableBandwidth();
403 assert(Ty->isIntegerTy());
405 unsigned BitSize = Ty->getPrimitiveSizeInBits();
412 ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
417 for (
unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
423 return std::max<InstructionCost>(1,
Cost);
430 assert(Ty->isIntegerTy());
432 unsigned BitSize = Ty->getPrimitiveSizeInBits();
438 unsigned ImmIdx = ~0U;
442 case Instruction::GetElementPtr:
447 case Instruction::Store:
450 case Instruction::Add:
451 case Instruction::Sub:
452 case Instruction::Mul:
453 case Instruction::UDiv:
454 case Instruction::SDiv:
455 case Instruction::URem:
456 case Instruction::SRem:
457 case Instruction::And:
458 case Instruction::Or:
459 case Instruction::Xor:
460 case Instruction::ICmp:
464 case Instruction::Shl:
465 case Instruction::LShr:
466 case Instruction::AShr:
470 case Instruction::Trunc:
471 case Instruction::ZExt:
472 case Instruction::SExt:
473 case Instruction::IntToPtr:
474 case Instruction::PtrToInt:
475 case Instruction::BitCast:
476 case Instruction::PHI:
477 case Instruction::Call:
478 case Instruction::Select:
479 case Instruction::Ret:
480 case Instruction::Load:
485 int NumConstants = (BitSize + 63) / 64;
498 assert(Ty->isIntegerTy());
500 unsigned BitSize = Ty->getPrimitiveSizeInBits();
509 if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)
515 case Intrinsic::sadd_with_overflow:
516 case Intrinsic::uadd_with_overflow:
517 case Intrinsic::ssub_with_overflow:
518 case Intrinsic::usub_with_overflow:
519 case Intrinsic::smul_with_overflow:
520 case Intrinsic::umul_with_overflow:
522 int NumConstants = (BitSize + 63) / 64;
529 case Intrinsic::experimental_stackmap:
530 if ((Idx < 2) || (Imm.getBitWidth() <= 64 &&
isInt<64>(Imm.getSExtValue())))
533 case Intrinsic::experimental_patchpoint_void:
534 case Intrinsic::experimental_patchpoint:
535 if ((Idx < 4) || (Imm.getBitWidth() <= 64 &&
isInt<64>(Imm.getSExtValue())))
538 case Intrinsic::experimental_gc_statepoint:
539 if ((Idx < 5) || (Imm.getBitWidth() <= 64 &&
isInt<64>(Imm.getSExtValue())))
549 if (TyWidth == 32 || TyWidth == 64)
558 return ST->getSchedModel().MispredictPenalty;
579 unsigned TotalHistCnts = 1;
589 unsigned EC = VTy->getElementCount().getKnownMinValue();
594 unsigned LegalEltSize = EltSize <= 32 ? 32 : 64;
596 if (EC == 2 || (LegalEltSize == 32 && EC == 4))
600 TotalHistCnts = EC / NaturalVectorWidth;
620 switch (ICA.
getID()) {
621 case Intrinsic::experimental_vector_histogram_add: {
628 case Intrinsic::clmul: {
633 if (LT.second == MVT::v8i8 || LT.second == MVT::v16i8)
637 if (TLI->getValueType(
DL, RetTy,
true) == MVT::i8) {
642 -1,
nullptr,
nullptr) *
645 -1,
nullptr,
nullptr);
649 if (LT.second.SimpleTy == MVT::nxv2i64)
650 if (ST->hasSVEAES() && (ST->isSVEAvailable() || ST->hasSSVE_AES()))
653 if (ST->hasSVE2() || ST->hasSME()) {
654 switch (LT.second.SimpleTy) {
669 if (LT.second.SimpleTy == MVT::nxv2i64)
673 switch (LT.second.SimpleTy) {
683 -1,
nullptr,
nullptr) *
686 -1,
nullptr,
nullptr));
695 return LT.first * 11;
697 return LT.first * 14;
704 case Intrinsic::umin:
705 case Intrinsic::umax:
706 case Intrinsic::smin:
707 case Intrinsic::smax: {
708 static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
709 MVT::v8i16, MVT::v2i32, MVT::v4i32,
710 MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32,
714 if (LT.second == MVT::v2i64)
720 case Intrinsic::scmp:
721 case Intrinsic::ucmp: {
723 {Intrinsic::scmp, MVT::i32, 3},
724 {Intrinsic::scmp, MVT::i64, 3},
725 {Intrinsic::scmp, MVT::v8i8, 3},
726 {Intrinsic::scmp, MVT::v16i8, 3},
727 {Intrinsic::scmp, MVT::v4i16, 3},
728 {Intrinsic::scmp, MVT::v8i16, 3},
729 {Intrinsic::scmp, MVT::v2i32, 3},
730 {Intrinsic::scmp, MVT::v4i32, 3},
731 {Intrinsic::scmp, MVT::v1i64, 3},
732 {Intrinsic::scmp, MVT::v2i64, 3},
738 return Entry->Cost * LT.first;
741 case Intrinsic::sadd_sat:
742 case Intrinsic::ssub_sat:
743 case Intrinsic::uadd_sat:
744 case Intrinsic::usub_sat: {
745 static const auto ValidSatTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
746 MVT::v8i16, MVT::v2i32, MVT::v4i32,
752 LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1 : 4;
754 return LT.first * Instrs;
759 if (ST->isSVEAvailable() && VectorSize >= 128 &&
isPowerOf2_64(VectorSize))
760 return LT.first * Instrs;
764 case Intrinsic::abs: {
765 static const auto ValidAbsTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
766 MVT::v8i16, MVT::v2i32, MVT::v4i32,
767 MVT::v2i64, MVT::nxv16i8, MVT::nxv8i16,
768 MVT::nxv4i32, MVT::nxv2i64};
774 case Intrinsic::bswap: {
775 static const auto ValidAbsTys = {MVT::v4i16, MVT::v8i16, MVT::v2i32,
776 MVT::v4i32, MVT::v2i64};
779 LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits())
784 case Intrinsic::fmuladd: {
789 (EltTy->
isHalfTy() && ST->hasFullFP16()))
793 case Intrinsic::stepvector: {
802 Cost += AddCost * (LT.first - 1);
806 case Intrinsic::vector_extract:
807 case Intrinsic::vector_insert: {
820 bool IsExtract = ICA.
getID() == Intrinsic::vector_extract;
821 EVT SubVecVT = IsExtract ? getTLI()->getValueType(
DL, RetTy)
829 getTLI()->getTypeConversion(
C, SubVecVT);
831 getTLI()->getTypeConversion(
C, VecVT);
839 case Intrinsic::bitreverse: {
841 {Intrinsic::bitreverse, MVT::i32, 1},
842 {Intrinsic::bitreverse, MVT::i64, 1},
843 {Intrinsic::bitreverse, MVT::v8i8, 1},
844 {Intrinsic::bitreverse, MVT::v16i8, 1},
845 {Intrinsic::bitreverse, MVT::v4i16, 2},
846 {Intrinsic::bitreverse, MVT::v8i16, 2},
847 {Intrinsic::bitreverse, MVT::v2i32, 2},
848 {Intrinsic::bitreverse, MVT::v4i32, 2},
849 {Intrinsic::bitreverse, MVT::v1i64, 2},
850 {Intrinsic::bitreverse, MVT::v2i64, 2},
858 if (TLI->getValueType(
DL, RetTy,
true) == MVT::i8 ||
859 TLI->getValueType(
DL, RetTy,
true) == MVT::i16)
860 return LegalisationCost.first * Entry->Cost + 1;
862 return LegalisationCost.first * Entry->Cost;
866 case Intrinsic::ctpop: {
867 if (!ST->hasNEON()) {
899 RetTy->getScalarSizeInBits()
902 return LT.first * Entry->Cost + ExtraCost;
906 case Intrinsic::sadd_with_overflow:
907 case Intrinsic::uadd_with_overflow:
908 case Intrinsic::ssub_with_overflow:
909 case Intrinsic::usub_with_overflow:
910 case Intrinsic::smul_with_overflow:
911 case Intrinsic::umul_with_overflow: {
913 {Intrinsic::sadd_with_overflow, MVT::i8, 3},
914 {Intrinsic::uadd_with_overflow, MVT::i8, 3},
915 {Intrinsic::sadd_with_overflow, MVT::i16, 3},
916 {Intrinsic::uadd_with_overflow, MVT::i16, 3},
917 {Intrinsic::sadd_with_overflow, MVT::i32, 1},
918 {Intrinsic::uadd_with_overflow, MVT::i32, 1},
919 {Intrinsic::sadd_with_overflow, MVT::i64, 1},
920 {Intrinsic::uadd_with_overflow, MVT::i64, 1},
921 {Intrinsic::ssub_with_overflow, MVT::i8, 3},
922 {Intrinsic::usub_with_overflow, MVT::i8, 3},
923 {Intrinsic::ssub_with_overflow, MVT::i16, 3},
924 {Intrinsic::usub_with_overflow, MVT::i16, 3},
925 {Intrinsic::ssub_with_overflow, MVT::i32, 1},
926 {Intrinsic::usub_with_overflow, MVT::i32, 1},
927 {Intrinsic::ssub_with_overflow, MVT::i64, 1},
928 {Intrinsic::usub_with_overflow, MVT::i64, 1},
929 {Intrinsic::smul_with_overflow, MVT::i8, 5},
930 {Intrinsic::umul_with_overflow, MVT::i8, 4},
931 {Intrinsic::smul_with_overflow, MVT::i16, 5},
932 {Intrinsic::umul_with_overflow, MVT::i16, 4},
933 {Intrinsic::smul_with_overflow, MVT::i32, 2},
934 {Intrinsic::umul_with_overflow, MVT::i32, 2},
935 {Intrinsic::smul_with_overflow, MVT::i64, 3},
936 {Intrinsic::umul_with_overflow, MVT::i64, 3},
938 EVT MTy = TLI->getValueType(
DL, RetTy->getContainedType(0),
true);
945 case Intrinsic::fptosi_sat:
946 case Intrinsic::fptoui_sat: {
949 bool IsSigned = ICA.
getID() == Intrinsic::fptosi_sat;
951 EVT MTy = TLI->getValueType(
DL, RetTy);
954 if ((LT.second == MVT::f32 || LT.second == MVT::f64 ||
955 LT.second == MVT::v2f32 || LT.second == MVT::v4f32 ||
956 LT.second == MVT::v2f64)) {
958 (LT.second == MVT::f64 && MTy == MVT::i32) ||
959 (LT.second == MVT::f32 && MTy == MVT::i64)))
968 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
975 if ((LT.second == MVT::f16 && MTy == MVT::i32) ||
976 (LT.second == MVT::f16 && MTy == MVT::i64) ||
977 ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) &&
991 if ((LT.second.getScalarType() == MVT::f32 ||
992 LT.second.getScalarType() == MVT::f64 ||
993 LT.second.getScalarType() == MVT::f16) &&
997 if (LT.second.isVector())
1002 LegalTy, {LegalTy, LegalTy});
1006 LegalTy, {LegalTy, LegalTy});
1008 return LT.first *
Cost +
1009 ((LT.second.getScalarType() != MVT::f16 || ST->hasFullFP16()) ? 0
1015 RetTy = RetTy->getScalarType();
1016 if (LT.second.isVector()) {
1034 return LT.first *
Cost;
1036 case Intrinsic::fshl:
1037 case Intrinsic::fshr: {
1046 if (RetTy->isIntegerTy() && ICA.
getArgs()[0] == ICA.
getArgs()[1] &&
1047 (RetTy->getPrimitiveSizeInBits() == 32 ||
1048 RetTy->getPrimitiveSizeInBits() == 64)) {
1061 {Intrinsic::fshl, MVT::v4i32, 2},
1062 {Intrinsic::fshl, MVT::v2i64, 2}, {Intrinsic::fshl, MVT::v16i8, 2},
1063 {Intrinsic::fshl, MVT::v8i16, 2}, {Intrinsic::fshl, MVT::v2i32, 2},
1064 {Intrinsic::fshl, MVT::v8i8, 2}, {Intrinsic::fshl, MVT::v4i16, 2}};
1070 return LegalisationCost.first * Entry->Cost;
1074 if (!RetTy->isIntegerTy())
1079 bool HigherCost = (RetTy->getScalarSizeInBits() != 32 &&
1080 RetTy->getScalarSizeInBits() < 64) ||
1081 (RetTy->getScalarSizeInBits() % 64 != 0);
1082 unsigned ExtraCost = HigherCost ? 1 : 0;
1083 if (RetTy->getScalarSizeInBits() == 32 ||
1084 RetTy->getScalarSizeInBits() == 64)
1087 else if (HigherCost)
1091 return TyL.first + ExtraCost;
1093 case Intrinsic::get_active_lane_mask: {
1095 EVT RetVT = getTLI()->getValueType(
DL, RetTy);
1097 if (getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT))
1100 if (RetTy->isScalableTy()) {
1101 if (TLI->getTypeAction(RetTy->getContext(), RetVT) !=
1111 if (ST->hasSVE2p1() || ST->hasSME2()) {
1126 return Cost + (SplitCost * (
Cost - 1));
1141 case Intrinsic::experimental_vector_match: {
1144 unsigned SearchSize = NeedleTy->getNumElements();
1145 if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize)) {
1158 case Intrinsic::cttz: {
1160 if (LT.second == MVT::v8i8 || LT.second == MVT::v16i8)
1161 return LT.first * 2;
1162 if (LT.second == MVT::v4i16 || LT.second == MVT::v8i16 ||
1163 LT.second == MVT::v2i32 || LT.second == MVT::v4i32)
1164 return LT.first * 3;
1167 case Intrinsic::experimental_cttz_elts: {
1169 if (!getTLI()->shouldExpandCttzElements(ArgVT)) {
1177 case Intrinsic::loop_dependence_raw_mask:
1178 case Intrinsic::loop_dependence_war_mask: {
1180 if (ST->hasSVE2() || ST->hasSME()) {
1181 EVT VecVT = getTLI()->getValueType(
DL, RetTy);
1182 unsigned EltSizeInBytes =
1192 case Intrinsic::experimental_vector_extract_last_active:
1193 if (ST->isSVEorStreamingSVEAvailable()) {
1199 case Intrinsic::pow: {
1202 EVT VT = getTLI()->getValueType(
DL, RetTy);
1204 bool HasLibcall = getTLI()->getLibcallImpl(LC) != RTLIB::Unsupported;
1219 bool Is025 = ExpF->getValueAPF().isExactlyValue(0.25);
1220 bool Is075 = ExpF->getValueAPF().isExactlyValue(0.75);
1230 return (Sqrt * 2) +
FMul;
1241 case Intrinsic::sqrt:
1242 case Intrinsic::fabs:
1243 case Intrinsic::ceil:
1244 case Intrinsic::floor:
1245 case Intrinsic::nearbyint:
1246 case Intrinsic::round:
1247 case Intrinsic::rint:
1248 case Intrinsic::roundeven:
1249 case Intrinsic::trunc:
1250 case Intrinsic::minnum:
1251 case Intrinsic::maxnum:
1252 case Intrinsic::minimum:
1253 case Intrinsic::maximum: {
1271 auto RequiredType =
II.getType();
1274 assert(PN &&
"Expected Phi Node!");
1277 if (!PN->hasOneUse())
1278 return std::nullopt;
1280 for (
Value *IncValPhi : PN->incoming_values()) {
1283 Reinterpret->getIntrinsicID() !=
1284 Intrinsic::aarch64_sve_convert_to_svbool ||
1285 RequiredType != Reinterpret->getArgOperand(0)->getType())
1286 return std::nullopt;
1294 for (
unsigned I = 0;
I < PN->getNumIncomingValues();
I++) {
1296 NPN->
addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(
I));
1369 return GoverningPredicateIdx != std::numeric_limits<unsigned>::max();
1374 return GoverningPredicateIdx;
1379 GoverningPredicateIdx = Index;
1397 return UndefIntrinsic;
1402 UndefIntrinsic = IID;
1424 return ResultLanes == InactiveLanesTakenFromOperand;
1429 return OperandIdxForInactiveLanes;
1433 assert(ResultLanes == Uninitialized &&
"Cannot set property twice!");
1434 ResultLanes = InactiveLanesTakenFromOperand;
1435 OperandIdxForInactiveLanes = Index;
1440 return ResultLanes == InactiveLanesAreNotDefined;
1444 assert(ResultLanes == Uninitialized &&
"Cannot set property twice!");
1445 ResultLanes = InactiveLanesAreNotDefined;
1450 return ResultLanes == InactiveLanesAreUnused;
1454 assert(ResultLanes == Uninitialized &&
"Cannot set property twice!");
1455 ResultLanes = InactiveLanesAreUnused;
1465 ResultIsZeroInitialized =
true;
1476 return OperandIdxWithNoActiveLanes != std::numeric_limits<unsigned>::max();
1481 return OperandIdxWithNoActiveLanes;
1486 OperandIdxWithNoActiveLanes = Index;
1491 unsigned GoverningPredicateIdx = std::numeric_limits<unsigned>::max();
1494 unsigned IROpcode = 0;
1496 enum PredicationStyle {
1498 InactiveLanesTakenFromOperand,
1499 InactiveLanesAreNotDefined,
1500 InactiveLanesAreUnused
1503 bool ResultIsZeroInitialized =
false;
1504 unsigned OperandIdxForInactiveLanes = std::numeric_limits<unsigned>::max();
1505 unsigned OperandIdxWithNoActiveLanes = std::numeric_limits<unsigned>::max();
1513 return !isa<ScalableVectorType>(V->getType());
1521 case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:
1522 case Intrinsic::aarch64_sve_fcvt_f16f32:
1523 case Intrinsic::aarch64_sve_fcvt_f16f64:
1524 case Intrinsic::aarch64_sve_fcvt_f32f16:
1525 case Intrinsic::aarch64_sve_fcvt_f32f64:
1526 case Intrinsic::aarch64_sve_fcvt_f64f16:
1527 case Intrinsic::aarch64_sve_fcvt_f64f32:
1528 case Intrinsic::aarch64_sve_fcvtlt_f32f16:
1529 case Intrinsic::aarch64_sve_fcvtlt_f64f32:
1530 case Intrinsic::aarch64_sve_fcvtx_f32f64:
1531 case Intrinsic::aarch64_sve_fcvtzs:
1532 case Intrinsic::aarch64_sve_fcvtzs_i32f16:
1533 case Intrinsic::aarch64_sve_fcvtzs_i32f64:
1534 case Intrinsic::aarch64_sve_fcvtzs_i64f16:
1535 case Intrinsic::aarch64_sve_fcvtzs_i64f32:
1536 case Intrinsic::aarch64_sve_fcvtzu:
1537 case Intrinsic::aarch64_sve_fcvtzu_i32f16:
1538 case Intrinsic::aarch64_sve_fcvtzu_i32f64:
1539 case Intrinsic::aarch64_sve_fcvtzu_i64f16:
1540 case Intrinsic::aarch64_sve_fcvtzu_i64f32:
1541 case Intrinsic::aarch64_sve_revb:
1542 case Intrinsic::aarch64_sve_revh:
1543 case Intrinsic::aarch64_sve_revw:
1544 case Intrinsic::aarch64_sve_revd:
1545 case Intrinsic::aarch64_sve_scvtf:
1546 case Intrinsic::aarch64_sve_scvtf_f16i32:
1547 case Intrinsic::aarch64_sve_scvtf_f16i64:
1548 case Intrinsic::aarch64_sve_scvtf_f32i64:
1549 case Intrinsic::aarch64_sve_scvtf_f64i32:
1550 case Intrinsic::aarch64_sve_ucvtf:
1551 case Intrinsic::aarch64_sve_ucvtf_f16i32:
1552 case Intrinsic::aarch64_sve_ucvtf_f16i64:
1553 case Intrinsic::aarch64_sve_ucvtf_f32i64:
1554 case Intrinsic::aarch64_sve_ucvtf_f64i32:
1557 case Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2:
1558 case Intrinsic::aarch64_sve_fcvtnt_f16f32:
1559 case Intrinsic::aarch64_sve_fcvtnt_f32f64:
1560 case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
1563 case Intrinsic::aarch64_sve_fabd:
1565 case Intrinsic::aarch64_sve_fadd:
1568 case Intrinsic::aarch64_sve_fdiv:
1571 case Intrinsic::aarch64_sve_fmax:
1573 case Intrinsic::aarch64_sve_fmaxnm:
1575 case Intrinsic::aarch64_sve_fmin:
1577 case Intrinsic::aarch64_sve_fminnm:
1579 case Intrinsic::aarch64_sve_fmla:
1581 case Intrinsic::aarch64_sve_fmls:
1583 case Intrinsic::aarch64_sve_fmul:
1586 case Intrinsic::aarch64_sve_fmulx:
1588 case Intrinsic::aarch64_sve_fnmla:
1590 case Intrinsic::aarch64_sve_fnmls:
1592 case Intrinsic::aarch64_sve_fsub:
1595 case Intrinsic::aarch64_sve_add:
1598 case Intrinsic::aarch64_sve_mla:
1600 case Intrinsic::aarch64_sve_mls:
1602 case Intrinsic::aarch64_sve_mul:
1605 case Intrinsic::aarch64_sve_sabd:
1607 case Intrinsic::aarch64_sve_sdiv:
1610 case Intrinsic::aarch64_sve_smax:
1612 case Intrinsic::aarch64_sve_smin:
1614 case Intrinsic::aarch64_sve_smulh:
1616 case Intrinsic::aarch64_sve_sub:
1619 case Intrinsic::aarch64_sve_uabd:
1621 case Intrinsic::aarch64_sve_udiv:
1624 case Intrinsic::aarch64_sve_umax:
1626 case Intrinsic::aarch64_sve_umin:
1628 case Intrinsic::aarch64_sve_umulh:
1630 case Intrinsic::aarch64_sve_asr:
1633 case Intrinsic::aarch64_sve_lsl:
1636 case Intrinsic::aarch64_sve_lsr:
1639 case Intrinsic::aarch64_sve_and:
1642 case Intrinsic::aarch64_sve_bic:
1644 case Intrinsic::aarch64_sve_eor:
1647 case Intrinsic::aarch64_sve_orr:
1650 case Intrinsic::aarch64_sve_shsub:
1652 case Intrinsic::aarch64_sve_shsubr:
1654 case Intrinsic::aarch64_sve_sqrshl:
1656 case Intrinsic::aarch64_sve_sqshl:
1658 case Intrinsic::aarch64_sve_sqsub:
1660 case Intrinsic::aarch64_sve_srshl:
1662 case Intrinsic::aarch64_sve_uhsub:
1664 case Intrinsic::aarch64_sve_uhsubr:
1666 case Intrinsic::aarch64_sve_uqrshl:
1668 case Intrinsic::aarch64_sve_uqshl:
1670 case Intrinsic::aarch64_sve_uqsub:
1672 case Intrinsic::aarch64_sve_urshl:
1675 case Intrinsic::aarch64_sve_add_u:
1678 case Intrinsic::aarch64_sve_and_u:
1681 case Intrinsic::aarch64_sve_asr_u:
1684 case Intrinsic::aarch64_sve_eor_u:
1687 case Intrinsic::aarch64_sve_fadd_u:
1690 case Intrinsic::aarch64_sve_fdiv_u:
1693 case Intrinsic::aarch64_sve_fmul_u:
1696 case Intrinsic::aarch64_sve_fsub_u:
1699 case Intrinsic::aarch64_sve_lsl_u:
1702 case Intrinsic::aarch64_sve_lsr_u:
1705 case Intrinsic::aarch64_sve_mul_u:
1708 case Intrinsic::aarch64_sve_orr_u:
1711 case Intrinsic::aarch64_sve_sdiv_u:
1714 case Intrinsic::aarch64_sve_sub_u:
1717 case Intrinsic::aarch64_sve_udiv_u:
1721 case Intrinsic::aarch64_sve_addqv:
1722 case Intrinsic::aarch64_sve_and_z:
1723 case Intrinsic::aarch64_sve_bic_z:
1724 case Intrinsic::aarch64_sve_brka_z:
1725 case Intrinsic::aarch64_sve_brkb_z:
1726 case Intrinsic::aarch64_sve_brkn_z:
1727 case Intrinsic::aarch64_sve_brkpa_z:
1728 case Intrinsic::aarch64_sve_brkpb_z:
1729 case Intrinsic::aarch64_sve_cntp:
1730 case Intrinsic::aarch64_sve_compact:
1731 case Intrinsic::aarch64_sve_eor_z:
1732 case Intrinsic::aarch64_sve_eorv:
1733 case Intrinsic::aarch64_sve_eorqv:
1734 case Intrinsic::aarch64_sve_nand_z:
1735 case Intrinsic::aarch64_sve_nor_z:
1736 case Intrinsic::aarch64_sve_orn_z:
1737 case Intrinsic::aarch64_sve_orr_z:
1738 case Intrinsic::aarch64_sve_orv:
1739 case Intrinsic::aarch64_sve_orqv:
1740 case Intrinsic::aarch64_sve_pnext:
1741 case Intrinsic::aarch64_sve_rdffr_z:
1742 case Intrinsic::aarch64_sve_saddv:
1743 case Intrinsic::aarch64_sve_uaddv:
1744 case Intrinsic::aarch64_sve_umaxv:
1745 case Intrinsic::aarch64_sve_umaxqv:
1746 case Intrinsic::aarch64_sve_cmpeq:
1747 case Intrinsic::aarch64_sve_cmpeq_wide:
1748 case Intrinsic::aarch64_sve_cmpge:
1749 case Intrinsic::aarch64_sve_cmpge_wide:
1750 case Intrinsic::aarch64_sve_cmpgt:
1751 case Intrinsic::aarch64_sve_cmpgt_wide:
1752 case Intrinsic::aarch64_sve_cmphi:
1753 case Intrinsic::aarch64_sve_cmphi_wide:
1754 case Intrinsic::aarch64_sve_cmphs:
1755 case Intrinsic::aarch64_sve_cmphs_wide:
1756 case Intrinsic::aarch64_sve_cmple_wide:
1757 case Intrinsic::aarch64_sve_cmplo_wide:
1758 case Intrinsic::aarch64_sve_cmpls_wide:
1759 case Intrinsic::aarch64_sve_cmplt_wide:
1760 case Intrinsic::aarch64_sve_cmpne:
1761 case Intrinsic::aarch64_sve_cmpne_wide:
1762 case Intrinsic::aarch64_sve_facge:
1763 case Intrinsic::aarch64_sve_facgt:
1764 case Intrinsic::aarch64_sve_fcmpeq:
1765 case Intrinsic::aarch64_sve_fcmpge:
1766 case Intrinsic::aarch64_sve_fcmpgt:
1767 case Intrinsic::aarch64_sve_fcmpne:
1768 case Intrinsic::aarch64_sve_fcmpuo:
1769 case Intrinsic::aarch64_sve_ld1:
1770 case Intrinsic::aarch64_sve_ld1_gather:
1771 case Intrinsic::aarch64_sve_ld1_gather_index:
1772 case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
1773 case Intrinsic::aarch64_sve_ld1_gather_sxtw:
1774 case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
1775 case Intrinsic::aarch64_sve_ld1_gather_uxtw:
1776 case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
1777 case Intrinsic::aarch64_sve_ld1q_gather_index:
1778 case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
1779 case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
1780 case Intrinsic::aarch64_sve_ld1ro:
1781 case Intrinsic::aarch64_sve_ld1rq:
1782 case Intrinsic::aarch64_sve_ld1udq:
1783 case Intrinsic::aarch64_sve_ld1uwq:
1784 case Intrinsic::aarch64_sve_ld2_sret:
1785 case Intrinsic::aarch64_sve_ld2q_sret:
1786 case Intrinsic::aarch64_sve_ld3_sret:
1787 case Intrinsic::aarch64_sve_ld3q_sret:
1788 case Intrinsic::aarch64_sve_ld4_sret:
1789 case Intrinsic::aarch64_sve_ld4q_sret:
1790 case Intrinsic::aarch64_sve_ldff1:
1791 case Intrinsic::aarch64_sve_ldff1_gather:
1792 case Intrinsic::aarch64_sve_ldff1_gather_index:
1793 case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
1794 case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
1795 case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
1796 case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
1797 case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
1798 case Intrinsic::aarch64_sve_ldnf1:
1799 case Intrinsic::aarch64_sve_ldnt1:
1800 case Intrinsic::aarch64_sve_ldnt1_gather:
1801 case Intrinsic::aarch64_sve_ldnt1_gather_index:
1802 case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
1803 case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
1806 case Intrinsic::aarch64_sve_prf:
1807 case Intrinsic::aarch64_sve_prfb_gather_index:
1808 case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
1809 case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
1810 case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
1811 case Intrinsic::aarch64_sve_prfd_gather_index:
1812 case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
1813 case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
1814 case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
1815 case Intrinsic::aarch64_sve_prfh_gather_index:
1816 case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
1817 case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
1818 case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
1819 case Intrinsic::aarch64_sve_prfw_gather_index:
1820 case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
1821 case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
1822 case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
1825 case Intrinsic::aarch64_sve_st1_scatter:
1826 case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
1827 case Intrinsic::aarch64_sve_st1_scatter_sxtw:
1828 case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
1829 case Intrinsic::aarch64_sve_st1_scatter_uxtw:
1830 case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
1831 case Intrinsic::aarch64_sve_st1dq:
1832 case Intrinsic::aarch64_sve_st1q_scatter_index:
1833 case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
1834 case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:
1835 case Intrinsic::aarch64_sve_st1wq:
1836 case Intrinsic::aarch64_sve_stnt1:
1837 case Intrinsic::aarch64_sve_stnt1_scatter:
1838 case Intrinsic::aarch64_sve_stnt1_scatter_index:
1839 case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
1840 case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
1842 case Intrinsic::aarch64_sve_st2:
1843 case Intrinsic::aarch64_sve_st2q:
1845 case Intrinsic::aarch64_sve_st3:
1846 case Intrinsic::aarch64_sve_st3q:
1848 case Intrinsic::aarch64_sve_st4:
1849 case Intrinsic::aarch64_sve_st4q:
1857 Value *UncastedPred;
1863 Pred = UncastedPred;
1869 if (OrigPredTy->getMinNumElements() <=
1871 ->getMinNumElements())
1872 Pred = UncastedPred;
1876 return C &&
C->isAllOnesValue();
1883 if (Dup && Dup->getIntrinsicID() == Intrinsic::aarch64_sve_dup &&
1884 Dup->getOperand(1) == Pg &&
isa<Constant>(Dup->getOperand(2)))
1892static std::optional<Instruction *>
1899 Value *Op1 =
II.getOperand(1);
1900 Value *Op2 =
II.getOperand(2);
1926 return std::nullopt;
1934 if (SimpleII == Inactive)
1944static std::optional<Instruction *>
1948 return std::nullopt;
1977 II.setCalledFunction(NewDecl);
1987 return std::nullopt;
1999static std::optional<Instruction *>
2003 return std::nullopt;
2005 auto IntrinsicID = BinOp->getIntrinsicID();
2006 switch (IntrinsicID) {
2007 case Intrinsic::aarch64_sve_and_z:
2008 case Intrinsic::aarch64_sve_bic_z:
2009 case Intrinsic::aarch64_sve_eor_z:
2010 case Intrinsic::aarch64_sve_nand_z:
2011 case Intrinsic::aarch64_sve_nor_z:
2012 case Intrinsic::aarch64_sve_orn_z:
2013 case Intrinsic::aarch64_sve_orr_z:
2016 return std::nullopt;
2019 auto BinOpPred = BinOp->getOperand(0);
2020 auto BinOpOp1 = BinOp->getOperand(1);
2021 auto BinOpOp2 = BinOp->getOperand(2);
2025 PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
2026 return std::nullopt;
2028 auto PredOp = PredIntr->getOperand(0);
2030 if (PredOpTy !=
II.getType())
2031 return std::nullopt;
2035 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp1});
2036 NarrowedBinOpArgs.
push_back(NarrowBinOpOp1);
2037 if (BinOpOp1 == BinOpOp2)
2038 NarrowedBinOpArgs.
push_back(NarrowBinOpOp1);
2041 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp2}));
2043 auto NarrowedBinOp =
2048static std::optional<Instruction *>
2055 return BinOpCombine;
2060 return std::nullopt;
2063 Value *Cursor =
II.getOperand(0), *EarliestReplacement =
nullptr;
2072 if (CursorVTy->getElementCount().getKnownMinValue() <
2073 IVTy->getElementCount().getKnownMinValue())
2077 if (Cursor->getType() == IVTy)
2078 EarliestReplacement = Cursor;
2083 if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
2084 Intrinsic::aarch64_sve_convert_to_svbool ||
2085 IntrinsicCursor->getIntrinsicID() ==
2086 Intrinsic::aarch64_sve_convert_from_svbool))
2089 CandidatesForRemoval.
insert(CandidatesForRemoval.
begin(), IntrinsicCursor);
2090 Cursor = IntrinsicCursor->getOperand(0);
2095 if (!EarliestReplacement)
2096 return std::nullopt;
2104 auto *OpPredicate =
II.getOperand(0);
2121 II.getArgOperand(2));
2127 return std::nullopt;
2131 II.getArgOperand(0),
II.getArgOperand(2),
uint64_t(0));
2140 II.getArgOperand(0));
2150 return std::nullopt;
2155 if (!SplatValue || !SplatValue->isZero())
2156 return std::nullopt;
2161 DupQLane->getIntrinsicID() != Intrinsic::aarch64_sve_dupq_lane)
2162 return std::nullopt;
2166 if (!DupQLaneIdx || !DupQLaneIdx->isZero())
2167 return std::nullopt;
2170 if (!VecIns || VecIns->getIntrinsicID() != Intrinsic::vector_insert)
2171 return std::nullopt;
2176 return std::nullopt;
2179 return std::nullopt;
2183 return std::nullopt;
2187 if (!VecTy || !OutTy || VecTy->getNumElements() != OutTy->getMinNumElements())
2188 return std::nullopt;
2190 unsigned NumElts = VecTy->getNumElements();
2191 unsigned PredicateBits = 0;
2194 for (
unsigned I = 0;
I < NumElts; ++
I) {
2197 return std::nullopt;
2199 PredicateBits |= 1 << (
I * (16 / NumElts));
2203 if (PredicateBits == 0) {
2205 PFalse->takeName(&
II);
2211 for (
unsigned I = 0;
I < 16; ++
I)
2212 if ((PredicateBits & (1 <<
I)) != 0)
2215 unsigned PredSize = Mask & -Mask;
2220 for (
unsigned I = 0;
I < 16;
I += PredSize)
2221 if ((PredicateBits & (1 <<
I)) == 0)
2222 return std::nullopt;
2224 auto *ConvertToSVBool =
2227 auto *ConvertFromSVBool =
2229 II.getType(), ConvertToSVBool);
2237 Value *Pg =
II.getArgOperand(0);
2238 Value *Vec =
II.getArgOperand(1);
2239 auto IntrinsicID =
II.getIntrinsicID();
2240 bool IsAfter = IntrinsicID == Intrinsic::aarch64_sve_lasta;
2252 auto OpC = OldBinOp->getOpcode();
2258 OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(),
II.getIterator());
2264 if (IsAfter &&
C &&
C->isNullValue()) {
2268 Extract->insertBefore(
II.getIterator());
2269 Extract->takeName(&
II);
2275 return std::nullopt;
2277 if (IntrPG->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
2278 return std::nullopt;
2280 const auto PTruePattern =
2286 return std::nullopt;
2288 unsigned Idx = MinNumElts - 1;
2298 if (Idx >= PgVTy->getMinNumElements())
2299 return std::nullopt;
2304 Extract->insertBefore(
II.getIterator());
2305 Extract->takeName(&
II);
2318 Value *Pg =
II.getArgOperand(0);
2320 Value *Vec =
II.getArgOperand(2);
2323 if (!Ty->isIntegerTy())
2324 return std::nullopt;
2329 return std::nullopt;
2346 II.getIntrinsicID(), {FPVec->getType()}, {Pg, FPFallBack, FPVec});
2361static std::optional<Instruction *>
2365 if (
Pattern == AArch64SVEPredPattern::all) {
2374 return MinNumElts && NumElts >= MinNumElts
2376 II, ConstantInt::get(
II.getType(), MinNumElts)))
2380static std::optional<Instruction *>
2383 if (!ST->isStreaming())
2384 return std::nullopt;
2396 Value *PgVal =
II.getArgOperand(0);
2397 Value *OpVal =
II.getArgOperand(1);
2401 if (PgVal == OpVal &&
2402 (
II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_first ||
2403 II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_last)) {
2418 return std::nullopt;
2422 if (Pg->
getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
2423 OpIID == Intrinsic::aarch64_sve_convert_to_svbool &&
2437 if ((Pg ==
Op) && (
II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) &&
2438 ((OpIID == Intrinsic::aarch64_sve_brka_z) ||
2439 (OpIID == Intrinsic::aarch64_sve_brkb_z) ||
2440 (OpIID == Intrinsic::aarch64_sve_brkpa_z) ||
2441 (OpIID == Intrinsic::aarch64_sve_brkpb_z) ||
2442 (OpIID == Intrinsic::aarch64_sve_rdffr_z) ||
2443 (OpIID == Intrinsic::aarch64_sve_and_z) ||
2444 (OpIID == Intrinsic::aarch64_sve_bic_z) ||
2445 (OpIID == Intrinsic::aarch64_sve_eor_z) ||
2446 (OpIID == Intrinsic::aarch64_sve_nand_z) ||
2447 (OpIID == Intrinsic::aarch64_sve_nor_z) ||
2448 (OpIID == Intrinsic::aarch64_sve_orn_z) ||
2449 (OpIID == Intrinsic::aarch64_sve_orr_z))) {
2459 return std::nullopt;
2462template <Intrinsic::ID MulOpc, Intrinsic::ID FuseOpc>
2463static std::optional<Instruction *>
2465 bool MergeIntoAddendOp) {
2467 Value *MulOp0, *MulOp1, *AddendOp, *
Mul;
2468 if (MergeIntoAddendOp) {
2469 AddendOp =
II.getOperand(1);
2470 Mul =
II.getOperand(2);
2472 AddendOp =
II.getOperand(2);
2473 Mul =
II.getOperand(1);
2478 return std::nullopt;
2480 if (!
Mul->hasOneUse())
2481 return std::nullopt;
2484 if (
II.getType()->isFPOrFPVectorTy()) {
2489 return std::nullopt;
2491 return std::nullopt;
2496 if (MergeIntoAddendOp)
2506static std::optional<Instruction *>
2508 Value *Pred =
II.getOperand(0);
2509 Value *PtrOp =
II.getOperand(1);
2510 Type *VecTy =
II.getType();
2514 Load->copyMetadata(
II);
2525static std::optional<Instruction *>
2527 Value *VecOp =
II.getOperand(0);
2528 Value *Pred =
II.getOperand(1);
2529 Value *PtrOp =
II.getOperand(2);
2533 Store->copyMetadata(
II);
2545 case Intrinsic::aarch64_sve_fmul_u:
2546 return Instruction::BinaryOps::FMul;
2547 case Intrinsic::aarch64_sve_fadd_u:
2548 return Instruction::BinaryOps::FAdd;
2549 case Intrinsic::aarch64_sve_fsub_u:
2550 return Instruction::BinaryOps::FSub;
2552 return Instruction::BinaryOpsEnd;
2556static std::optional<Instruction *>
2559 if (
II.isStrictFP())
2560 return std::nullopt;
2562 auto *OpPredicate =
II.getOperand(0);
2564 if (BinOpCode == Instruction::BinaryOpsEnd ||
2566 return std::nullopt;
2568 BinOpCode,
II.getOperand(1),
II.getOperand(2),
II.getFastMathFlags());
2572static std::optional<Instruction *>
2574 assert(
II.getIntrinsicID() == Intrinsic::aarch64_sve_mla_u &&
2575 "Expected MLA_U intrinsic");
2576 Value *Acc =
II.getArgOperand(1);
2577 Value *MulOp0 =
II.getArgOperand(2);
2578 Value *MulOp1 =
II.getArgOperand(3);
2593 II.setArgOperand(2, MulOp1);
2594 II.setArgOperand(3, MulOp0);
2598 return std::nullopt;
2601static std::optional<Instruction *>
2603 assert((
II.getIntrinsicID() == Intrinsic::aarch64_sve_sadalp ||
2604 II.getIntrinsicID() == Intrinsic::aarch64_sve_uadalp) &&
2605 "Expected SADALP or UADALP intrinsic");
2609 return std::nullopt;
2614 return std::nullopt;
2618 II.getIntrinsicID(), {II.getType()},
2619 {II.getArgOperand(0), Acc, II.getArgOperand(2)});
2629 Intrinsic::aarch64_sve_mla>(
2633 Intrinsic::aarch64_sve_mad>(
2636 return std::nullopt;
2639static std::optional<Instruction *>
2643 Intrinsic::aarch64_sve_fmla>(IC,
II,
2648 Intrinsic::aarch64_sve_fmad>(IC,
II,
2653 Intrinsic::aarch64_sve_fmla>(IC,
II,
2656 return std::nullopt;
2659static std::optional<Instruction *>
2663 Intrinsic::aarch64_sve_fmla>(IC,
II,
2668 Intrinsic::aarch64_sve_fmad>(IC,
II,
2673 Intrinsic::aarch64_sve_fmla_u>(
2679static std::optional<Instruction *>
2683 Intrinsic::aarch64_sve_fmls>(IC,
II,
2688 Intrinsic::aarch64_sve_fnmsb>(
2693 Intrinsic::aarch64_sve_fmls>(IC,
II,
2696 return std::nullopt;
2699static std::optional<Instruction *>
2703 Intrinsic::aarch64_sve_fmls>(IC,
II,
2708 Intrinsic::aarch64_sve_fnmsb>(
2713 Intrinsic::aarch64_sve_fmls_u>(
2722 Intrinsic::aarch64_sve_mls>(
2725 return std::nullopt;
2730 Value *UnpackArg =
II.getArgOperand(0);
2732 bool IsSigned =
II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||
2733 II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpklo;
2746 return std::nullopt;
2750 auto *OpVal =
II.getOperand(0);
2751 auto *OpIndices =
II.getOperand(1);
2758 SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))
2759 return std::nullopt;
2774 Type *RetTy =
II.getType();
2775 constexpr Intrinsic::ID FromSVB = Intrinsic::aarch64_sve_convert_from_svbool;
2776 constexpr Intrinsic::ID ToSVB = Intrinsic::aarch64_sve_convert_to_svbool;
2780 if ((
match(
II.getArgOperand(0),
2787 if (TyA ==
B->getType() &&
2792 TyA->getMinNumElements());
2798 return std::nullopt;
2806 if (
match(
II.getArgOperand(0),
2811 II, (
II.getIntrinsicID() == Intrinsic::aarch64_sve_zip1 ?
A :
B));
2813 return std::nullopt;
2816static std::optional<Instruction *>
2818 Value *Mask =
II.getOperand(0);
2819 Value *BasePtr =
II.getOperand(1);
2820 Value *Index =
II.getOperand(2);
2831 BasePtr->getPointerAlignment(
II.getDataLayout());
2834 BasePtr, IndexBase);
2841 return std::nullopt;
2844static std::optional<Instruction *>
2846 Value *Val =
II.getOperand(0);
2847 Value *Mask =
II.getOperand(1);
2848 Value *BasePtr =
II.getOperand(2);
2849 Value *Index =
II.getOperand(3);
2859 BasePtr->getPointerAlignment(
II.getDataLayout());
2862 BasePtr, IndexBase);
2868 return std::nullopt;
2874 Value *Pred =
II.getOperand(0);
2875 Value *Vec =
II.getOperand(1);
2876 Value *DivVec =
II.getOperand(2);
2880 if (!SplatConstantInt)
2881 return std::nullopt;
2885 if (DivisorValue == -1)
2886 return std::nullopt;
2887 if (DivisorValue == 1)
2893 Intrinsic::aarch64_sve_asrd, {
II.getType()}, {Pred, Vec, DivisorLog2});
2900 Intrinsic::aarch64_sve_asrd, {
II.getType()}, {Pred, Vec, DivisorLog2});
2902 Intrinsic::aarch64_sve_neg, {ASRD->getType()}, {ASRD, Pred, ASRD});
2906 return std::nullopt;
2910 size_t VecSize = Vec.
size();
2915 size_t HalfVecSize = VecSize / 2;
2919 if (*
LHS !=
nullptr && *
RHS !=
nullptr) {
2927 if (*
LHS ==
nullptr && *
RHS !=
nullptr)
2945 return std::nullopt;
2952 Elts[Idx->getValue().getZExtValue()] = InsertElt->getOperand(1);
2953 CurrentInsertElt = InsertElt->getOperand(0);
2959 return std::nullopt;
2963 for (
size_t I = 0;
I < Elts.
size();
I++) {
2964 if (Elts[
I] ==
nullptr)
2969 if (InsertEltChain ==
nullptr)
2970 return std::nullopt;
2976 unsigned PatternWidth = IIScalableTy->getScalarSizeInBits() * Elts.
size();
2977 unsigned PatternElementCount = IIScalableTy->getScalarSizeInBits() *
2978 IIScalableTy->getMinNumElements() /
2983 auto *WideShuffleMaskTy =
2994 auto NarrowBitcast =
3007 return std::nullopt;
3012 Value *Pred =
II.getOperand(0);
3013 Value *Vec =
II.getOperand(1);
3014 Value *Shift =
II.getOperand(2);
3017 Value *AbsPred, *MergedValue;
3023 return std::nullopt;
3031 return std::nullopt;
3036 return std::nullopt;
3039 {
II.getType()}, {Pred, Vec, Shift});
3046 Value *Vec =
II.getOperand(0);
3051 return std::nullopt;
3057 auto *NI =
II.getNextNode();
3060 return !
I->mayReadOrWriteMemory() && !
I->mayHaveSideEffects();
3062 while (LookaheadThreshold-- && CanSkipOver(NI)) {
3063 auto *NIBB = NI->getParent();
3064 NI = NI->getNextNode();
3066 if (
auto *SuccBB = NIBB->getUniqueSuccessor())
3067 NI = &*SuccBB->getFirstNonPHIOrDbgOrLifetime();
3073 if (NextII &&
II.isIdenticalTo(NextII))
3076 return std::nullopt;
3084 {II.getType(), II.getOperand(0)->getType()},
3085 {II.getOperand(0), II.getOperand(1)}));
3092 if (PredPattern == AArch64SVEPredPattern::all ||
3093 PredPattern == AArch64SVEPredPattern::pow2)
3095 return std::nullopt;
3101 Value *Passthru =
II.getOperand(0);
3109 auto *Mask = ConstantInt::get(Ty, MaskValue);
3115 return std::nullopt;
3118static std::optional<Instruction *>
3125 return std::nullopt;
3128std::optional<Instruction *>
3139 case Intrinsic::aarch64_dmb:
3141 case Intrinsic::aarch64_neon_fmaxnm:
3142 case Intrinsic::aarch64_neon_fminnm:
3144 case Intrinsic::aarch64_sve_convert_from_svbool:
3146 case Intrinsic::aarch64_sve_dup:
3148 case Intrinsic::aarch64_sve_dup_x:
3150 case Intrinsic::aarch64_sve_cmpne:
3151 case Intrinsic::aarch64_sve_cmpne_wide:
3153 case Intrinsic::aarch64_sve_rdffr:
3155 case Intrinsic::aarch64_sve_lasta:
3156 case Intrinsic::aarch64_sve_lastb:
3158 case Intrinsic::aarch64_sve_clasta_n:
3159 case Intrinsic::aarch64_sve_clastb_n:
3161 case Intrinsic::aarch64_sve_cntd:
3163 case Intrinsic::aarch64_sve_cntw:
3165 case Intrinsic::aarch64_sve_cnth:
3167 case Intrinsic::aarch64_sve_cntb:
3169 case Intrinsic::aarch64_sme_cntsd:
3171 case Intrinsic::aarch64_sve_ptest_any:
3172 case Intrinsic::aarch64_sve_ptest_first:
3173 case Intrinsic::aarch64_sve_ptest_last:
3175 case Intrinsic::aarch64_sve_fadd:
3177 case Intrinsic::aarch64_sve_fadd_u:
3179 case Intrinsic::aarch64_sve_fmul_u:
3181 case Intrinsic::aarch64_sve_fsub:
3183 case Intrinsic::aarch64_sve_fsub_u:
3185 case Intrinsic::aarch64_sve_add:
3187 case Intrinsic::aarch64_sve_add_u:
3189 Intrinsic::aarch64_sve_mla_u>(
3191 case Intrinsic::aarch64_sve_mla_u:
3193 case Intrinsic::aarch64_sve_sadalp:
3194 case Intrinsic::aarch64_sve_uadalp:
3196 case Intrinsic::aarch64_sve_sub:
3198 case Intrinsic::aarch64_sve_sub_u:
3200 Intrinsic::aarch64_sve_mls_u>(
3202 case Intrinsic::aarch64_sve_tbl:
3204 case Intrinsic::aarch64_sve_uunpkhi:
3205 case Intrinsic::aarch64_sve_uunpklo:
3206 case Intrinsic::aarch64_sve_sunpkhi:
3207 case Intrinsic::aarch64_sve_sunpklo:
3209 case Intrinsic::aarch64_sve_uzp1:
3211 case Intrinsic::aarch64_sve_zip1:
3212 case Intrinsic::aarch64_sve_zip2:
3214 case Intrinsic::aarch64_sve_ld1_gather_index:
3216 case Intrinsic::aarch64_sve_st1_scatter_index:
3218 case Intrinsic::aarch64_sve_ld1:
3220 case Intrinsic::aarch64_sve_st1:
3222 case Intrinsic::aarch64_sve_sdiv:
3224 case Intrinsic::aarch64_sve_sel:
3226 case Intrinsic::aarch64_sve_srshl:
3228 case Intrinsic::aarch64_sve_dupq_lane:
3230 case Intrinsic::aarch64_sve_insr:
3232 case Intrinsic::aarch64_sve_whilelo:
3234 case Intrinsic::aarch64_sve_ptrue:
3236 case Intrinsic::aarch64_sve_uxtb:
3238 case Intrinsic::aarch64_sve_uxth:
3240 case Intrinsic::aarch64_sve_uxtw:
3242 case Intrinsic::aarch64_sme_in_streaming_mode:
3246 return std::nullopt;
3253 SimplifyAndSetOp)
const {
3254 switch (
II.getIntrinsicID()) {
3257 case Intrinsic::aarch64_neon_fcvtxn:
3258 case Intrinsic::aarch64_neon_rshrn:
3259 case Intrinsic::aarch64_neon_sqrshrn:
3260 case Intrinsic::aarch64_neon_sqrshrun:
3261 case Intrinsic::aarch64_neon_sqshrn:
3262 case Intrinsic::aarch64_neon_sqshrun:
3263 case Intrinsic::aarch64_neon_sqxtn:
3264 case Intrinsic::aarch64_neon_sqxtun:
3265 case Intrinsic::aarch64_neon_uqrshrn:
3266 case Intrinsic::aarch64_neon_uqshrn:
3267 case Intrinsic::aarch64_neon_uqxtn:
3268 SimplifyAndSetOp(&
II, 0, OrigDemandedElts, UndefElts);
3272 return std::nullopt;
3276 return ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&
3286 if (ST->useSVEForFixedLengthVectors() &&
3289 std::max(ST->getMinSVEVectorSizeInBits(), 128u));
3290 else if (ST->isNeonAvailable())
3295 if (ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&
3304bool AArch64TTIImpl::isSingleExtWideningInstruction(
3306 Type *SrcOverrideTy)
const {
3321 (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))
3324 Type *SrcTy = SrcOverrideTy;
3326 case Instruction::Add:
3327 case Instruction::Sub: {
3336 if (Opcode == Instruction::Sub)
3360 assert(SrcTy &&
"Expected some SrcTy");
3362 unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
3368 DstTyL.first * DstTyL.second.getVectorMinNumElements();
3370 SrcTyL.first * SrcTyL.second.getVectorMinNumElements();
3374 return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;
3377Type *AArch64TTIImpl::isBinExtWideningInstruction(
unsigned Opcode,
Type *DstTy,
3379 Type *SrcOverrideTy)
const {
3380 if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
3381 Opcode != Instruction::Mul)
3391 (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))
3394 auto getScalarSizeWithOverride = [&](
const Value *
V) {
3400 ->getScalarSizeInBits();
3403 unsigned MaxEltSize = 0;
3406 unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
3407 unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
3408 MaxEltSize = std::max(EltSize0, EltSize1);
3411 unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
3412 unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
3415 if (EltSize0 >= DstEltSize / 2 || EltSize1 >= DstEltSize / 2)
3417 MaxEltSize = DstEltSize / 2;
3418 }
else if (Opcode == Instruction::Mul &&
3431 getScalarSizeWithOverride(
isa<ZExtInst>(Args[0]) ? Args[0] : Args[1]);
3435 if (MaxEltSize * 2 > DstEltSize)
3453 if (!Src->isVectorTy() || !TLI->isTypeLegal(TLI->getValueType(
DL, Src)) ||
3454 (Src->isScalableTy() && !ST->hasSVE2()))
3464 if (AddUser && AddUser->getOpcode() == Instruction::Add)
3468 if (!Shr || Shr->getOpcode() != Instruction::LShr)
3472 if (!Trunc || Trunc->getOpcode() != Instruction::Trunc ||
3473 Src->getScalarSizeInBits() !=
3497 int ISD = TLI->InstructionOpcodeToISD(Opcode);
3501 if (
I &&
I->hasOneUser()) {
3504 if (
Type *ExtTy = isBinExtWideningInstruction(
3505 SingleUser->getOpcode(), Dst, Operands,
3506 Src !=
I->getOperand(0)->getType() ? Src :
nullptr)) {
3519 if (isSingleExtWideningInstruction(
3520 SingleUser->getOpcode(), Dst, Operands,
3521 Src !=
I->getOperand(0)->getType() ? Src :
nullptr)) {
3525 if (SingleUser->getOpcode() == Instruction::Add) {
3526 if (
I == SingleUser->getOperand(1) ||
3528 cast<CastInst>(SingleUser->getOperand(1))->getOpcode() == Opcode))
3543 EVT SrcTy = TLI->getValueType(
DL, Src);
3544 EVT DstTy = TLI->getValueType(
DL, Dst);
3546 if (!SrcTy.isSimple() || !DstTy.
isSimple())
3551 if (!ST->hasSVE2() && !ST->isStreamingSVEAvailable() &&
3580 EVT WiderTy = SrcTy.
bitsGT(DstTy) ? SrcTy : DstTy;
3583 ST->useSVEForFixedLengthVectors(WiderTy)) {
3584 std::pair<InstructionCost, MVT> LT =
3586 unsigned NumElements =
3602 const unsigned int SVE_EXT_COST = 1;
3603 const unsigned int SVE_FCVT_COST = 1;
3604 const unsigned int SVE_UNPACK_ONCE = 4;
3605 const unsigned int SVE_UNPACK_TWICE = 16;
3734 SVE_EXT_COST + SVE_FCVT_COST},
3739 SVE_EXT_COST + SVE_FCVT_COST},
3746 SVE_EXT_COST + SVE_FCVT_COST},
3750 SVE_EXT_COST + SVE_FCVT_COST},
3756 SVE_EXT_COST + SVE_FCVT_COST},
3759 SVE_EXT_COST + SVE_FCVT_COST},
3764 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3766 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3776 SVE_EXT_COST + SVE_FCVT_COST},
3781 SVE_EXT_COST + SVE_FCVT_COST},
3794 SVE_EXT_COST + SVE_FCVT_COST},
3798 SVE_EXT_COST + SVE_FCVT_COST},
3810 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3812 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3814 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3816 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3820 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3822 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3838 SVE_EXT_COST + SVE_FCVT_COST},
3843 SVE_EXT_COST + SVE_FCVT_COST},
3854 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3856 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3858 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3860 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3862 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3864 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3868 SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3870 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3872 SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3874 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
4099 if (ST->hasFullFP16())
4111 Src->getScalarType(), CCH,
CostKind) +
4119 ST->isSVEorStreamingSVEAvailable() &&
4120 TLI->getTypeAction(Src->getContext(), SrcTy) ==
4122 TLI->getTypeAction(Dst->getContext(), DstTy) ==
4131 Opcode, LegalTy, Src, CCH,
CostKind,
I);
4134 return Part1 + Part2;
4141 ST->isSVEorStreamingSVEAvailable() && TLI->isTypeLegal(DstTy))
4153 assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
4166 CostKind, Index,
nullptr,
nullptr);
4170 auto DstVT = TLI->getValueType(
DL, Dst);
4171 auto SrcVT = TLI->getValueType(
DL, Src);
4176 if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
4182 if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits())
4192 case Instruction::SExt:
4197 case Instruction::ZExt:
4198 if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
4211 return Opcode == Instruction::PHI ? 0 : 1;
4220 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx,
4229 if (!LT.second.isVector())
4234 if (LT.second.isFixedLengthVector()) {
4235 unsigned Width = LT.second.getVectorNumElements();
4236 Index = Index % Width;
4251 if (ST->hasFastLD1Single())
4263 : ST->getVectorInsertExtractBaseCost() + 1;
4287 auto ExtractCanFuseWithFmul = [&]() {
4294 auto IsAllowedScalarTy = [&](
const Type *
T) {
4295 return T->isFloatTy() ||
T->isDoubleTy() ||
4296 (
T->isHalfTy() && ST->hasFullFP16());
4300 auto IsUserFMulScalarTy = [](
const Value *EEUser) {
4303 return BO && BO->getOpcode() == BinaryOperator::FMul &&
4304 !BO->getType()->isVectorTy();
4309 auto IsExtractLaneEquivalentToZero = [&](
unsigned Idx,
unsigned EltSz) {
4313 return Idx == 0 || (RegWidth != 0 && (Idx * EltSz) % RegWidth == 0);
4322 DenseMap<User *, unsigned> UserToExtractIdx;
4323 for (
auto *U :
Scalar->users()) {
4324 if (!IsUserFMulScalarTy(U))
4328 UserToExtractIdx[
U];
4330 if (UserToExtractIdx.
empty())
4332 for (
auto &[S, U, L] : ScalarUserAndIdx) {
4333 for (
auto *U : S->users()) {
4334 if (UserToExtractIdx.
contains(U)) {
4336 auto *Op0 =
FMul->getOperand(0);
4337 auto *Op1 =
FMul->getOperand(1);
4338 if ((Op0 == S && Op1 == S) || Op0 != S || Op1 != S) {
4339 UserToExtractIdx[
U] =
L;
4345 for (
auto &[U, L] : UserToExtractIdx) {
4357 return !EE->users().empty() &&
all_of(EE->users(), [&](
const User *U) {
4358 if (!IsUserFMulScalarTy(U))
4363 const auto *BO = cast<BinaryOperator>(U);
4364 const auto *OtherEE = dyn_cast<ExtractElementInst>(
4365 BO->getOperand(0) == EE ? BO->getOperand(1) : BO->getOperand(0));
4367 const auto *IdxOp = dyn_cast<ConstantInt>(OtherEE->getIndexOperand());
4370 return IsExtractLaneEquivalentToZero(
4371 cast<ConstantInt>(OtherEE->getIndexOperand())
4374 OtherEE->getType()->getScalarSizeInBits());
4382 if (Opcode == Instruction::ExtractElement && (
I || Scalar) &&
4383 ExtractCanFuseWithFmul())
4388 :
ST->getVectorInsertExtractBaseCost();
4397 if (Opcode == Instruction::InsertElement && Index == 0 && Op0 &&
4400 return getVectorInstrCostHelper(Opcode, Val,
CostKind, Index,
nullptr,
4406 Value *Scalar,
ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx,
4408 return getVectorInstrCostHelper(Opcode, Val,
CostKind, Index,
nullptr, Scalar,
4409 ScalarUserAndIdx, VIC);
4416 return getVectorInstrCostHelper(
I.getOpcode(), Val,
CostKind, Index, &
I,
4423 unsigned Index)
const {
4435 : ST->getVectorInsertExtractBaseCost() + 1;
4444 if (Ty->getElementType()->isFloatingPointTy())
4447 unsigned VecInstCost =
4449 return DemandedElts.
popcount() * (Insert + Extract) * VecInstCost;
4456 if (!Ty->getScalarType()->isHalfTy() && !Ty->getScalarType()->isBFloatTy())
4457 return std::nullopt;
4458 if (Ty->getScalarType()->isHalfTy() && ST->hasFullFP16())
4459 return std::nullopt;
4461 if (CanUseSVE && ST->hasSVEB16B16() && ST->isNonStreamingSVEorSME2Available())
4462 return std::nullopt;
4469 Cost += InstCost(PromotedTy);
4492 Op2Info, Args, CxtI);
4496 int ISD = TLI->InstructionOpcodeToISD(Opcode);
4503 Ty,
CostKind, Op1Info, Op2Info,
true,
4506 [&](
Type *PromotedTy) {
4510 return *PromotedCost;
4513 if (Ty->getScalarType()->isFP128Ty())
4521 if (
Type *ExtTy = isBinExtWideningInstruction(Opcode, Ty, Args)) {
4537 if (LT.second == MVT::v2i64) {
4617 auto VT = TLI->getValueType(
DL, Ty);
4618 if (VT.isScalarInteger() && VT.getSizeInBits() <= 64) {
4622 : (3 * AsrCost + AddCost);
4624 return MulCost + AsrCost + 2 * AddCost;
4626 }
else if (VT.isVector()) {
4636 if (Ty->isScalableTy() && ST->hasSVE())
4637 Cost += 2 * AsrCost;
4642 ? (LT.second.getScalarType() == MVT::i64 ? 1 : 2) * AsrCost
4646 }
else if (LT.second == MVT::v2i64) {
4647 return VT.getVectorNumElements() *
4654 if (Ty->isScalableTy() && ST->hasSVE())
4655 return MulCost + 2 * AddCost + 2 * AsrCost;
4656 return 2 * MulCost + AddCost + AsrCost + UsraCost;
4661 LT.second.isFixedLengthVector()) {
4671 return ExtractCost + InsertCost +
4679 auto VT = TLI->getValueType(
DL, Ty);
4695 bool HasMULH = VT == MVT::i64 || LT.second == MVT::nxv2i64 ||
4696 LT.second == MVT::nxv4i32 || LT.second == MVT::nxv8i16 ||
4697 LT.second == MVT::nxv16i8;
4698 bool Is128bit = LT.second.is128BitVector();
4710 (HasMULH ? 0 : ShrCost) +
4711 AddCost * 2 + ShrCost;
4712 return DivCost + (
ISD ==
ISD::UREM ? MulCost + AddCost : 0);
4719 if (!VT.isVector() && VT.getSizeInBits() > 64)
4723 Opcode, Ty,
CostKind, Op1Info, Op2Info);
4725 if (TLI->isOperationLegalOrCustom(
ISD, LT.second) && ST->hasSVE()) {
4729 Ty->getPrimitiveSizeInBits().getFixedValue() < 128) {
4739 if (
nullptr != Entry)
4744 if (LT.second.getScalarType() == MVT::i8)
4746 else if (LT.second.getScalarType() == MVT::i16)
4758 Opcode, Ty->getScalarType(),
CostKind, Op1Info, Op2Info);
4759 return (4 + DivCost) * VTy->getNumElements();
4765 -1,
nullptr,
nullptr);
4788 if ((Ty->isFloatTy() || Ty->isDoubleTy() ||
4789 (Ty->isHalfTy() && ST->hasFullFP16())) &&
4798 if (!Ty->getScalarType()->isFP128Ty())
4805 if (!Ty->getScalarType()->isFP128Ty())
4806 return 2 * LT.first;
4813 if (!Ty->isVectorTy())
4829 int MaxMergeDistance = 64;
4833 return NumVectorInstToHideOverhead;
4843 unsigned Opcode1,
unsigned Opcode2)
const {
4846 if (!
Sched.hasInstrSchedModel())
4850 Sched.getSchedClassDesc(
TII->get(Opcode1).getSchedClass());
4852 Sched.getSchedClassDesc(
TII->get(Opcode2).getSchedClass());
4858 "Cannot handle variant scheduling classes without an MI");
4874 const int AmortizationCost = 20;
4882 VecPred = CurrentPred;
4890 static const auto ValidMinMaxTys = {
4891 MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
4892 MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};
4893 static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};
4897 (ST->hasFullFP16() &&
4903 {Instruction::Select, MVT::v2i1, MVT::v2f32, 2},
4904 {Instruction::Select, MVT::v2i1, MVT::v2f64, 2},
4905 {Instruction::Select, MVT::v4i1, MVT::v4f32, 2},
4906 {Instruction::Select, MVT::v4i1, MVT::v4f16, 2},
4907 {Instruction::Select, MVT::v8i1, MVT::v8f16, 2},
4908 {Instruction::Select, MVT::v16i1, MVT::v16i16, 16},
4909 {Instruction::Select, MVT::v8i1, MVT::v8i32, 8},
4910 {Instruction::Select, MVT::v16i1, MVT::v16i32, 16},
4911 {Instruction::Select, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost},
4912 {Instruction::Select, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost},
4913 {Instruction::Select, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost}};
4915 EVT SelCondTy = TLI->getValueType(
DL, CondTy);
4916 EVT SelValTy = TLI->getValueType(
DL, ValTy);
4925 if (Opcode == Instruction::FCmp) {
4927 ValTy,
CostKind, Op1Info, Op2Info,
false,
4929 false, [&](
Type *PromotedTy) {
4941 return *PromotedCost;
4945 if (LT.second.getScalarType() != MVT::f64 &&
4946 LT.second.getScalarType() != MVT::f32 &&
4947 LT.second.getScalarType() != MVT::f16)
4952 unsigned Factor = 1;
4953 if (!CondTy->isVectorTy() &&
4967 AArch64::FCMEQv4f32))
4979 TLI->isTypeLegal(TLI->getValueType(
DL, ValTy)) &&
4998 Op1Info, Op2Info,
I);
5004 if (ST->requiresStrictAlign()) {
5009 Options.AllowOverlappingLoads =
true;
5010 Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
5015 Options.LoadSizes = {8, 4, 2, 1};
5016 Options.AllowedTailExpansions = {3, 5, 6};
5021 return ST->hasSVE();
5027 switch (MICA.
getID()) {
5028 case Intrinsic::masked_scatter:
5029 case Intrinsic::masked_gather:
5031 case Intrinsic::masked_load:
5032 case Intrinsic::masked_expandload:
5033 case Intrinsic::masked_store:
5047 if (!LT.first.isValid())
5052 if (VT->getElementType()->isIntegerTy(1))
5063 if (MICA.
getID() == Intrinsic::masked_expandload) {
5079 if (LT.first > 1 && LT.second.getScalarSizeInBits() > 8)
5080 return MemOpCost * 2;
5089 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
5090 "Should be called on only load or stores.");
5092 case Instruction::Load:
5095 return ST->getGatherOverhead();
5097 case Instruction::Store:
5100 return ST->getScatterOverhead();
5111 unsigned Opcode = (MICA.
getID() == Intrinsic::masked_gather ||
5112 MICA.
getID() == Intrinsic::vp_gather)
5114 : Instruction::Store;
5124 if (!LT.first.isValid())
5128 if (!LT.second.isVector() ||
5130 VT->getElementType()->isIntegerTy(1))
5140 ElementCount LegalVF = LT.second.getVectorElementCount();
5143 {TTI::OK_AnyValue, TTI::OP_None},
I);
5159 EVT VT = TLI->getValueType(
DL, Ty,
true);
5161 if (VT == MVT::Other)
5166 if (!LT.first.isValid())
5176 (VTy->getElementType()->isIntegerTy(1) &&
5177 !VTy->getElementCount().isKnownMultipleOf(
5187 if (Opcode == Instruction::Store)
5191 if (ST->getFixedLoadLatency())
5192 return (LT.first - 1) + ST->getFixedLoadLatency();
5201 if (LT.second.isScalableVector() ||
5202 ST->useSVEForFixedLengthVectors(LT.second)) {
5203 Inst = AArch64::LDR_ZXI;
5204 }
else if (LT.second.isVector() || LT.second.isFloatingPoint()) {
5205 switch (LT.second.getSizeInBits()) {
5207 Inst = AArch64::LDRBui;
5210 Inst = AArch64::LDRHui;
5213 Inst = AArch64::LDRSui;
5216 Inst = AArch64::LDRDui;
5219 Inst = AArch64::LDRQui;
5225 switch (LT.second.getSizeInBits()) {
5227 Inst = AArch64::LDRBBui;
5230 Inst = AArch64::LDRHHui;
5233 Inst = AArch64::LDRWui;
5236 Inst = AArch64::LDRXui;
5244 unsigned SchedClass =
TII->get(Inst).getSchedClass();
5248 float NumLoads = (LT.first - 1).
getValue();
5249 return NumLoads *
Sched.getReciprocalThroughput(*ST, *SCD) +
5250 Sched.computeInstrLatency(*ST, *SCD);
5253 if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
5254 LT.second.is128BitVector() && Alignment <
Align(16)) {
5260 const int AmortizationCost = 6;
5262 return LT.first * 2 * AmortizationCost;
5266 if (Ty->isPtrOrPtrVectorTy())
5271 if (Ty->getScalarSizeInBits() != LT.second.getScalarSizeInBits()) {
5273 if (VT == MVT::v4i8)
5280 if (!
isPowerOf2_32(EltSize) || EltSize < 8 || EltSize > 64 ||
5295 while (!TypeWorklist.
empty()) {
5317 bool UseMaskForCond,
bool UseMaskForGaps)
const {
5318 assert(Factor >= 2 &&
"Invalid interleave factor");
5333 if (!VecTy->
isScalableTy() && (UseMaskForCond || UseMaskForGaps))
5336 if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
5337 unsigned MinElts = VecVTy->getElementCount().getKnownMinValue();
5340 VecVTy->getElementCount().divideCoefficientBy(Factor));
5346 if (MinElts % Factor == 0 &&
5347 TLI->isLegalInterleavedAccessType(SubVecTy,
DL, UseScalable))
5348 return Factor * TLI->getNumInterleavedAccesses(SubVecTy,
DL, UseScalable);
5353 UseMaskForCond, UseMaskForGaps);
5360 for (
auto *
I : Tys) {
5361 if (!
I->isVectorTy())
5372 Align Alignment)
const {
5379 return (ST->isSVEAvailable() && ST->hasSVE2p2()) ||
5380 (ST->isSVEorStreamingSVEAvailable() && ST->hasSME2p2());
5385 bool HasUnorderedReductions)
const {
5388 return ST->getMaxInterleaveFactor();
5398 enum { MaxStridedLoads = 7 };
5400 int StridedLoads = 0;
5403 for (
const auto BB : L->blocks()) {
5404 for (
auto &
I : *BB) {
5410 if (L->isLoopInvariant(PtrValue))
5415 if (!LSCEVAddRec || !LSCEVAddRec->
isAffine())
5424 if (StridedLoads > MaxStridedLoads / 2)
5425 return StridedLoads;
5428 return StridedLoads;
5431 int StridedLoads = countStridedLoads(L, SE);
5433 <<
" strided loads\n");
5449 unsigned *FinalSize) {
5453 for (
auto *BB : L->getBlocks()) {
5454 for (
auto &
I : *BB) {
5460 if (!Cost.isValid())
5464 if (LoopCost > Budget)
5486 if (MaxTC > 0 && MaxTC <= 32)
5497 if (Blocks.
size() != 2)
5519 if (!L->isInnermost() || L->getNumBlocks() > 8)
5523 if (!L->getExitBlock())
5529 bool HasParellelizableReductions =
5530 L->getNumBlocks() == 1 &&
5531 any_of(L->getHeader()->phis(),
5533 return canParallelizeReductionWhenUnrolling(Phi, L, &SE);
5536 if (HasParellelizableReductions &&
5558 if (HasParellelizableReductions) {
5569 if (Header == Latch) {
5572 unsigned Width = 10;
5578 unsigned MaxInstsPerLine = 16;
5580 unsigned BestUC = 1;
5581 unsigned SizeWithBestUC = BestUC *
Size;
5583 unsigned SizeWithUC = UC *
Size;
5584 if (SizeWithUC > 48)
5586 if ((SizeWithUC % MaxInstsPerLine) == 0 ||
5587 (SizeWithBestUC % MaxInstsPerLine) < (SizeWithUC % MaxInstsPerLine)) {
5589 SizeWithBestUC = BestUC *
Size;
5599 for (
auto *BB : L->blocks()) {
5600 for (
auto &
I : *BB) {
5610 for (
auto *U :
I.users())
5612 LoadedValuesPlus.
insert(U);
5619 return LoadedValuesPlus.
contains(
SI->getOperand(0));
5645 auto *I = dyn_cast<Instruction>(V);
5646 return I && DependsOnLoopLoad(I, Depth + 1);
5653 DependsOnLoopLoad(
I, 0)) {
5669 if (L->getLoopDepth() > 1)
5680 for (
auto *BB : L->getBlocks()) {
5681 for (
auto &
I : *BB) {
5685 if (IsVectorized &&
I.getType()->isVectorTy())
5702 if (ST->isAppleMLike())
5704 else if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
5726 !ST->getSchedModel().isOutOfOrder()) {
5749 bool CanCreate)
const {
5753 case Intrinsic::aarch64_neon_st1x2:
5754 case Intrinsic::aarch64_neon_st1x3:
5755 case Intrinsic::aarch64_neon_st1x4:
5756 case Intrinsic::aarch64_neon_st2:
5757 case Intrinsic::aarch64_neon_st3:
5758 case Intrinsic::aarch64_neon_st4: {
5761 if (!CanCreate || !ST)
5763 unsigned NumElts = Inst->
arg_size() - 1;
5764 if (ST->getNumElements() != NumElts)
5766 for (
unsigned i = 0, e = NumElts; i != e; ++i) {
5772 for (
unsigned i = 0, e = NumElts; i != e; ++i) {
5774 Res = Builder.CreateInsertValue(Res, L, i);
5778 case Intrinsic::aarch64_neon_ld1x2:
5779 case Intrinsic::aarch64_neon_ld1x3:
5780 case Intrinsic::aarch64_neon_ld1x4:
5781 case Intrinsic::aarch64_neon_ld2:
5782 case Intrinsic::aarch64_neon_ld3:
5783 case Intrinsic::aarch64_neon_ld4:
5784 if (Inst->
getType() == ExpectedType)
5795 case Intrinsic::aarch64_neon_ld1x2:
5796 case Intrinsic::aarch64_neon_ld1x3:
5797 case Intrinsic::aarch64_neon_ld1x4:
5798 case Intrinsic::aarch64_neon_ld2:
5799 case Intrinsic::aarch64_neon_ld3:
5800 case Intrinsic::aarch64_neon_ld4:
5801 Info.ReadMem =
true;
5802 Info.WriteMem =
false;
5805 case Intrinsic::aarch64_neon_st1x2:
5806 case Intrinsic::aarch64_neon_st1x3:
5807 case Intrinsic::aarch64_neon_st1x4:
5808 case Intrinsic::aarch64_neon_st2:
5809 case Intrinsic::aarch64_neon_st3:
5810 case Intrinsic::aarch64_neon_st4:
5811 Info.ReadMem =
false;
5812 Info.WriteMem =
true;
5821 case Intrinsic::aarch64_neon_ld1x2:
5822 case Intrinsic::aarch64_neon_st1x2:
5823 Info.MatchingId = Intrinsic::aarch64_neon_ld1x2;
5825 case Intrinsic::aarch64_neon_ld1x3:
5826 case Intrinsic::aarch64_neon_st1x3:
5827 Info.MatchingId = Intrinsic::aarch64_neon_ld1x3;
5829 case Intrinsic::aarch64_neon_ld1x4:
5830 case Intrinsic::aarch64_neon_st1x4:
5831 Info.MatchingId = Intrinsic::aarch64_neon_ld1x4;
5833 case Intrinsic::aarch64_neon_ld2:
5834 case Intrinsic::aarch64_neon_st2:
5835 Info.MatchingId = Intrinsic::aarch64_neon_ld2;
5837 case Intrinsic::aarch64_neon_ld3:
5838 case Intrinsic::aarch64_neon_st3:
5839 Info.MatchingId = Intrinsic::aarch64_neon_ld3;
5841 case Intrinsic::aarch64_neon_ld4:
5842 case Intrinsic::aarch64_neon_st4:
5843 Info.MatchingId = Intrinsic::aarch64_neon_ld4;
5855 const Instruction &
I,
bool &AllowPromotionWithoutCommonHeader)
const {
5856 bool Considerable =
false;
5857 AllowPromotionWithoutCommonHeader =
false;
5860 Type *ConsideredSExtType =
5862 if (
I.getType() != ConsideredSExtType)
5866 for (
const User *U :
I.users()) {
5868 Considerable =
true;
5872 if (GEPInst->getNumOperands() > 2) {
5873 AllowPromotionWithoutCommonHeader =
true;
5878 return Considerable;
5929 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
5939 return LegalizationCost + 2;
5949 LegalizationCost *= LT.first - 1;
5952 int ISD = TLI->InstructionOpcodeToISD(Opcode);
5961 return LegalizationCost + 2;
5969 std::optional<FastMathFlags> FMF,
5985 return BaseCost + FixedVTy->getNumElements();
6002 MVT MTy = LT.second;
6003 int ISD = TLI->InstructionOpcodeToISD(Opcode);
6051 MTy.
isVector() && (EltTy->isFloatTy() || EltTy->isDoubleTy() ||
6052 (EltTy->isHalfTy() && ST->hasFullFP16()))) {
6064 return (LT.first - 1) +
Log2_32(NElts);
6069 return (LT.first - 1) + Entry->Cost;
6081 if (LT.first != 1) {
6087 ExtraCost *= LT.first - 1;
6090 auto Cost = ValVTy->getElementType()->isIntegerTy(1) ? 2 : Entry->Cost;
6091 return Cost + ExtraCost;
6099 unsigned Opcode,
bool IsUnsigned,
Type *ResTy,
VectorType *VecTy,
6101 EVT VecVT = TLI->getValueType(
DL, VecTy);
6102 EVT ResVT = TLI->getValueType(
DL, ResTy);
6112 if (((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
6114 ((LT.second == MVT::v4i16 || LT.second == MVT::v8i16) &&
6116 ((LT.second == MVT::v2i32 || LT.second == MVT::v4i32) &&
6118 return (LT.first - 1) * 2 + 2;
6129 EVT VecVT = TLI->getValueType(
DL, VecTy);
6130 EVT ResVT = TLI->getValueType(
DL, ResTy);
6133 RedOpcode == Instruction::Add) {
6139 if ((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
6141 return LT.first + 2;
6176 EVT PromotedVT = LT.second.getScalarType() == MVT::i1
6177 ? TLI->getPromotedVTForPredicate(
EVT(LT.second))
6191 if (LT.second.getScalarType() == MVT::i1) {
6200 assert(Entry &&
"Illegal Type for Splice");
6201 LegalizationCost += Entry->Cost;
6202 return LegalizationCost * LT.first;
6206 unsigned Opcode,
Type *InputTypeA,
Type *InputTypeB,
Type *AccumType,
6215 if ((Opcode != Instruction::Add && Opcode != Instruction::Sub &&
6216 Opcode != Instruction::FAdd && Opcode != Instruction::FSub) ||
6223 assert(FMF &&
"Missing FastMathFlags for floating-point partial reduction");
6224 if (!FMF->allowReassoc() || !FMF->allowContract())
6228 "FastMathFlags only apply to floating-point partial reductions");
6232 (!BinOp || (OpBExtend !=
TTI::PR_None && InputTypeB)) &&
6233 "Unexpected values for OpBExtend or InputTypeB");
6237 if (BinOp && ((*BinOp != Instruction::Mul && *BinOp != Instruction::FMul) ||
6238 InputTypeA != InputTypeB))
6241 bool IsUSDot = OpBExtend !=
TTI::PR_None && OpAExtend != OpBExtend;
6244 if (IsUSDot && !ST->hasMatMulInt8() && !ST->hasDotProd())
6257 auto TC = TLI->getTypeConversion(AccumVectorType->
getContext(),
6266 if (TLI->getTypeAction(AccumVectorType->
getContext(), TC.second) !=
6272 std::pair<InstructionCost, MVT> AccumLT =
6274 std::pair<InstructionCost, MVT> InputLT =
6278 auto IsSupported = [&](
bool SVEPred,
bool NEONPred) ->
bool {
6279 return (ST->isSVEorStreamingSVEAvailable() && SVEPred) ||
6280 (AccumLT.second.isFixedLengthVector() &&
6281 AccumLT.second.getSizeInBits() <= 128 && ST->isNeonAvailable() &&
6285 bool IsSub = Opcode == Instruction::Sub || Opcode == Instruction::FSub;
6293 if (AccumLT.second.getScalarType() == MVT::i32 &&
6294 InputLT.second.getScalarType() == MVT::i8) {
6296 if (!IsUSDot && IsSupported(
true, ST->hasDotProd()))
6297 return Cost + INegCost;
6299 if (IsUSDot && IsSupported(ST->hasMatMulInt8(), ST->hasMatMulInt8()))
6300 return Cost + INegCost;
6305 if (IsUSDot && IsSupported(
false, ST->hasDotProd()))
6306 return Cost * 3 + INegCost;
6309 if (ST->isSVEorStreamingSVEAvailable() && !IsUSDot) {
6311 if (AccumLT.second.getScalarType() == MVT::i64 &&
6312 InputLT.second.getScalarType() == MVT::i16)
6313 return Cost + INegCost;
6316 if (AccumLT.second.getScalarType() == MVT::i32 &&
6317 InputLT.second.getScalarType() == MVT::i16 &&
6318 (ST->hasSVE2p1() || ST->hasSME2()) && !IsSub)
6321 if (AccumLT.second.getScalarType() == MVT::i64 &&
6322 InputLT.second.getScalarType() == MVT::i8)
6328 return Cost + INegCost;
6331 if (AccumLT.second.getScalarType() == MVT::i16 &&
6332 InputLT.second.getScalarType() == MVT::i8 &&
6333 (ST->hasSVE2p3() || ST->hasSME2p3()) && !IsSub)
6339 if (Opcode == Instruction::FAdd && !IsSub &&
6340 IsSupported(ST->hasSME2() || ST->hasSVE2p1(), ST->hasF16F32DOT()) &&
6341 AccumLT.second.getScalarType() == MVT::f32 &&
6342 InputLT.second.getScalarType() == MVT::f16)
6346 if (Ratio == 2 && !IsUSDot) {
6347 MVT InVT = InputLT.second.getScalarType();
6350 if (IsSupported(ST->hasSVE2() || ST->hasSME(),
true) &&
6355 if (IsSupported(ST->hasSVE2(), ST->hasFP16FML()) && InVT == MVT::f16)
6359 if (IsSupported(ST->hasSVE2p1() || ST->hasSME2(),
false) &&
6360 InVT == MVT::bf16 && IsSub)
6370 if (IsSupported(ST->hasBF16(), ST->hasBF16()) && InVT == MVT::bf16)
6371 return Cost * 2 + FNegCost;
6375 AccumType, VF, OpAExtend, OpBExtend,
6387 "Expected the Mask to match the return size if given");
6389 "Expected the same scalar types");
6395 LT.second.getScalarSizeInBits() * Mask.size() > 128 &&
6396 SrcTy->getScalarSizeInBits() == LT.second.getScalarSizeInBits() &&
6397 Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {
6405 return std::max<InstructionCost>(1, LT.first / 4);
6413 Mask, 4, SrcTy->getElementCount().getKnownMinValue() * 2) ||
6415 Mask, 3, SrcTy->getElementCount().getKnownMinValue() * 2)))
6418 unsigned TpNumElts = Mask.size();
6419 unsigned LTNumElts = LT.second.getVectorNumElements();
6420 unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;
6422 LT.second.getVectorElementCount());
6424 std::map<std::tuple<unsigned, unsigned, SmallVector<int>>,
InstructionCost>
6426 for (
unsigned N = 0;
N < NumVecs;
N++) {
6430 unsigned Source1 = -1U, Source2 = -1U;
6431 unsigned NumSources = 0;
6432 for (
unsigned E = 0; E < LTNumElts; E++) {
6433 int MaskElt = (
N * LTNumElts + E < TpNumElts) ? Mask[
N * LTNumElts + E]
6442 unsigned Source = MaskElt / LTNumElts;
6443 if (NumSources == 0) {
6446 }
else if (NumSources == 1 && Source != Source1) {
6449 }
else if (NumSources >= 2 && Source != Source1 && Source != Source2) {
6455 if (Source == Source1)
6457 else if (Source == Source2)
6458 NMask.
push_back(MaskElt % LTNumElts + LTNumElts);
6467 PreviousCosts.insert({std::make_tuple(Source1, Source2, NMask), 0});
6478 NTp, NTp, NMask,
CostKind, 0,
nullptr, Args,
6481 Result.first->second = NCost;
6495 if (IsExtractSubvector && LT.second.isFixedLengthVector()) {
6496 if (LT.second.getFixedSizeInBits() >= 128 &&
6498 LT.second.getVectorNumElements() / 2) {
6501 if (Index == (
int)LT.second.getVectorNumElements() / 2)
6515 if (!Mask.empty() && LT.second.isFixedLengthVector() &&
6518 return M.value() < 0 || M.value() == (int)M.index();
6524 !Mask.empty() && SrcTy->getPrimitiveSizeInBits().isNonZero() &&
6525 SrcTy->getPrimitiveSizeInBits().isKnownMultipleOf(
6534 if ((ST->hasSVE2p1() || ST->hasSME2p1()) &&
6535 ST->isSVEorStreamingSVEAvailable() &&
6540 if (ST->isSVEorStreamingSVEAvailable() &&
6554 if (IsLoad && LT.second.isVector() &&
6556 LT.second.getVectorElementCount()))
6562 if (Mask.size() == 4 &&
6564 (SrcTy->getScalarSizeInBits() == 16 ||
6565 SrcTy->getScalarSizeInBits() == 32) &&
6566 all_of(Mask, [](
int E) {
return E < 8; }))
6572 if (LT.second.isFixedLengthVector() &&
6573 LT.second.getVectorNumElements() == Mask.size() &&
6579 (
isZIPMask(Mask, LT.second.getVectorNumElements(), Unused, Unused) ||
6580 isTRNMask(Mask, LT.second.getVectorNumElements(), Unused, Unused) ||
6581 isUZPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
6582 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6583 LT.second.getVectorNumElements(), 16) ||
6584 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6585 LT.second.getVectorNumElements(), 32) ||
6586 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6587 LT.second.getVectorNumElements(), 64) ||
6590 [&Mask](
int M) {
return M < 0 || M == Mask[0]; })))
6719 return LT.first * Entry->Cost;
6728 LT.second.getSizeInBits() <= 128 && SubTp) {
6730 if (SubLT.second.isVector()) {
6731 int NumElts = LT.second.getVectorNumElements();
6732 int NumSubElts = SubLT.second.getVectorNumElements();
6733 if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
6739 if (IsExtractSubvector)
6756 if (
getPtrStride(*PSE, AccessTy, Ptr, TheLoop, DT, Strides,
6775 return ST->useFixedOverScalableIfEqualCost();
6779 return ST->getEpilogueVectorizationMinVF();
6814 unsigned NumInsns = 0;
6816 NumInsns += BB->size();
6826 int64_t Scale,
unsigned AddrSpace)
const {
6854 if (
I->getOpcode() == Instruction::Or &&
6858 if (
I->getOpcode() == Instruction::Add ||
6859 I->getOpcode() == Instruction::Sub)
6884 return all_equal(Shuf->getShuffleMask());
6891 bool AllowSplat =
false) {
6896 auto areTypesHalfed = [](
Value *FullV,
Value *HalfV) {
6897 auto *FullTy = FullV->
getType();
6898 auto *HalfTy = HalfV->getType();
6900 2 * HalfTy->getPrimitiveSizeInBits().getFixedValue();
6903 auto extractHalf = [](
Value *FullV,
Value *HalfV) {
6906 return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
6910 Value *S1Op1 =
nullptr, *S2Op1 =
nullptr;
6924 if ((S1Op1 && (!areTypesHalfed(S1Op1, Op1) || !extractHalf(S1Op1, Op1))) ||
6925 (S2Op1 && (!areTypesHalfed(S2Op1, Op2) || !extractHalf(S2Op1, Op2))))
6939 if ((M1Start != 0 && M1Start != (NumElements / 2)) ||
6940 (M2Start != 0 && M2Start != (NumElements / 2)))
6942 if (S1Op1 && S2Op1 && M1Start != M2Start)
6952 return Ext->getType()->getScalarSizeInBits() ==
6953 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
6967 Value *VectorOperand =
nullptr;
6984 if (!
GEP ||
GEP->getNumOperands() != 2)
6988 Value *Offsets =
GEP->getOperand(1);
6991 if (
Base->getType()->isVectorTy() || !Offsets->getType()->isVectorTy())
6997 if (OffsetsInst->getType()->getScalarSizeInBits() > 32 &&
6998 OffsetsInst->getOperand(0)->getType()->getScalarSizeInBits() <= 32)
6999 Ops.push_back(&
GEP->getOperandUse(1));
7035 switch (
II->getIntrinsicID()) {
7036 case Intrinsic::aarch64_neon_smull:
7037 case Intrinsic::aarch64_neon_umull:
7040 Ops.push_back(&
II->getOperandUse(0));
7041 Ops.push_back(&
II->getOperandUse(1));
7046 case Intrinsic::fma:
7047 case Intrinsic::fmuladd:
7054 Ops.push_back(&
II->getOperandUse(0));
7056 Ops.push_back(&
II->getOperandUse(1));
7059 case Intrinsic::aarch64_neon_sqdmull:
7060 case Intrinsic::aarch64_neon_sqdmulh:
7061 case Intrinsic::aarch64_neon_sqrdmulh:
7064 Ops.push_back(&
II->getOperandUse(0));
7066 Ops.push_back(&
II->getOperandUse(1));
7067 return !
Ops.empty();
7068 case Intrinsic::aarch64_neon_fmlal:
7069 case Intrinsic::aarch64_neon_fmlal2:
7070 case Intrinsic::aarch64_neon_fmlsl:
7071 case Intrinsic::aarch64_neon_fmlsl2:
7074 Ops.push_back(&
II->getOperandUse(1));
7076 Ops.push_back(&
II->getOperandUse(2));
7077 return !
Ops.empty();
7078 case Intrinsic::aarch64_sve_ptest_first:
7079 case Intrinsic::aarch64_sve_ptest_last:
7081 if (IIOp->getIntrinsicID() == Intrinsic::aarch64_sve_ptrue)
7082 Ops.push_back(&
II->getOperandUse(0));
7083 return !
Ops.empty();
7084 case Intrinsic::aarch64_sme_write_horiz:
7085 case Intrinsic::aarch64_sme_write_vert:
7086 case Intrinsic::aarch64_sme_writeq_horiz:
7087 case Intrinsic::aarch64_sme_writeq_vert: {
7089 if (!Idx || Idx->getOpcode() != Instruction::Add)
7091 Ops.push_back(&
II->getOperandUse(1));
7094 case Intrinsic::aarch64_sme_read_horiz:
7095 case Intrinsic::aarch64_sme_read_vert:
7096 case Intrinsic::aarch64_sme_readq_horiz:
7097 case Intrinsic::aarch64_sme_readq_vert:
7098 case Intrinsic::aarch64_sme_ld1b_vert:
7099 case Intrinsic::aarch64_sme_ld1h_vert:
7100 case Intrinsic::aarch64_sme_ld1w_vert:
7101 case Intrinsic::aarch64_sme_ld1d_vert:
7102 case Intrinsic::aarch64_sme_ld1q_vert:
7103 case Intrinsic::aarch64_sme_st1b_vert:
7104 case Intrinsic::aarch64_sme_st1h_vert:
7105 case Intrinsic::aarch64_sme_st1w_vert:
7106 case Intrinsic::aarch64_sme_st1d_vert:
7107 case Intrinsic::aarch64_sme_st1q_vert:
7108 case Intrinsic::aarch64_sme_ld1b_horiz:
7109 case Intrinsic::aarch64_sme_ld1h_horiz:
7110 case Intrinsic::aarch64_sme_ld1w_horiz:
7111 case Intrinsic::aarch64_sme_ld1d_horiz:
7112 case Intrinsic::aarch64_sme_ld1q_horiz:
7113 case Intrinsic::aarch64_sme_st1b_horiz:
7114 case Intrinsic::aarch64_sme_st1h_horiz:
7115 case Intrinsic::aarch64_sme_st1w_horiz:
7116 case Intrinsic::aarch64_sme_st1d_horiz:
7117 case Intrinsic::aarch64_sme_st1q_horiz: {
7119 if (!Idx || Idx->getOpcode() != Instruction::Add)
7121 Ops.push_back(&
II->getOperandUse(3));
7124 case Intrinsic::aarch64_neon_pmull:
7127 Ops.push_back(&
II->getOperandUse(0));
7128 Ops.push_back(&
II->getOperandUse(1));
7130 case Intrinsic::aarch64_neon_pmull64:
7132 II->getArgOperand(1)))
7134 Ops.push_back(&
II->getArgOperandUse(0));
7135 Ops.push_back(&
II->getArgOperandUse(1));
7137 case Intrinsic::masked_gather:
7140 Ops.push_back(&
II->getArgOperandUse(0));
7142 case Intrinsic::masked_scatter:
7145 Ops.push_back(&
II->getArgOperandUse(1));
7152 auto ShouldSinkCondition = [](
Value *
Cond,
7157 if (
II->getIntrinsicID() != Intrinsic::vector_reduce_or ||
7161 Ops.push_back(&
II->getOperandUse(0));
7165 switch (
I->getOpcode()) {
7166 case Instruction::GetElementPtr:
7167 case Instruction::Add:
7168 case Instruction::Sub:
7170 for (
unsigned Op = 0;
Op <
I->getNumOperands(); ++
Op) {
7172 Ops.push_back(&
I->getOperandUse(
Op));
7177 case Instruction::Select: {
7178 if (!ShouldSinkCondition(
I->getOperand(0),
Ops))
7181 Ops.push_back(&
I->getOperandUse(0));
7184 case Instruction::UncondBr:
7186 case Instruction::CondBr: {
7190 Ops.push_back(&
I->getOperandUse(0));
7193 case Instruction::FMul:
7198 Ops.push_back(&
I->getOperandUse(0));
7200 Ops.push_back(&
I->getOperandUse(1));
7210 case Instruction::Xor:
7213 if (
I->getType()->isVectorTy() && ST->isNeonAvailable()) {
7215 ST->isSVEorStreamingSVEAvailable() && (ST->hasSVE2() || ST->hasSME());
7220 case Instruction::And:
7221 case Instruction::Or:
7224 if (
I->getOpcode() == Instruction::Or &&
7229 if (!(
I->getType()->isVectorTy() && ST->hasNEON()) &&
7232 for (
auto &
Op :
I->operands()) {
7244 Ops.push_back(&Not);
7245 Ops.push_back(&InsertElt);
7255 if (!
I->getType()->isVectorTy())
7256 return !
Ops.empty();
7258 switch (
I->getOpcode()) {
7259 case Instruction::Sub:
7260 case Instruction::Add: {
7269 Ops.push_back(&Ext1->getOperandUse(0));
7270 Ops.push_back(&Ext2->getOperandUse(0));
7273 Ops.push_back(&
I->getOperandUse(0));
7274 Ops.push_back(&
I->getOperandUse(1));
7278 case Instruction::Or: {
7281 if (ST->hasNEON()) {
7295 if (
I->getParent() != MainAnd->
getParent() ||
7300 if (
I->getParent() != IA->getParent() ||
7301 I->getParent() != IB->getParent())
7306 Ops.push_back(&
I->getOperandUse(0));
7307 Ops.push_back(&
I->getOperandUse(1));
7316 case Instruction::Mul: {
7317 auto ShouldSinkSplatForIndexedVariant = [](
Value *V) {
7320 if (Ty->isScalableTy())
7324 return Ty->getScalarSizeInBits() == 16 || Ty->getScalarSizeInBits() == 32;
7327 int NumZExts = 0, NumSExts = 0;
7328 for (
auto &
Op :
I->operands()) {
7335 auto *ExtOp = Ext->getOperand(0);
7336 if (
isSplatShuffle(ExtOp) && ShouldSinkSplatForIndexedVariant(ExtOp))
7337 Ops.push_back(&Ext->getOperandUse(0));
7345 if (Ext->getOperand(0)->getType()->getScalarSizeInBits() * 2 <
7346 I->getType()->getScalarSizeInBits())
7383 if (!ElementConstant || !ElementConstant->
isZero())
7386 unsigned Opcode = OperandInstr->
getOpcode();
7387 if (Opcode == Instruction::SExt)
7389 else if (Opcode == Instruction::ZExt)
7394 unsigned Bitwidth =
I->getType()->getScalarSizeInBits();
7404 Ops.push_back(&Insert->getOperandUse(1));
7410 if (!
Ops.empty() && (NumSExts == 2 || NumZExts == 2))
7414 if (!ShouldSinkSplatForIndexedVariant(
I))
7419 Ops.push_back(&
I->getOperandUse(0));
7421 Ops.push_back(&
I->getOperandUse(1));
7423 return !
Ops.empty();
7425 case Instruction::FMul: {
7427 if (
I->getType()->isScalableTy())
7428 return !
Ops.empty();
7432 return !
Ops.empty();
7436 Ops.push_back(&
I->getOperandUse(0));
7438 Ops.push_back(&
I->getOperandUse(1));
7439 return !
Ops.empty();
static bool isAllActivePredicate(const SelectionDAG &DAG, SDValue N)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static Error reportError(StringRef Message)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Cost tables and simple lookup functions.
This file defines the DenseMap class.
static Value * getCondition(Instruction *I)
const HexagonInstrInfo * TII
This file provides the interface for the instcombine pass implementation.
static constexpr Value * getValue(Ty &ValueOrUse)
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
This file defines the LoopVectorizationLegality class.
static const Function * getCalledFunction(const Value *V)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > & Cond
static uint64_t getBits(uint64_t Val, int Start, int End)
static SymbolRef::Type getType(const Symbol *Sym)
This file describes how to lower LLVM code to machine code.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
unsigned getVectorInsertExtractBaseCost() const
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const override
InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
InstructionCost getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const override
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const override
bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, Type *Src) const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getIntImmCost(int64_t Val) const
Calculate the cost of materializing a 64-bit value.
std::optional< InstructionCost > getFP16BF16PromoteCost(Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, bool IncludeTrunc, bool CanUseSVE, std::function< InstructionCost(Type *)> InstCost) const
FP16 and BF16 operations are lowered to fptrunc(op(fpext, fpext) if the architecture features are not...
bool prefersVectorizedAddressing() const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const override
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override
bool isElementTypeLegalForScalableVector(Type *Ty) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind, std::optional< FastMathFlags > FMF) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override
bool preferTailFoldingOverEpilogue(TailFoldingInfo *TFI) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
APInt getPriorityMask(const Function &F) const override
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const override
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const override
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
bool useNeonVector(const Type *Ty) const
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
bool isLegalMaskedExpandLoad(Type *DataTy, Align Alignment) const override
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index, TTI::TargetCostKind CostKind) const override
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const override
bool areInlineCompatible(const Function *Caller, const Function *Callee) const override
unsigned getMaxNumElements(ElementCount VF) const
Try to return an estimate cost factor that can be used as a multiplier when scalarizing an operation ...
bool shouldTreatInstructionLikeSelect(const Instruction *I) const override
bool isMultiversionedFunction(const Function &F) const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const override
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
bool isLegalMaskedGatherScatter(Type *DataType) const
InstructionCost getBranchMispredictPenalty() const override
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const override
See if I should be considered for address type promotion.
APInt getFeatureMask(const Function &F) const override
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
bool areTypesABICompatible(const Function *Caller, const Function *Callee, ArrayRef< Type * > Types) const override
bool enableScalableVectorization() const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType, bool CanCreate=true) const override
bool hasKnownLowerThroughputFromSchedulingModel(unsigned Opcode1, unsigned Opcode2) const
Check whether Opcode1 has less throughput according to the scheduling model than Opcode2.
unsigned getEpilogueVectorizationMinVF() const override
InstructionCost getSpliceCost(VectorType *Tp, int Index, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, TTI::TargetCostKind CostKind) const
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const override
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
bool preferFixedOverScalableIfEqualCost(bool IsEpilogue) const override
unsigned getMaxInterleaveFactor(ElementCount VF, bool HasUnorderedReductions) const override
Class for arbitrary precision integers.
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
unsigned popcount() const
Count the number of bits set.
unsigned countLeadingOnes() const
void negate()
Negate this APInt in place.
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
unsigned logBase2() const
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
int64_t getSExtValue() const
Get sign extended value.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const override
bool areInlineCompatible(const Function *Caller, const Function *Callee) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind) const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind, std::optional< FastMathFlags > FMF) const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
bool isTypeLegal(Type *Ty) const override
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
static bool isIntPredicate(Predicate P)
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
const APInt & getValue() const
Return the constant as an APInt value reference.
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
This is an important base class in LLVM.
LLVM_ABI Constant * getSplatValue(bool AllowPoison=false) const
If all elements of the vector constant have the same value, return that value.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string, and methods for querying it.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
constexpr bool isScalar() const
Exactly one element.
This provides a helper for copying FMF from an instruction or setting specified flags.
Convenience struct for specifying and reasoning about fast-math flags.
bool noSignedZeros() const
bool allowContract() const
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Type * getDoubleTy()
Fetch the type representing a 64-bit floating point value.
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcasted to NumElts elements.
LLVM_ABI CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Type * getHalfTy()
Fetch the type representing a 16-bit floating point value.
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateBinOpFMF(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
LLVM_ABI Value * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="", ArrayRef< OperandBundleDef > OpBundles={}, function_ref< void(CallInst *)> SetFn=[](CallInst *) {})
Variant to create a possibly constant-folded intrinsic.
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
LLVM_ABI CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Type * getFloatTy()
Fetch the type representing a 32-bit floating point value.
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name="")
Create a call to the vector.insert intrinsic.
LLVM_ABI Value * CreateElementCount(Type *Ty, ElementCount EC)
Create an expression which evaluates to the number of elements in EC at runtime.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
This instruction inserts a single (scalar) element into a VectorType value.
The core instruction combiner logic.
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
static InstructionCost getInvalid(CostType Val=0)
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
bool hasGroups() const
Returns true if we have any interleave groups.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
const IntrinsicInst * getInst() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
iterator_range< block_iterator > blocks() const
RecurrenceSet & getFixedOrderRecurrences()
Return the fixed-order recurrences found in the loop.
DominatorTree * getDominatorTree() const
PredicatedScalarEvolution * getPredicatedScalarEvolution() const
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
Represents a single loop in the control flow graph.
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
bool isFixedLengthVector() const
MVT getVectorElementType() const
Information for memory intrinsic cost model.
Align getAlignment() const
Type * getDataType() const
Intrinsic::ID getID() const
const Instruction * getInst() const
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Type * getRecurrenceType() const
Returns the type of the recurrence.
RecurKind getRecurrenceKind() const
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
This class represents an analyzed expression in the program.
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasNonStreamingInterfaceAndBody() const
bool hasStreamingCompatibleInterface() const
bool hasStreamingInterfaceOrBody() const
bool isSMEABIRoutine() const
bool hasStreamingBody() const
void set(unsigned M, bool Enable=true)
SMECallAttrs is a utility class to hold the SMEAttrs for a callsite.
bool requiresPreservingZT0() const
bool requiresSMChange() const
bool requiresLazySave() const
bool requiresPreservingAllZAState() const
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
static ScalableVectorType * getDoubleElementsVectorType(ScalableVectorType *VTy)
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value,...
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns the upper bound of the loop trip count as a normal unsigned value.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getSymbolicMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEV that is greater than or equal to (i.e. a "conservative over-approximation" of) the value returned by getBackedgeTakenCount.
This instruction constructs a fixed permutation of two input vectors.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static LLVM_ABI bool isExtractSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &Index)
Return true if this shuffle mask is an extract subvector mask.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
An instruction for storing to memory.
Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Class to represent struct types.
TargetInstrInfo - Interface to description of machine instruction set.
std::pair< LegalizeTypeAction, EVT > LegalizeKind
LegalizeKind holds the legalization kind that needs to happen to EVT in order to type-legalize it.
const RTLIB::RuntimeLibcallsInfo & getRuntimeLibcallsInfo() const
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
bool isPointerTy() const
True if this is an instance of PointerType.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
Type * getElementType() const
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
LLVM_ABI APInt getCpuSupportsMask(ArrayRef< StringRef > Features)
static constexpr unsigned SVEBitsPerBlock
LLVM_ABI APInt getFMVPriority(ArrayRef< StringRef > Features)
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
@ C
The default llvm calling convention, compatible with C.
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types a...
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ SIGN_EXTEND
Conversion operators.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
auto m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
match_bind< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_nonnegative > m_NonNegative()
Match an integer or vector of non-negative values.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
IntrinsicID_match m_VScale()
Matches a call to llvm.vscale().
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
auto m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
brc_match< Cond_t, match_bind< BasicBlock >, match_bind< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
auto m_Undef()
Match an arbitrary undef constant.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
LLVM_ABI Libcall getPOW(EVT RetVT)
getPOW - Return the POW_* value for the given types, or UNKNOWN_LIBCALL if there is none.
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
std::optional< unsigned > isDUPQMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPQMask - matches a splat of equivalent lanes within segments of a given number of elements.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
bool isZIPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for zip1 or zip2 masks of the form: <0, 8, 1, 9, 2, 10, 3, 11> (WhichResultOut = 0,...
TailFoldingOpts
An enum to describe what types of loops we should attempt to tail-fold: Disabled: None Reductions: Lo...
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
bool isDUPFirstSegmentMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPFirstSegmentMask - matches a splat of the first 128b segment.
TypeConversionCostTblEntryT< unsigned > TypeConversionCostTblEntry
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI std::optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for loop.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Value
LLVM_ABI bool MaskedValueIsZero(const Value *V, const APInt &Mask, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if 'V & Mask' is known to be zero.
unsigned M1(unsigned Val)
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
unsigned getPerfectShuffleCost(llvm::ArrayRef< int > M)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool isUZPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for uzp1 or uzp2 masks of the form: <0, 2, 4, 6, 8, 10, 12, 14> or <1,...
bool isREVMask(ArrayRef< int > M, unsigned EltSize, unsigned NumElts, unsigned BlockSize)
isREVMask - Check if a vector shuffle corresponds to a REV instruction with the specified blocksize.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ Or
Bitwise or logical OR of integers.
@ FSub
Subtraction of floats.
@ FAddChainWithSubs
A chain of fadds and fsubs.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ Xor
Bitwise or logical XOR of integers.
@ FindLast
FindLast reduction with select(cmp(),x,y) where x and y.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
DWARFExpression::Operation Op
CostTblEntryT< unsigned > CostTblEntry
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
unsigned getNumElementsFromSVEPredPattern(unsigned Pattern)
Return the number of active elements for VL1 to VL256 predicate pattern, zero for all other patterns.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool ShouldCheckWrap=true, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
If the pointer has a constant stride return it in units of the access type size.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
bool isTRNMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for trn1 or trn2 masks of the form: <0, 8, 2, 10, 4, 12, 6, 14> (WhichResultOut = 0,...
unsigned getMatchingIROpode() const
bool inactiveLanesAreUnused() const
bool inactiveLanesAreNotDefined() const
bool hasMatchingUndefIntrinsic() const
static SVEIntrinsicInfo defaultMergingUnaryNarrowingTopOp()
static SVEIntrinsicInfo defaultZeroingOp()
bool hasGoverningPredicate() const
SVEIntrinsicInfo & setOperandIdxInactiveLanesTakenFrom(unsigned Index)
static SVEIntrinsicInfo defaultMergingOp(Intrinsic::ID IID=Intrinsic::not_intrinsic)
SVEIntrinsicInfo & setOperandIdxWithNoActiveLanes(unsigned Index)
unsigned getOperandIdxWithNoActiveLanes() const
SVEIntrinsicInfo & setInactiveLanesAreUnused()
SVEIntrinsicInfo & setInactiveLanesAreNotDefined()
SVEIntrinsicInfo & setGoverningPredicateOperandIdx(unsigned Index)
bool inactiveLanesTakenFromOperand() const
static SVEIntrinsicInfo defaultUndefOp()
bool hasOperandWithNoActiveLanes() const
Intrinsic::ID getMatchingUndefIntrinsic() const
SVEIntrinsicInfo & setResultIsZeroInitialized()
static SVEIntrinsicInfo defaultMergingUnaryOp()
SVEIntrinsicInfo & setMatchingUndefIntrinsic(Intrinsic::ID IID)
unsigned getGoverningPredicateOperandIdx() const
bool hasMatchingIROpode() const
bool resultIsZeroInitialized() const
SVEIntrinsicInfo & setMatchingIROpcode(unsigned Opcode)
unsigned getOperandIdxInactiveLanesTakenFrom() const
static SVEIntrinsicInfo defaultVoidOp(unsigned GPIndex)
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
uint64_t getScalarSizeInBits() const
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isFixedLengthVector() const
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Machine model for scheduling, bundling, and heuristics.
static LLVM_ABI double getReciprocalThroughput(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
Information about a load/store intrinsic defined by the target.
InterleavedAccessInfo * IAI
LoopVectorizationLegality * LVL
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale.