#include "llvm/IR/IntrinsicsAArch64.h"

#define DEBUG_TYPE "aarch64tti"

    "sve-prefer-fixed-over-scalable-if-equal", cl::Hidden);

    "Penalty of calling a function that requires a change to PSTATE.SM"));

    cl::desc(
        "Penalty of inlining a call that requires a change to PSTATE.SM"));

    cl::desc("The cost of a histcnt instruction"));

    cl::desc("The number of instructions to search for a redundant dmb"));
class TailFoldingOption {

  bool NeedsDefault = true;

  void setNeedsDefault(bool V) { NeedsDefault = V; }

    assert((InitialBits == TailFoldingOpts::Disabled || !NeedsDefault) &&
           "Initial bits should only include one of "
           "(disabled|all|simple|default)");
    Bits = NeedsDefault ? DefaultBits : InitialBits;

    Bits &= ~DisableBits;

    errs() << "invalid argument '" << Opt
           << "' to -sve-tail-folding=; the option should be of the form\n"
              "  (disabled|all|default|simple)[+(reductions|recurrences"
              "|reverse|noreductions|norecurrences|noreverse)]\n";
  void operator=(const std::string &Val) {

    setNeedsDefault(false);

    StringRef(Val).split(TailFoldTypes, '+', -1, false);

    unsigned StartIdx = 1;
    if (TailFoldTypes[0] == "disabled")
      setInitialBits(TailFoldingOpts::Disabled);
    else if (TailFoldTypes[0] == "all")
      setInitialBits(TailFoldingOpts::All);
    else if (TailFoldTypes[0] == "default")
      setNeedsDefault(true);
    else if (TailFoldTypes[0] == "simple")
      setInitialBits(TailFoldingOpts::Simple);

      setInitialBits(TailFoldingOpts::Disabled);

    for (unsigned I = StartIdx; I < TailFoldTypes.size(); I++) {
      if (TailFoldTypes[I] == "reductions")
        setEnableBit(TailFoldingOpts::Reductions);
      else if (TailFoldTypes[I] == "recurrences")
        setEnableBit(TailFoldingOpts::Recurrences);
      else if (TailFoldTypes[I] == "reverse")
        setEnableBit(TailFoldingOpts::Reverse);
      else if (TailFoldTypes[I] == "noreductions")
        setDisableBit(TailFoldingOpts::Reductions);
      else if (TailFoldTypes[I] == "norecurrences")
        setDisableBit(TailFoldingOpts::Recurrences);
      else if (TailFoldTypes[I] == "noreverse")
        setDisableBit(TailFoldingOpts::Reverse);
192 "Control the use of vectorisation using tail-folding for SVE where the"
193 " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:"
194 "\ndisabled (Initial) No loop types will vectorize using "
196 "\ndefault (Initial) Uses the default tail-folding settings for "
198 "\nall (Initial) All legal loop types will vectorize using "
200 "\nsimple (Initial) Use tail-folding for simple loops (not "
201 "reductions or recurrences)"
202 "\nreductions Use tail-folding for loops containing reductions"
203 "\nnoreductions Inverse of above"
204 "\nrecurrences Use tail-folding for loops containing fixed order "
206 "\nnorecurrences Inverse of above"
207 "\nreverse Use tail-folding for loops requiring reversed "
209 "\nnoreverse Inverse of above"),
  StringRef FeatureStr = F.getFnAttribute(AttributeStr).getValueAsString();

  FeatureStr.split(Features, ",");

  return F.hasFnAttribute("fmv-features");

    AArch64::FeatureExecuteOnly,
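// Inlining compatibility sketch (see the checks below): the features named in
// InlineInverseFeatures are toggled on both caller and callee, and the callee
// may only be inlined when its effective feature set is a subset of the
// caller's.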
      TM.getSubtargetImpl(*Caller)->getFeatureBits();

      TM.getSubtargetImpl(*Callee)->getFeatureBits();

  FeatureBitset EffectiveCallerBits = CallerBits ^ InlineInverseFeatures;
  FeatureBitset EffectiveCalleeBits = CalleeBits ^ InlineInverseFeatures;

  return (EffectiveCallerBits & EffectiveCalleeBits) == EffectiveCalleeBits;

  auto FVTy = dyn_cast<FixedVectorType>(Ty);

         FVTy->getScalarSizeInBits() * FVTy->getNumElements() > 128;

                                            unsigned DefaultCallPenalty) const {

  if (F == Call.getCaller())

  return DefaultCallPenalty;

                         ST->isNeonAvailable());
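// getIntImmCost sketch: the immediate is sign-extended to a multiple of
// 64 bits and costed per 64-bit chunk that must be materialised, with a
// minimum returned cost of 1 (see the loop below).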
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();

  ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);

  for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {

  return std::max<InstructionCost>(1, Cost);

  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();

  unsigned ImmIdx = ~0U;

  case Instruction::GetElementPtr:

  case Instruction::Store:

  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:

  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:

  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::IntToPtr:
  case Instruction::PtrToInt:
  case Instruction::BitCast:
  case Instruction::PHI:
  case Instruction::Call:
  case Instruction::Select:
  case Instruction::Ret:
  case Instruction::Load:

    int NumConstants = (BitSize + 63) / 64;

  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();

  if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)

  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:

    int NumConstants = (BitSize + 63) / 64;

  case Intrinsic::experimental_stackmap:
    if ((Idx < 2) || (Imm.getBitWidth() <= 64 &&
                      isInt<64>(Imm.getSExtValue())))

  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint:
    if ((Idx < 4) || (Imm.getBitWidth() <= 64 &&
                      isInt<64>(Imm.getSExtValue())))

  case Intrinsic::experimental_gc_statepoint:
    if ((Idx < 5) || (Imm.getBitWidth() <= 64 &&
                      isInt<64>(Imm.getSExtValue())))

  if (TyWidth == 32 || TyWidth == 64)

  unsigned TotalHistCnts = 1;
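  // Histogram cost sketch (see below): HISTCNT only handles 32- and 64-bit
  // elements, so element counts wider than one legal vector are split and the
  // base histcnt cost is scaled by the number of operations required.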
    unsigned EC = VTy->getElementCount().getKnownMinValue();

    unsigned LegalEltSize = EltSize <= 32 ? 32 : 64;

    if (EC == 2 || (LegalEltSize == 32 && EC == 4))

    TotalHistCnts = EC / NaturalVectorWidth;

  switch (ICA.getID()) {
  case Intrinsic::experimental_vector_histogram_add: {

  case Intrinsic::umin:
  case Intrinsic::umax:
  case Intrinsic::smin:
  case Intrinsic::smax: {
    static const auto ValidMinMaxTys = {MVT::v8i8,    MVT::v16i8,   MVT::v4i16,
                                        MVT::v8i16,   MVT::v2i32,   MVT::v4i32,
                                        MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32,

    if (LT.second == MVT::v2i64)

    if (any_of(ValidMinMaxTys, [&](MVT M) { return M == LT.second; }))

  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat: {
    static const auto ValidSatTys = {MVT::v8i8,  MVT::v16i8, MVT::v4i16,
                                     MVT::v8i16, MVT::v2i32, MVT::v4i32,

        LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1 : 4;
    if (any_of(ValidSatTys, [&](MVT M) { return M == LT.second; }))
      return LT.first * Instrs;

    if (ST->isSVEAvailable() && VectorSize >= 128 && isPowerOf2_64(VectorSize))
      return LT.first * Instrs;

  case Intrinsic::abs: {
    static const auto ValidAbsTys = {MVT::v8i8,  MVT::v16i8, MVT::v4i16,
                                     MVT::v8i16, MVT::v2i32, MVT::v4i32,

    if (any_of(ValidAbsTys, [&](MVT M) { return M == LT.second; }))

  case Intrinsic::bswap: {
    static const auto ValidAbsTys = {MVT::v4i16, MVT::v8i16, MVT::v2i32,
                                     MVT::v4i32, MVT::v2i64};

    if (any_of(ValidAbsTys, [&](MVT M) { return M == LT.second; }) &&
        LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits())

  case Intrinsic::fmuladd: {

        (EltTy->isHalfTy() && ST->hasFullFP16()))

  case Intrinsic::stepvector: {
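    // When legalisation splits the step vector across several registers
    // (LT.first > 1), each additional part costs one extra vector add, as
    // accounted for below.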
    Cost += AddCost * (LT.first - 1);

  case Intrinsic::vector_extract:
  case Intrinsic::vector_insert: {

    bool IsExtract = ICA.getID() == Intrinsic::vector_extract;
    EVT SubVecVT = IsExtract ? getTLI()->getValueType(DL, RetTy)

        getTLI()->getTypeConversion(C, SubVecVT);

        getTLI()->getTypeConversion(C, VecVT);

  case Intrinsic::bitreverse: {
        {Intrinsic::bitreverse, MVT::i32, 1},
        {Intrinsic::bitreverse, MVT::i64, 1},
        {Intrinsic::bitreverse, MVT::v8i8, 1},
        {Intrinsic::bitreverse, MVT::v16i8, 1},
        {Intrinsic::bitreverse, MVT::v4i16, 2},
        {Intrinsic::bitreverse, MVT::v8i16, 2},
        {Intrinsic::bitreverse, MVT::v2i32, 2},
        {Intrinsic::bitreverse, MVT::v4i32, 2},
        {Intrinsic::bitreverse, MVT::v1i64, 2},
        {Intrinsic::bitreverse, MVT::v2i64, 2},

    if (TLI->getValueType(DL, RetTy, true) == MVT::i8 ||
        TLI->getValueType(DL, RetTy, true) == MVT::i16)
      return LegalisationCost.first * Entry->Cost + 1;

    return LegalisationCost.first * Entry->Cost;

  case Intrinsic::ctpop: {
    if (!ST->hasNEON()) {

        RetTy->getScalarSizeInBits()

    return LT.first * Entry->Cost + ExtraCost;

  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: {
        {Intrinsic::sadd_with_overflow, MVT::i8, 3},
        {Intrinsic::uadd_with_overflow, MVT::i8, 3},
        {Intrinsic::sadd_with_overflow, MVT::i16, 3},
        {Intrinsic::uadd_with_overflow, MVT::i16, 3},
        {Intrinsic::sadd_with_overflow, MVT::i32, 1},
        {Intrinsic::uadd_with_overflow, MVT::i32, 1},
        {Intrinsic::sadd_with_overflow, MVT::i64, 1},
        {Intrinsic::uadd_with_overflow, MVT::i64, 1},
        {Intrinsic::ssub_with_overflow, MVT::i8, 3},
        {Intrinsic::usub_with_overflow, MVT::i8, 3},
        {Intrinsic::ssub_with_overflow, MVT::i16, 3},
        {Intrinsic::usub_with_overflow, MVT::i16, 3},
        {Intrinsic::ssub_with_overflow, MVT::i32, 1},
        {Intrinsic::usub_with_overflow, MVT::i32, 1},
        {Intrinsic::ssub_with_overflow, MVT::i64, 1},
        {Intrinsic::usub_with_overflow, MVT::i64, 1},
        {Intrinsic::smul_with_overflow, MVT::i8, 5},
        {Intrinsic::umul_with_overflow, MVT::i8, 4},
        {Intrinsic::smul_with_overflow, MVT::i16, 5},
        {Intrinsic::umul_with_overflow, MVT::i16, 4},
        {Intrinsic::smul_with_overflow, MVT::i32, 2},
        {Intrinsic::umul_with_overflow, MVT::i32, 2},
        {Intrinsic::smul_with_overflow, MVT::i64, 3},
        {Intrinsic::umul_with_overflow, MVT::i64, 3},

    EVT MTy = TLI->getValueType(DL, RetTy->getContainedType(0), true);

  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat: {

    bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;

    EVT MTy = TLI->getValueType(DL, RetTy);

    if ((LT.second == MVT::f32 || LT.second == MVT::f64 ||
         LT.second == MVT::v2f32 || LT.second == MVT::v4f32 ||
         LT.second == MVT::v2f64)) {

          (LT.second == MVT::f64 && MTy == MVT::i32) ||
          (LT.second == MVT::f32 && MTy == MVT::i64)))

    if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())

    if ((LT.second == MVT::f16 && MTy == MVT::i32) ||
        (LT.second == MVT::f16 && MTy == MVT::i64) ||
        ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) &&

    if ((LT.second.getScalarType() == MVT::f32 ||
         LT.second.getScalarType() == MVT::f64 ||
         LT.second.getScalarType() == MVT::f16) &&

      if (LT.second.isVector())

                                        LegalTy, {LegalTy, LegalTy});

                                        LegalTy, {LegalTy, LegalTy});

      return LT.first * Cost +
             ((LT.second.getScalarType() != MVT::f16 || ST->hasFullFP16()) ? 0

    RetTy = RetTy->getScalarType();
    if (LT.second.isVector()) {

    return LT.first * Cost;

  case Intrinsic::fshl:
  case Intrinsic::fshr: {

        {Intrinsic::fshl, MVT::v4i32, 2},
        {Intrinsic::fshl, MVT::v2i64, 2}, {Intrinsic::fshl, MVT::v16i8, 2},
        {Intrinsic::fshl, MVT::v8i16, 2}, {Intrinsic::fshl, MVT::v2i32, 2},
        {Intrinsic::fshl, MVT::v8i8, 2},  {Intrinsic::fshl, MVT::v4i16, 2}};

      return LegalisationCost.first * Entry->Cost;

    if (!RetTy->isIntegerTy())

    bool HigherCost = (RetTy->getScalarSizeInBits() != 32 &&
                       RetTy->getScalarSizeInBits() < 64) ||
                      (RetTy->getScalarSizeInBits() % 64 != 0);
    unsigned ExtraCost = HigherCost ? 1 : 0;
    if (RetTy->getScalarSizeInBits() == 32 ||
        RetTy->getScalarSizeInBits() == 64)

    return TyL.first + ExtraCost;
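  // get_active_lane_mask sketch (case below): if the target can lower the
  // mask generation directly (e.g. SVE while-style instructions), the
  // intrinsic is costed as cheap; otherwise the generic expansion cost is
  // returned.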
  case Intrinsic::get_active_lane_mask: {

    EVT RetVT = getTLI()->getValueType(DL, RetTy);

    if (getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT))

    if (RetTy->isScalableTy()) {
      if (TLI->getTypeAction(RetTy->getContext(), RetVT) !=

      if (ST->hasSVE2p1() || ST->hasSME2()) {

      return Cost + (SplitCost * (Cost - 1));

  case Intrinsic::experimental_vector_match: {

    unsigned SearchSize = NeedleTy->getNumElements();
    if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize)) {

  case Intrinsic::experimental_cttz_elts: {

    if (!getTLI()->shouldExpandCttzElements(ArgVT)) {

  case Intrinsic::experimental_vector_extract_last_active:
    if (ST->isSVEorStreamingSVEAvailable()) {

  auto RequiredType = II.getType();

  assert(PN && "Expected Phi Node!");

  if (!PN->hasOneUse())
    return std::nullopt;

  for (Value *IncValPhi : PN->incoming_values()) {

        Reinterpret->getIntrinsicID() !=
            Intrinsic::aarch64_sve_convert_to_svbool ||
        RequiredType != Reinterpret->getArgOperand(0)->getType())
      return std::nullopt;

  for (unsigned I = 0; I < PN->getNumIncomingValues(); I++) {

    NPN->addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(I));

    return GoverningPredicateIdx != std::numeric_limits<unsigned>::max();

    return GoverningPredicateIdx;

    GoverningPredicateIdx = Index;

    return UndefIntrinsic;

    UndefIntrinsic = IID;

    return ResultLanes == InactiveLanesTakenFromOperand;

    return OperandIdxForInactiveLanes;

    assert(ResultLanes == Uninitialized && "Cannot set property twice!");
    ResultLanes = InactiveLanesTakenFromOperand;
    OperandIdxForInactiveLanes = Index;

    return ResultLanes == InactiveLanesAreNotDefined;

    assert(ResultLanes == Uninitialized && "Cannot set property twice!");
    ResultLanes = InactiveLanesAreNotDefined;

    return ResultLanes == InactiveLanesAreUnused;

    assert(ResultLanes == Uninitialized && "Cannot set property twice!");
    ResultLanes = InactiveLanesAreUnused;

    ResultIsZeroInitialized = true;

    return OperandIdxWithNoActiveLanes != std::numeric_limits<unsigned>::max();

    return OperandIdxWithNoActiveLanes;

    OperandIdxWithNoActiveLanes = Index;

  unsigned GoverningPredicateIdx = std::numeric_limits<unsigned>::max();

  unsigned IROpcode = 0;

  enum PredicationStyle {
    InactiveLanesTakenFromOperand,
    InactiveLanesAreNotDefined,
    InactiveLanesAreUnused

  bool ResultIsZeroInitialized = false;
  unsigned OperandIdxForInactiveLanes = std::numeric_limits<unsigned>::max();
  unsigned OperandIdxWithNoActiveLanes = std::numeric_limits<unsigned>::max();

  return !isa<ScalableVectorType>(V->getType());
1303 case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:
1304 case Intrinsic::aarch64_sve_fcvt_f16f32:
1305 case Intrinsic::aarch64_sve_fcvt_f16f64:
1306 case Intrinsic::aarch64_sve_fcvt_f32f16:
1307 case Intrinsic::aarch64_sve_fcvt_f32f64:
1308 case Intrinsic::aarch64_sve_fcvt_f64f16:
1309 case Intrinsic::aarch64_sve_fcvt_f64f32:
1310 case Intrinsic::aarch64_sve_fcvtlt_f32f16:
1311 case Intrinsic::aarch64_sve_fcvtlt_f64f32:
1312 case Intrinsic::aarch64_sve_fcvtx_f32f64:
1313 case Intrinsic::aarch64_sve_fcvtzs:
1314 case Intrinsic::aarch64_sve_fcvtzs_i32f16:
1315 case Intrinsic::aarch64_sve_fcvtzs_i32f64:
1316 case Intrinsic::aarch64_sve_fcvtzs_i64f16:
1317 case Intrinsic::aarch64_sve_fcvtzs_i64f32:
1318 case Intrinsic::aarch64_sve_fcvtzu:
1319 case Intrinsic::aarch64_sve_fcvtzu_i32f16:
1320 case Intrinsic::aarch64_sve_fcvtzu_i32f64:
1321 case Intrinsic::aarch64_sve_fcvtzu_i64f16:
1322 case Intrinsic::aarch64_sve_fcvtzu_i64f32:
1323 case Intrinsic::aarch64_sve_scvtf:
1324 case Intrinsic::aarch64_sve_scvtf_f16i32:
1325 case Intrinsic::aarch64_sve_scvtf_f16i64:
1326 case Intrinsic::aarch64_sve_scvtf_f32i64:
1327 case Intrinsic::aarch64_sve_scvtf_f64i32:
1328 case Intrinsic::aarch64_sve_ucvtf:
1329 case Intrinsic::aarch64_sve_ucvtf_f16i32:
1330 case Intrinsic::aarch64_sve_ucvtf_f16i64:
1331 case Intrinsic::aarch64_sve_ucvtf_f32i64:
1332 case Intrinsic::aarch64_sve_ucvtf_f64i32:
1335 case Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2:
1336 case Intrinsic::aarch64_sve_fcvtnt_f16f32:
1337 case Intrinsic::aarch64_sve_fcvtnt_f32f64:
1338 case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
1341 case Intrinsic::aarch64_sve_fabd:
1343 case Intrinsic::aarch64_sve_fadd:
1346 case Intrinsic::aarch64_sve_fdiv:
1349 case Intrinsic::aarch64_sve_fmax:
1351 case Intrinsic::aarch64_sve_fmaxnm:
1353 case Intrinsic::aarch64_sve_fmin:
1355 case Intrinsic::aarch64_sve_fminnm:
1357 case Intrinsic::aarch64_sve_fmla:
1359 case Intrinsic::aarch64_sve_fmls:
1361 case Intrinsic::aarch64_sve_fmul:
1364 case Intrinsic::aarch64_sve_fmulx:
1366 case Intrinsic::aarch64_sve_fnmla:
1368 case Intrinsic::aarch64_sve_fnmls:
1370 case Intrinsic::aarch64_sve_fsub:
1373 case Intrinsic::aarch64_sve_add:
1376 case Intrinsic::aarch64_sve_mla:
1378 case Intrinsic::aarch64_sve_mls:
1380 case Intrinsic::aarch64_sve_mul:
1383 case Intrinsic::aarch64_sve_sabd:
1385 case Intrinsic::aarch64_sve_sdiv:
1388 case Intrinsic::aarch64_sve_smax:
1390 case Intrinsic::aarch64_sve_smin:
1392 case Intrinsic::aarch64_sve_smulh:
1394 case Intrinsic::aarch64_sve_sub:
1397 case Intrinsic::aarch64_sve_uabd:
1399 case Intrinsic::aarch64_sve_udiv:
1402 case Intrinsic::aarch64_sve_umax:
1404 case Intrinsic::aarch64_sve_umin:
1406 case Intrinsic::aarch64_sve_umulh:
1408 case Intrinsic::aarch64_sve_asr:
1411 case Intrinsic::aarch64_sve_lsl:
1414 case Intrinsic::aarch64_sve_lsr:
1417 case Intrinsic::aarch64_sve_and:
1420 case Intrinsic::aarch64_sve_bic:
1422 case Intrinsic::aarch64_sve_eor:
1425 case Intrinsic::aarch64_sve_orr:
1428 case Intrinsic::aarch64_sve_sqsub:
1430 case Intrinsic::aarch64_sve_uqsub:
1433 case Intrinsic::aarch64_sve_add_u:
1436 case Intrinsic::aarch64_sve_and_u:
1439 case Intrinsic::aarch64_sve_asr_u:
1442 case Intrinsic::aarch64_sve_eor_u:
1445 case Intrinsic::aarch64_sve_fadd_u:
1448 case Intrinsic::aarch64_sve_fdiv_u:
1451 case Intrinsic::aarch64_sve_fmul_u:
1454 case Intrinsic::aarch64_sve_fsub_u:
1457 case Intrinsic::aarch64_sve_lsl_u:
1460 case Intrinsic::aarch64_sve_lsr_u:
1463 case Intrinsic::aarch64_sve_mul_u:
1466 case Intrinsic::aarch64_sve_orr_u:
1469 case Intrinsic::aarch64_sve_sdiv_u:
1472 case Intrinsic::aarch64_sve_sub_u:
1475 case Intrinsic::aarch64_sve_udiv_u:
1479 case Intrinsic::aarch64_sve_addqv:
1480 case Intrinsic::aarch64_sve_and_z:
1481 case Intrinsic::aarch64_sve_bic_z:
1482 case Intrinsic::aarch64_sve_brka_z:
1483 case Intrinsic::aarch64_sve_brkb_z:
1484 case Intrinsic::aarch64_sve_brkn_z:
1485 case Intrinsic::aarch64_sve_brkpa_z:
1486 case Intrinsic::aarch64_sve_brkpb_z:
1487 case Intrinsic::aarch64_sve_cntp:
1488 case Intrinsic::aarch64_sve_compact:
1489 case Intrinsic::aarch64_sve_eor_z:
1490 case Intrinsic::aarch64_sve_eorv:
1491 case Intrinsic::aarch64_sve_eorqv:
1492 case Intrinsic::aarch64_sve_nand_z:
1493 case Intrinsic::aarch64_sve_nor_z:
1494 case Intrinsic::aarch64_sve_orn_z:
1495 case Intrinsic::aarch64_sve_orr_z:
1496 case Intrinsic::aarch64_sve_orv:
1497 case Intrinsic::aarch64_sve_orqv:
1498 case Intrinsic::aarch64_sve_pnext:
1499 case Intrinsic::aarch64_sve_rdffr_z:
1500 case Intrinsic::aarch64_sve_saddv:
1501 case Intrinsic::aarch64_sve_uaddv:
1502 case Intrinsic::aarch64_sve_umaxv:
1503 case Intrinsic::aarch64_sve_umaxqv:
1504 case Intrinsic::aarch64_sve_cmpeq:
1505 case Intrinsic::aarch64_sve_cmpeq_wide:
1506 case Intrinsic::aarch64_sve_cmpge:
1507 case Intrinsic::aarch64_sve_cmpge_wide:
1508 case Intrinsic::aarch64_sve_cmpgt:
1509 case Intrinsic::aarch64_sve_cmpgt_wide:
1510 case Intrinsic::aarch64_sve_cmphi:
1511 case Intrinsic::aarch64_sve_cmphi_wide:
1512 case Intrinsic::aarch64_sve_cmphs:
1513 case Intrinsic::aarch64_sve_cmphs_wide:
1514 case Intrinsic::aarch64_sve_cmple_wide:
1515 case Intrinsic::aarch64_sve_cmplo_wide:
1516 case Intrinsic::aarch64_sve_cmpls_wide:
1517 case Intrinsic::aarch64_sve_cmplt_wide:
1518 case Intrinsic::aarch64_sve_cmpne:
1519 case Intrinsic::aarch64_sve_cmpne_wide:
1520 case Intrinsic::aarch64_sve_facge:
1521 case Intrinsic::aarch64_sve_facgt:
1522 case Intrinsic::aarch64_sve_fcmpeq:
1523 case Intrinsic::aarch64_sve_fcmpge:
1524 case Intrinsic::aarch64_sve_fcmpgt:
1525 case Intrinsic::aarch64_sve_fcmpne:
1526 case Intrinsic::aarch64_sve_fcmpuo:
1527 case Intrinsic::aarch64_sve_ld1:
1528 case Intrinsic::aarch64_sve_ld1_gather:
1529 case Intrinsic::aarch64_sve_ld1_gather_index:
1530 case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
1531 case Intrinsic::aarch64_sve_ld1_gather_sxtw:
1532 case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
1533 case Intrinsic::aarch64_sve_ld1_gather_uxtw:
1534 case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
1535 case Intrinsic::aarch64_sve_ld1q_gather_index:
1536 case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
1537 case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
1538 case Intrinsic::aarch64_sve_ld1ro:
1539 case Intrinsic::aarch64_sve_ld1rq:
1540 case Intrinsic::aarch64_sve_ld1udq:
1541 case Intrinsic::aarch64_sve_ld1uwq:
1542 case Intrinsic::aarch64_sve_ld2_sret:
1543 case Intrinsic::aarch64_sve_ld2q_sret:
1544 case Intrinsic::aarch64_sve_ld3_sret:
1545 case Intrinsic::aarch64_sve_ld3q_sret:
1546 case Intrinsic::aarch64_sve_ld4_sret:
1547 case Intrinsic::aarch64_sve_ld4q_sret:
1548 case Intrinsic::aarch64_sve_ldff1:
1549 case Intrinsic::aarch64_sve_ldff1_gather:
1550 case Intrinsic::aarch64_sve_ldff1_gather_index:
1551 case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
1552 case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
1553 case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
1554 case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
1555 case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
1556 case Intrinsic::aarch64_sve_ldnf1:
1557 case Intrinsic::aarch64_sve_ldnt1:
1558 case Intrinsic::aarch64_sve_ldnt1_gather:
1559 case Intrinsic::aarch64_sve_ldnt1_gather_index:
1560 case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
1561 case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
1564 case Intrinsic::aarch64_sve_prf:
1565 case Intrinsic::aarch64_sve_prfb_gather_index:
1566 case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
1567 case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
1568 case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
1569 case Intrinsic::aarch64_sve_prfd_gather_index:
1570 case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
1571 case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
1572 case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
1573 case Intrinsic::aarch64_sve_prfh_gather_index:
1574 case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
1575 case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
1576 case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
1577 case Intrinsic::aarch64_sve_prfw_gather_index:
1578 case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
1579 case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
1580 case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
1583 case Intrinsic::aarch64_sve_st1_scatter:
1584 case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
1585 case Intrinsic::aarch64_sve_st1_scatter_sxtw:
1586 case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
1587 case Intrinsic::aarch64_sve_st1_scatter_uxtw:
1588 case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
1589 case Intrinsic::aarch64_sve_st1dq:
1590 case Intrinsic::aarch64_sve_st1q_scatter_index:
1591 case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
1592 case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:
1593 case Intrinsic::aarch64_sve_st1wq:
1594 case Intrinsic::aarch64_sve_stnt1:
1595 case Intrinsic::aarch64_sve_stnt1_scatter:
1596 case Intrinsic::aarch64_sve_stnt1_scatter_index:
1597 case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
1598 case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
1600 case Intrinsic::aarch64_sve_st2:
1601 case Intrinsic::aarch64_sve_st2q:
1603 case Intrinsic::aarch64_sve_st3:
1604 case Intrinsic::aarch64_sve_st3q:
1606 case Intrinsic::aarch64_sve_st4:
1607 case Intrinsic::aarch64_sve_st4q:
  Value *UncastedPred;

    Pred = UncastedPred;

  if (OrigPredTy->getMinNumElements() <=

          ->getMinNumElements())
    Pred = UncastedPred;

  return C && C->isAllOnesValue();

  if (Dup && Dup->getIntrinsicID() == Intrinsic::aarch64_sve_dup &&
      Dup->getOperand(1) == Pg && isa<Constant>(Dup->getOperand(2)))

static std::optional<Instruction *>

  Value *Op1 = II.getOperand(1);
  Value *Op2 = II.getOperand(2);

    return std::nullopt;

  if (SimpleII == Inactive)

static std::optional<Instruction *>

    return std::nullopt;

  II.setCalledFunction(NewDecl);

  return std::nullopt;

static std::optional<Instruction *>

    return std::nullopt;

  auto IntrinsicID = BinOp->getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::aarch64_sve_and_z:
  case Intrinsic::aarch64_sve_bic_z:
  case Intrinsic::aarch64_sve_eor_z:
  case Intrinsic::aarch64_sve_nand_z:
  case Intrinsic::aarch64_sve_nor_z:
  case Intrinsic::aarch64_sve_orn_z:
  case Intrinsic::aarch64_sve_orr_z:

    return std::nullopt;

  auto BinOpPred = BinOp->getOperand(0);
  auto BinOpOp1 = BinOp->getOperand(1);
  auto BinOpOp2 = BinOp->getOperand(2);

      PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
    return std::nullopt;

  auto PredOp = PredIntr->getOperand(0);

  if (PredOpTy != II.getType())
    return std::nullopt;

      Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp1});
  NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
  if (BinOpOp1 == BinOpOp2)
    NarrowedBinOpArgs.push_back(NarrowBinOpOp1);

      Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp2}));

  auto NarrowedBinOp =
static std::optional<Instruction *>

    return BinOpCombine;

    return std::nullopt;

  Value *Cursor = II.getOperand(0), *EarliestReplacement = nullptr;

    if (CursorVTy->getElementCount().getKnownMinValue() <
        IVTy->getElementCount().getKnownMinValue())

    if (Cursor->getType() == IVTy)
      EarliestReplacement = Cursor;

    if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
                                  Intrinsic::aarch64_sve_convert_to_svbool ||
                              IntrinsicCursor->getIntrinsicID() ==
                                  Intrinsic::aarch64_sve_convert_from_svbool))

    CandidatesForRemoval.insert(CandidatesForRemoval.begin(), IntrinsicCursor);
    Cursor = IntrinsicCursor->getOperand(0);

  if (!EarliestReplacement)
    return std::nullopt;

  auto *OpPredicate = II.getOperand(0);

    return std::nullopt;

    return std::nullopt;

  const auto PTruePattern =

  if (PTruePattern != AArch64SVEPredPattern::vl1)
    return std::nullopt;

      II.getArgOperand(0), II.getArgOperand(2), ConstantInt::get(IdxTy, 0));
  Insert->insertBefore(II.getIterator());
  Insert->takeName(&II);

      II.getArgOperand(0));

    return std::nullopt;

  if (!SplatValue || !SplatValue->isZero())
    return std::nullopt;

      DupQLane->getIntrinsicID() != Intrinsic::aarch64_sve_dupq_lane)
    return std::nullopt;

  if (!DupQLaneIdx || !DupQLaneIdx->isZero())
    return std::nullopt;

  if (!VecIns || VecIns->getIntrinsicID() != Intrinsic::vector_insert)
    return std::nullopt;

    return std::nullopt;

    return std::nullopt;

    return std::nullopt;

  if (!VecTy || !OutTy || VecTy->getNumElements() != OutTy->getMinNumElements())
    return std::nullopt;

  unsigned NumElts = VecTy->getNumElements();
  unsigned PredicateBits = 0;

  for (unsigned I = 0; I < NumElts; ++I) {

      return std::nullopt;

    PredicateBits |= 1 << (I * (16 / NumElts));

  if (PredicateBits == 0) {

    PFalse->takeName(&II);

  for (unsigned I = 0; I < 16; ++I)
    if ((PredicateBits & (1 << I)) != 0)

  unsigned PredSize = Mask & -Mask;

  for (unsigned I = 0; I < 16; I += PredSize)
    if ((PredicateBits & (1 << I)) == 0)
      return std::nullopt;

                                        {PredType}, {PTruePat});

      Intrinsic::aarch64_sve_convert_to_svbool, {PredType}, {PTrue});
  auto *ConvertFromSVBool =

                                {II.getType()}, {ConvertToSVBool});
  Value *Pg = II.getArgOperand(0);
  Value *Vec = II.getArgOperand(1);
  auto IntrinsicID = II.getIntrinsicID();
  bool IsAfter = IntrinsicID == Intrinsic::aarch64_sve_lasta;

    auto OpC = OldBinOp->getOpcode();

        OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(), II.getIterator());

  if (IsAfter && C && C->isNullValue()) {

    Extract->insertBefore(II.getIterator());
    Extract->takeName(&II);

    return std::nullopt;

  if (IntrPG->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
    return std::nullopt;

  const auto PTruePattern =

    return std::nullopt;

  unsigned Idx = MinNumElts - 1;

  if (Idx >= PgVTy->getMinNumElements())
    return std::nullopt;

  Extract->insertBefore(II.getIterator());
  Extract->takeName(&II);

  Value *Pg = II.getArgOperand(0);

  Value *Vec = II.getArgOperand(2);

  if (!Ty->isIntegerTy())
    return std::nullopt;

    return std::nullopt;

      II.getIntrinsicID(), {FPVec->getType()}, {Pg, FPFallBack, FPVec});

                                      {II.getType()}, {AllPat});

static std::optional<Instruction *>

  if (Pattern == AArch64SVEPredPattern::all) {

  return MinNumElts && NumElts >= MinNumElts

                   II, ConstantInt::get(II.getType(), MinNumElts)))

static std::optional<Instruction *>

  if (!ST->isStreaming())
    return std::nullopt;

  Value *PgVal = II.getArgOperand(0);
  Value *OpVal = II.getArgOperand(1);

  if (PgVal == OpVal &&
      (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_first ||
       II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_last)) {

    return std::nullopt;

  if (Pg->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
      OpIID == Intrinsic::aarch64_sve_convert_to_svbool &&

  if ((Pg == Op) && (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) &&
      ((OpIID == Intrinsic::aarch64_sve_brka_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkb_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkpa_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkpb_z) ||
       (OpIID == Intrinsic::aarch64_sve_rdffr_z) ||
       (OpIID == Intrinsic::aarch64_sve_and_z) ||
       (OpIID == Intrinsic::aarch64_sve_bic_z) ||
       (OpIID == Intrinsic::aarch64_sve_eor_z) ||
       (OpIID == Intrinsic::aarch64_sve_nand_z) ||
       (OpIID == Intrinsic::aarch64_sve_nor_z) ||
       (OpIID == Intrinsic::aarch64_sve_orn_z) ||
       (OpIID == Intrinsic::aarch64_sve_orr_z))) {

  return std::nullopt;
template <Intrinsic::ID MulOpc, Intrinsic::ID FuseOpc>
static std::optional<Instruction *>
                                      bool MergeIntoAddendOp) {

  Value *MulOp0, *MulOp1, *AddendOp, *Mul;
  if (MergeIntoAddendOp) {
    AddendOp = II.getOperand(1);
    Mul = II.getOperand(2);

    AddendOp = II.getOperand(2);
    Mul = II.getOperand(1);

    return std::nullopt;

  if (!Mul->hasOneUse())
    return std::nullopt;

  if (II.getType()->isFPOrFPVectorTy()) {

      return std::nullopt;

      return std::nullopt;

  if (MergeIntoAddendOp)

static std::optional<Instruction *>

  Value *Pred = II.getOperand(0);
  Value *PtrOp = II.getOperand(1);
  Type *VecTy = II.getType();

  Load->copyMetadata(II);

static std::optional<Instruction *>

  Value *VecOp = II.getOperand(0);
  Value *Pred = II.getOperand(1);
  Value *PtrOp = II.getOperand(2);

  Store->copyMetadata(II);

  case Intrinsic::aarch64_sve_fmul_u:
    return Instruction::BinaryOps::FMul;
  case Intrinsic::aarch64_sve_fadd_u:
    return Instruction::BinaryOps::FAdd;
  case Intrinsic::aarch64_sve_fsub_u:
    return Instruction::BinaryOps::FSub;

    return Instruction::BinaryOpsEnd;

static std::optional<Instruction *>

  if (II.isStrictFP())
    return std::nullopt;

  auto *OpPredicate = II.getOperand(0);

  if (BinOpCode == Instruction::BinaryOpsEnd ||

    return std::nullopt;

      BinOpCode, II.getOperand(1), II.getOperand(2), II.getFastMathFlags());

          Intrinsic::aarch64_sve_mla>(

          Intrinsic::aarch64_sve_mad>(

  return std::nullopt;

static std::optional<Instruction *>

          Intrinsic::aarch64_sve_fmla>(IC, II,

          Intrinsic::aarch64_sve_fmad>(IC, II,

          Intrinsic::aarch64_sve_fmla>(IC, II,

  return std::nullopt;

static std::optional<Instruction *>

          Intrinsic::aarch64_sve_fmla>(IC, II,

          Intrinsic::aarch64_sve_fmad>(IC, II,

          Intrinsic::aarch64_sve_fmla_u>(

static std::optional<Instruction *>

          Intrinsic::aarch64_sve_fmls>(IC, II,

          Intrinsic::aarch64_sve_fnmsb>(

          Intrinsic::aarch64_sve_fmls>(IC, II,

  return std::nullopt;

static std::optional<Instruction *>

          Intrinsic::aarch64_sve_fmls>(IC, II,

          Intrinsic::aarch64_sve_fnmsb>(

          Intrinsic::aarch64_sve_fmls_u>(

          Intrinsic::aarch64_sve_mls>(

  return std::nullopt;
  Value *UnpackArg = II.getArgOperand(0);

  bool IsSigned = II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||
                  II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpklo;

  return std::nullopt;

  auto *OpVal = II.getOperand(0);
  auto *OpIndices = II.getOperand(1);

      SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))
    return std::nullopt;

  Type *RetTy = II.getType();

  constexpr Intrinsic::ID FromSVB = Intrinsic::aarch64_sve_convert_from_svbool;
  constexpr Intrinsic::ID ToSVB = Intrinsic::aarch64_sve_convert_to_svbool;

  if ((match(II.getArgOperand(0),

    if (TyA == B->getType() &&

                                          TyA->getMinNumElements());

  return std::nullopt;

  if (match(II.getArgOperand(0),

        II, (II.getIntrinsicID() == Intrinsic::aarch64_sve_zip1 ? A : B));

  return std::nullopt;

static std::optional<Instruction *>

  Value *Mask = II.getOperand(0);
  Value *BasePtr = II.getOperand(1);
  Value *Index = II.getOperand(2);

        BasePtr->getPointerAlignment(II.getDataLayout());

                                      BasePtr, IndexBase);

  return std::nullopt;

static std::optional<Instruction *>

  Value *Val = II.getOperand(0);
  Value *Mask = II.getOperand(1);
  Value *BasePtr = II.getOperand(2);
  Value *Index = II.getOperand(3);

        BasePtr->getPointerAlignment(II.getDataLayout());

                                      BasePtr, IndexBase);

  return std::nullopt;

  Value *Pred = II.getOperand(0);
  Value *Vec = II.getOperand(1);
  Value *DivVec = II.getOperand(2);

  if (!SplatConstantInt)
    return std::nullopt;

  if (DivisorValue == -1)
    return std::nullopt;
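  // Sketch of the combine below: dividing by a positive power of two becomes
  // a predicated arithmetic shift right (asrd) by log2 of the divisor; for a
  // negative power-of-two divisor the shifted result is additionally negated.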
  if (DivisorValue == 1)

        Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});

        Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});

        Intrinsic::aarch64_sve_neg, {ASRD->getType()}, {ASRD, Pred, ASRD});

  return std::nullopt;

  size_t VecSize = Vec.size();

  size_t HalfVecSize = VecSize / 2;

    if (*LHS != nullptr && *RHS != nullptr) {

    if (*LHS == nullptr && *RHS != nullptr)

    return std::nullopt;

    Elts[Idx->getValue().getZExtValue()] = InsertElt->getOperand(1);
    CurrentInsertElt = InsertElt->getOperand(0);

    return std::nullopt;

  for (size_t I = 0; I < Elts.size(); I++) {
    if (Elts[I] == nullptr)

  if (InsertEltChain == nullptr)
    return std::nullopt;

  unsigned PatternWidth = IIScalableTy->getScalarSizeInBits() * Elts.size();
  unsigned PatternElementCount = IIScalableTy->getScalarSizeInBits() *
                                 IIScalableTy->getMinNumElements() /

  auto *WideShuffleMaskTy =

  auto NarrowBitcast =

  return std::nullopt;

  Value *Pred = II.getOperand(0);
  Value *Vec = II.getOperand(1);
  Value *Shift = II.getOperand(2);

  Value *AbsPred, *MergedValue;

    return std::nullopt;

    return std::nullopt;

    return std::nullopt;

                                   {II.getType()}, {Pred, Vec, Shift});

  Value *Vec = II.getOperand(0);

    return std::nullopt;
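  // dmb combine sketch (below): step forward over instructions that neither
  // access memory nor have other side effects, up to the lookahead threshold,
  // and drop this barrier when an identical dmb follows.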
  auto *NI = II.getNextNode();

    return !I->mayReadOrWriteMemory() && !I->mayHaveSideEffects();

  while (LookaheadThreshold-- && CanSkipOver(NI)) {
    auto *NIBB = NI->getParent();
    NI = NI->getNextNode();

      if (auto *SuccBB = NIBB->getUniqueSuccessor())
        NI = &*SuccBB->getFirstNonPHIOrDbgOrLifetime();

  if (NextII && II.isIdenticalTo(NextII))

  return std::nullopt;

                              {II.getType(), II.getOperand(0)->getType()},
                              {II.getOperand(0), II.getOperand(1)}));

  return std::nullopt;

  Value *Passthru = II.getOperand(0);

  auto *Mask = ConstantInt::get(Ty, MaskValue);

  return std::nullopt;

static std::optional<Instruction *>

    return std::nullopt;

std::optional<Instruction *>
2850 case Intrinsic::aarch64_dmb:
2852 case Intrinsic::aarch64_neon_fmaxnm:
2853 case Intrinsic::aarch64_neon_fminnm:
2855 case Intrinsic::aarch64_sve_convert_from_svbool:
2857 case Intrinsic::aarch64_sve_dup:
2859 case Intrinsic::aarch64_sve_dup_x:
2861 case Intrinsic::aarch64_sve_cmpne:
2862 case Intrinsic::aarch64_sve_cmpne_wide:
2864 case Intrinsic::aarch64_sve_rdffr:
2866 case Intrinsic::aarch64_sve_lasta:
2867 case Intrinsic::aarch64_sve_lastb:
2869 case Intrinsic::aarch64_sve_clasta_n:
2870 case Intrinsic::aarch64_sve_clastb_n:
2872 case Intrinsic::aarch64_sve_cntd:
2874 case Intrinsic::aarch64_sve_cntw:
2876 case Intrinsic::aarch64_sve_cnth:
2878 case Intrinsic::aarch64_sve_cntb:
2880 case Intrinsic::aarch64_sme_cntsd:
2882 case Intrinsic::aarch64_sve_ptest_any:
2883 case Intrinsic::aarch64_sve_ptest_first:
2884 case Intrinsic::aarch64_sve_ptest_last:
2886 case Intrinsic::aarch64_sve_fadd:
2888 case Intrinsic::aarch64_sve_fadd_u:
2890 case Intrinsic::aarch64_sve_fmul_u:
2892 case Intrinsic::aarch64_sve_fsub:
2894 case Intrinsic::aarch64_sve_fsub_u:
2896 case Intrinsic::aarch64_sve_add:
2898 case Intrinsic::aarch64_sve_add_u:
2900 Intrinsic::aarch64_sve_mla_u>(
2902 case Intrinsic::aarch64_sve_sub:
2904 case Intrinsic::aarch64_sve_sub_u:
2906 Intrinsic::aarch64_sve_mls_u>(
2908 case Intrinsic::aarch64_sve_tbl:
2910 case Intrinsic::aarch64_sve_uunpkhi:
2911 case Intrinsic::aarch64_sve_uunpklo:
2912 case Intrinsic::aarch64_sve_sunpkhi:
2913 case Intrinsic::aarch64_sve_sunpklo:
2915 case Intrinsic::aarch64_sve_uzp1:
2917 case Intrinsic::aarch64_sve_zip1:
2918 case Intrinsic::aarch64_sve_zip2:
2920 case Intrinsic::aarch64_sve_ld1_gather_index:
2922 case Intrinsic::aarch64_sve_st1_scatter_index:
2924 case Intrinsic::aarch64_sve_ld1:
2926 case Intrinsic::aarch64_sve_st1:
2928 case Intrinsic::aarch64_sve_sdiv:
2930 case Intrinsic::aarch64_sve_sel:
2932 case Intrinsic::aarch64_sve_srshl:
2934 case Intrinsic::aarch64_sve_dupq_lane:
2936 case Intrinsic::aarch64_sve_insr:
2938 case Intrinsic::aarch64_sve_whilelo:
2940 case Intrinsic::aarch64_sve_ptrue:
2942 case Intrinsic::aarch64_sve_uxtb:
2944 case Intrinsic::aarch64_sve_uxth:
2946 case Intrinsic::aarch64_sve_uxtw:
2948 case Intrinsic::aarch64_sme_in_streaming_mode:
2952 return std::nullopt;
        SimplifyAndSetOp) const {

  switch (II.getIntrinsicID()) {
2963 case Intrinsic::aarch64_neon_fcvtxn:
2964 case Intrinsic::aarch64_neon_rshrn:
2965 case Intrinsic::aarch64_neon_sqrshrn:
2966 case Intrinsic::aarch64_neon_sqrshrun:
2967 case Intrinsic::aarch64_neon_sqshrn:
2968 case Intrinsic::aarch64_neon_sqshrun:
2969 case Intrinsic::aarch64_neon_sqxtn:
2970 case Intrinsic::aarch64_neon_sqxtun:
2971 case Intrinsic::aarch64_neon_uqrshrn:
2972 case Intrinsic::aarch64_neon_uqshrn:
2973 case Intrinsic::aarch64_neon_uqxtn:
    SimplifyAndSetOp(&II, 0, OrigDemandedElts, UndefElts);
2978 return std::nullopt;
  return ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&

  if (ST->useSVEForFixedLengthVectors() &&

        std::max(ST->getMinSVEVectorSizeInBits(), 128u));
  else if (ST->isNeonAvailable())

  if (ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&

bool AArch64TTIImpl::isSingleExtWideningInstruction(
                                                    Type *SrcOverrideTy) const {

      (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))

  Type *SrcTy = SrcOverrideTy;

  case Instruction::Add:
  case Instruction::Sub: {

    if (Opcode == Instruction::Sub)

  assert(SrcTy && "Expected some SrcTy");

  unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();

      DstTyL.first * DstTyL.second.getVectorMinNumElements();

      SrcTyL.first * SrcTyL.second.getVectorMinNumElements();

  return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;
Type *AArch64TTIImpl::isBinExtWideningInstruction(unsigned Opcode, Type *DstTy,
                                                  Type *SrcOverrideTy) const {
  if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
      Opcode != Instruction::Mul)

      (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))

  auto getScalarSizeWithOverride = [&](const Value *V) {

        ->getScalarSizeInBits();

  unsigned MaxEltSize = 0;

    unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
    unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
    MaxEltSize = std::max(EltSize0, EltSize1);

    unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
    unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);

    if (EltSize0 >= DstEltSize / 2 || EltSize1 >= DstEltSize / 2)

    MaxEltSize = DstEltSize / 2;
  } else if (Opcode == Instruction::Mul &&

        getScalarSizeWithOverride(isa<ZExtInst>(Args[0]) ? Args[0] : Args[1]);

  if (MaxEltSize * 2 > DstEltSize)

  if (!Src->isVectorTy() || !TLI->isTypeLegal(TLI->getValueType(DL, Src)) ||
      (Src->isScalableTy() && !ST->hasSVE2()))

  if (AddUser && AddUser->getOpcode() == Instruction::Add)

  if (!Shr || Shr->getOpcode() != Instruction::LShr)

  if (!Trunc || Trunc->getOpcode() != Instruction::Trunc ||
      Src->getScalarSizeInBits() !=

  int ISD = TLI->InstructionOpcodeToISD(Opcode);

  if (I && I->hasOneUser()) {

    if (Type *ExtTy = isBinExtWideningInstruction(
            SingleUser->getOpcode(), Dst, Operands,
            Src != I->getOperand(0)->getType() ? Src : nullptr)) {

    if (isSingleExtWideningInstruction(
            SingleUser->getOpcode(), Dst, Operands,
            Src != I->getOperand(0)->getType() ? Src : nullptr)) {

      if (SingleUser->getOpcode() == Instruction::Add) {
        if (I == SingleUser->getOperand(1) ||

             cast<CastInst>(SingleUser->getOperand(1))->getOpcode() == Opcode))

  return Cost == 0 ? 0 : 1;
  EVT SrcTy = TLI->getValueType(DL, Src);
  EVT DstTy = TLI->getValueType(DL, Dst);

  if (!SrcTy.isSimple() || !DstTy.isSimple())

  if (!ST->hasSVE2() && !ST->isStreamingSVEAvailable() &&

    return AdjustCost(Entry->Cost);
3297 const unsigned int SVE_EXT_COST = 1;
3298 const unsigned int SVE_FCVT_COST = 1;
3299 const unsigned int SVE_UNPACK_ONCE = 4;
3300 const unsigned int SVE_UNPACK_TWICE = 16;
3378 {ISD::FP_EXTEND, MVT::f64, MVT::f32, 1},
3379 {ISD::FP_EXTEND, MVT::v2f64, MVT::v2f32, 1},
3380 {ISD::FP_EXTEND, MVT::v4f64, MVT::v4f32, 2},
3382 {ISD::FP_EXTEND, MVT::f32, MVT::f16, 1},
3383 {ISD::FP_EXTEND, MVT::f64, MVT::f16, 1},
3384 {ISD::FP_EXTEND, MVT::v4f32, MVT::v4f16, 1},
3385 {ISD::FP_EXTEND, MVT::v8f32, MVT::v8f16, 2},
3386 {ISD::FP_EXTEND, MVT::v2f64, MVT::v2f16, 2},
3387 {ISD::FP_EXTEND, MVT::v4f64, MVT::v4f16, 3},
3388 {ISD::FP_EXTEND, MVT::v8f64, MVT::v8f16, 6},
3390 {ISD::FP_EXTEND, MVT::f32, MVT::bf16, 1},
3391 {ISD::FP_EXTEND, MVT::f64, MVT::bf16, 2},
3392 {ISD::FP_EXTEND, MVT::v4f32, MVT::v4bf16, 1},
3393 {ISD::FP_EXTEND, MVT::v8f32, MVT::v8bf16, 2},
3394 {ISD::FP_EXTEND, MVT::v2f64, MVT::v2bf16, 2},
3395 {ISD::FP_EXTEND, MVT::v4f64, MVT::v4bf16, 3},
3396 {ISD::FP_EXTEND, MVT::v8f64, MVT::v8bf16, 6},
3429 SVE_EXT_COST + SVE_FCVT_COST},
3434 SVE_EXT_COST + SVE_FCVT_COST},
3441 SVE_EXT_COST + SVE_FCVT_COST},
3445 SVE_EXT_COST + SVE_FCVT_COST},
3451 SVE_EXT_COST + SVE_FCVT_COST},
3454 SVE_EXT_COST + SVE_FCVT_COST},
3459 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3461 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3471 SVE_EXT_COST + SVE_FCVT_COST},
3476 SVE_EXT_COST + SVE_FCVT_COST},
3489 SVE_EXT_COST + SVE_FCVT_COST},
3493 SVE_EXT_COST + SVE_FCVT_COST},
3505 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3507 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3509 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3511 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3515 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3517 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3533 SVE_EXT_COST + SVE_FCVT_COST},
3538 SVE_EXT_COST + SVE_FCVT_COST},
3549 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3551 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3553 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3555 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3557 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3559 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3563 SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3565 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3567 SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3569 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3713 {ISD::FP_EXTEND, MVT::nxv2f32, MVT::nxv2f16, 1},
3714 {ISD::FP_EXTEND, MVT::nxv4f32, MVT::nxv4f16, 1},
3715 {ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8f16, 2},
3718 {ISD::FP_EXTEND, MVT::nxv2f32, MVT::nxv2bf16, 1},
3719 {ISD::FP_EXTEND, MVT::nxv4f32, MVT::nxv4bf16, 1},
3720 {ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8bf16, 4},
3723 {ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f16, 1},
3724 {ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f16, 2},
3725 {ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f16, 4},
3728 {ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2bf16, 2},
3729 {ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4bf16, 6},
3730 {ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8bf16, 14},
3733 {ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f32, 1},
3734 {ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f32, 2},
3735 {ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f32, 6},
3738 {ISD::BITCAST, MVT::nxv2f16, MVT::nxv2i16, 0},
3739 {ISD::BITCAST, MVT::nxv4f16, MVT::nxv4i16, 0},
3740 {ISD::BITCAST, MVT::nxv2f32, MVT::nxv2i32, 0},
3743 {ISD::BITCAST, MVT::nxv2i16, MVT::nxv2f16, 0},
3744 {ISD::BITCAST, MVT::nxv4i16, MVT::nxv4f16, 0},
3745 {ISD::BITCAST, MVT::nxv2i32, MVT::nxv2f32, 0},
  EVT WiderTy = SrcTy.bitsGT(DstTy) ? SrcTy : DstTy;

      ST->useSVEForFixedLengthVectors(WiderTy)) {
    std::pair<InstructionCost, MVT> LT =

    unsigned NumElements =

      return AdjustCost(Entry->Cost);

  if (ST->hasFullFP16())

      return AdjustCost(Entry->Cost);
      ST->isSVEorStreamingSVEAvailable() &&
      TLI->getTypeAction(Src->getContext(), SrcTy) ==

      TLI->getTypeAction(Dst->getContext(), DstTy) ==

        Opcode, LegalTy, Src, CCH, CostKind, I);

    return Part1 + Part2;

      ST->isSVEorStreamingSVEAvailable() && TLI->isTypeLegal(DstTy))
  assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&

                                 CostKind, Index, nullptr, nullptr);

  auto DstVT = TLI->getValueType(DL, Dst);
  auto SrcVT = TLI->getValueType(DL, Src);

  if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))

  if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits())

  case Instruction::SExt:

  case Instruction::ZExt:
    if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
    return Opcode == Instruction::PHI ? 0 : 1;

    ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) const {

  if (!LT.second.isVector())

  if (LT.second.isFixedLengthVector()) {
    unsigned Width = LT.second.getVectorNumElements();
    Index = Index % Width;
  auto ExtractCanFuseWithFmul = [&]() {

    auto IsAllowedScalarTy = [&](const Type *T) {
      return T->isFloatTy() || T->isDoubleTy() ||
             (T->isHalfTy() && ST->hasFullFP16());

    auto IsUserFMulScalarTy = [](const Value *EEUser) {

      return BO && BO->getOpcode() == BinaryOperator::FMul &&
             !BO->getType()->isVectorTy();

    auto IsExtractLaneEquivalentToZero = [&](unsigned Idx, unsigned EltSz) {

      return Idx == 0 || (RegWidth != 0 && (Idx * EltSz) % RegWidth == 0);

      DenseMap<User *, unsigned> UserToExtractIdx;
      for (auto *U : Scalar->users()) {
        if (!IsUserFMulScalarTy(U))

        UserToExtractIdx[U];

      if (UserToExtractIdx.empty())

      for (auto &[S, U, L] : ScalarUserAndIdx) {
        for (auto *U : S->users()) {
          if (UserToExtractIdx.contains(U)) {

            auto *Op0 = FMul->getOperand(0);
            auto *Op1 = FMul->getOperand(1);
            if ((Op0 == S && Op1 == S) || Op0 != S || Op1 != S) {
              UserToExtractIdx[U] = L;

      for (auto &[U, L] : UserToExtractIdx) {

      return !EE->users().empty() &&
             all_of(EE->users(), [&](const User *U) {
               if (!IsUserFMulScalarTy(U))

               const auto *BO = cast<BinaryOperator>(U);
               const auto *OtherEE = dyn_cast<ExtractElementInst>(
                   BO->getOperand(0) == EE ? BO->getOperand(1)
                                           : BO->getOperand(0));

               const auto *IdxOp =
                   dyn_cast<ConstantInt>(OtherEE->getIndexOperand());

               return IsExtractLaneEquivalentToZero(
                   cast<ConstantInt>(OtherEE->getIndexOperand())

                   OtherEE->getType()->getScalarSizeInBits());

  if (Opcode == Instruction::ExtractElement && (I || Scalar) &&
      ExtractCanFuseWithFmul())

             : ST->getVectorInsertExtractBaseCost();
                                                   const Value *Op1) const {

  if (Opcode == Instruction::InsertElement && Index == 0 && Op0 &&

  return getVectorInstrCostHelper(Opcode, Val, CostKind, Index);

    ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) const {
  return getVectorInstrCostHelper(Opcode, Val, CostKind, Index, nullptr, Scalar,

                                                 unsigned Index) const {
  return getVectorInstrCostHelper(I.getOpcode(), Val, CostKind, Index, &I);

                                                 unsigned Index) const {

             : ST->getVectorInsertExtractBaseCost() + 1;
  if (Ty->getElementType()->isFloatingPointTy())

  unsigned VecInstCost =

  return DemandedElts.popcount() * (Insert + Extract) * VecInstCost;

  if (!Ty->getScalarType()->isHalfTy() && !Ty->getScalarType()->isBFloatTy())
    return std::nullopt;
  if (Ty->getScalarType()->isHalfTy() && ST->hasFullFP16())
    return std::nullopt;

  Cost += InstCost(PromotedTy);
                                         Op2Info, Args, CxtI);

  int ISD = TLI->InstructionOpcodeToISD(Opcode);

          Ty, CostKind, Op1Info, Op2Info, true,
          [&](Type *PromotedTy) {

    return *PromotedCost;

  if (Type *ExtTy = isBinExtWideningInstruction(Opcode, Ty, Args)) {

    auto VT = TLI->getValueType(DL, Ty);
    if (VT.isScalarInteger() && VT.getSizeInBits() <= 64) {

                 : (3 * AsrCost + AddCost);

      return MulCost + AsrCost + 2 * AddCost;

    } else if (VT.isVector()) {

        if (Ty->isScalableTy() && ST->hasSVE())
          Cost += 2 * AsrCost;

                ? (LT.second.getScalarType() == MVT::i64 ? 1 : 2) * AsrCost

      } else if (LT.second == MVT::v2i64) {
        return VT.getVectorNumElements() *

        if (Ty->isScalableTy() && ST->hasSVE())
          return MulCost + 2 * AddCost + 2 * AsrCost;
        return 2 * MulCost + AddCost + AsrCost + UsraCost;

        LT.second.isFixedLengthVector()) {

      return ExtractCost + InsertCost +
    auto VT = TLI->getValueType(DL, Ty);

    bool HasMULH = VT == MVT::i64 || LT.second == MVT::nxv2i64 ||
                   LT.second == MVT::nxv4i32 || LT.second == MVT::nxv8i16 ||
                   LT.second == MVT::nxv16i8;
    bool Is128bit = LT.second.is128BitVector();

                              (HasMULH ? 0 : ShrCost) + AddCost * 2 + ShrCost;
    return DivCost + (ISD == ISD::UREM ? MulCost + AddCost : 0);

    if (!VT.isVector() && VT.getSizeInBits() > 64)

          Opcode, Ty, CostKind, Op1Info, Op2Info);

    if (TLI->isOperationLegalOrCustom(ISD, LT.second) && ST->hasSVE()) {

          Ty->getPrimitiveSizeInBits().getFixedValue() < 128) {

        if (nullptr != Entry)

      if (LT.second.getScalarType() == MVT::i8)

      else if (LT.second.getScalarType() == MVT::i16)

          Opcode, Ty->getScalarType(), CostKind, Op1Info, Op2Info);
      return (4 + DivCost) * VTy->getNumElements();

                                   -1, nullptr, nullptr);
    if (LT.second == MVT::v2i64 && ST->hasSVE())

    if (LT.second != MVT::v2i64)

    if ((Ty->isFloatTy() || Ty->isDoubleTy() ||
         (Ty->isHalfTy() && ST->hasFullFP16())) &&

    if (!Ty->getScalarType()->isFP128Ty())

    if (!Ty->getScalarType()->isFP128Ty())
      return 2 * LT.first;

  if (!Ty->isVectorTy())

    int MaxMergeDistance = 64;

      return NumVectorInstToHideOverhead;

                                                 unsigned Opcode1,
                                                 unsigned Opcode2) const {

  if (!Sched.hasInstrSchedModel())

      Sched.getSchedClassDesc(TII->get(Opcode1).getSchedClass());

      Sched.getSchedClassDesc(TII->get(Opcode2).getSchedClass());

         "Cannot handle variant scheduling classes without an MI");

  const int AmortizationCost = 20;

      VecPred = CurrentPred;
    static const auto ValidMinMaxTys = {
        MVT::v8i8,  MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
        MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};
    static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};

    if (any_of(ValidMinMaxTys, [&](MVT M) { return M == LT.second; }) ||
        (ST->hasFullFP16() &&
         any_of(ValidFP16MinMaxTys, [&](MVT M) { return M == LT.second; })))
        {Instruction::Select, MVT::v2i1, MVT::v2f32, 2},
        {Instruction::Select, MVT::v2i1, MVT::v2f64, 2},
        {Instruction::Select, MVT::v4i1, MVT::v4f32, 2},
        {Instruction::Select, MVT::v4i1, MVT::v4f16, 2},
        {Instruction::Select, MVT::v8i1, MVT::v8f16, 2},
        {Instruction::Select, MVT::v16i1, MVT::v16i16, 16},
        {Instruction::Select, MVT::v8i1, MVT::v8i32, 8},
        {Instruction::Select, MVT::v16i1, MVT::v16i32, 16},
        {Instruction::Select, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost},
        {Instruction::Select, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost},
        {Instruction::Select, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost}};

      EVT SelCondTy = TLI->getValueType(DL, CondTy);
      EVT SelValTy = TLI->getValueType(DL, ValTy);
  if (Opcode == Instruction::FCmp) {

            ValTy, CostKind, Op1Info, Op2Info, false,
            [&](Type *PromotedTy) {

      return *PromotedCost;

    if (LT.second.getScalarType() != MVT::f64 &&
        LT.second.getScalarType() != MVT::f32 &&
        LT.second.getScalarType() != MVT::f16)

    unsigned Factor = 1;

                                    AArch64::FCMEQv4f32))

      TLI->isTypeLegal(TLI->getValueType(DL, ValTy)) &&

                                   Op1Info, Op2Info, I);
  if (ST->requiresStrictAlign()) {

  Options.AllowOverlappingLoads = true;
  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);

  Options.LoadSizes = {8, 4, 2, 1};
  Options.AllowedTailExpansions = {3, 5, 6};
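  // Memcmp expansion sketch (per the options above): with unaligned accesses
  // available, comparisons are expanded using 8/4/2/1-byte loads, overlapping
  // loads are permitted, and 3/5/6-byte tails may also be expanded.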
  return ST->hasSVE();

  if (!LT.first.isValid())

  if (VT->getElementType()->isIntegerTy(1))

  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
         "Should be called on only load or stores.");

  case Instruction::Load:

    return ST->getGatherOverhead();

  case Instruction::Store:

    return ST->getScatterOverhead();
    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,

  if (!LT.first.isValid())

  if (!LT.second.isVector() ||

      VT->getElementType()->isIntegerTy(1))

  ElementCount LegalVF = LT.second.getVectorElementCount();

      {TTI::OK_AnyValue, TTI::OP_None}, I);

  EVT VT = TLI->getValueType(DL, Ty, true);

  if (VT == MVT::Other)

  if (!LT.first.isValid())

      (VTy->getElementType()->isIntegerTy(1) &&
       !VTy->getElementCount().isKnownMultipleOf(

  if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
      LT.second.is128BitVector() && Alignment < Align(16)) {

    const int AmortizationCost = 6;

    return LT.first * 2 * AmortizationCost;
  if (Ty->isPtrOrPtrVectorTy())

  if (Ty->getScalarSizeInBits() != LT.second.getScalarSizeInBits()) {

    if (VT == MVT::v4i8)

  if (!isPowerOf2_32(EltSize) || EltSize < 8 || EltSize > 64 ||

  while (!TypeWorklist.empty()) {
    bool UseMaskForCond, bool UseMaskForGaps) const {
  assert(Factor >= 2 && "Invalid interleave factor");

  if (!VecTy->isScalableTy() && (UseMaskForCond || UseMaskForGaps))

  if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
    unsigned MinElts = VecVTy->getElementCount().getKnownMinValue();

        VecVTy->getElementCount().divideCoefficientBy(Factor));

    if (MinElts % Factor == 0 &&
        TLI->isLegalInterleavedAccessType(SubVecTy, DL, UseScalable))
      return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL, UseScalable);

                                           UseMaskForCond, UseMaskForGaps);

  for (auto *I : Tys) {
    if (!I->isVectorTy())

  return ST->getMaxInterleaveFactor();
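// Sketch: countStridedLoads (below) counts loads whose address is an affine
// AddRec in this loop, bailing out early once more than MaxStridedLoads / 2
// are seen; the caller uses the count to bound the runtime unroll factor.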
  enum { MaxStridedLoads = 7 };

    int StridedLoads = 0;

    for (const auto BB : L->blocks()) {
      for (auto &I : *BB) {

        if (L->isLoopInvariant(PtrValue))

        if (!LSCEVAddRec || !LSCEVAddRec->isAffine())

        if (StridedLoads > MaxStridedLoads / 2)
          return StridedLoads;

    return StridedLoads;

  int StridedLoads = countStridedLoads(L, SE);

                    << " strided loads\n");
5022 unsigned *FinalSize) {
5026 for (
auto *BB : L->getBlocks()) {
5027 for (
auto &
I : *BB) {
5033 if (!Cost.isValid())
5037 if (LoopCost > Budget)
5059 if (MaxTC > 0 && MaxTC <= 32)
5070 if (Blocks.size() != 2)
5092 if (!L->isInnermost() || L->getNumBlocks() > 8)
5096 if (!L->getExitBlock())
5102 bool HasParellelizableReductions =
5103 L->getNumBlocks() == 1 &&
5104 any_of(L->getHeader()->phis(),
5106 return canParallelizeReductionWhenUnrolling(Phi, L, &SE);
5109 if (HasParellelizableReductions &&
5131 if (HasParellelizableReductions) {
5142 if (Header == Latch) {
5145 unsigned Width = 10;
5151 unsigned MaxInstsPerLine = 16;
5153 unsigned BestUC = 1;
5154 unsigned SizeWithBestUC = BestUC * Size;
5156 unsigned SizeWithUC = UC * Size;
5157 if (SizeWithUC > 48)
5159 if ((SizeWithUC % MaxInstsPerLine) == 0 ||
5160 (SizeWithBestUC % MaxInstsPerLine) < (SizeWithUC % MaxInstsPerLine)) {
5162 SizeWithBestUC = BestUC * Size;
5172 for (auto *BB : L->blocks()) {
5173 for (auto &I : *BB) {
5183 for (auto *U : I.users())
5185 LoadedValuesPlus.insert(U);
5192 return LoadedValuesPlus.contains(SI->getOperand(0));
5205 if (!Term || !Term->isConditional() || Preds.size() == 1 ||
5219 auto *I = dyn_cast<Instruction>(V);
5220 return I && DependsOnLoopLoad(I, Depth + 1);
5227 DependsOnLoopLoad(I, 0)) {
5243 if (L->getLoopDepth() > 1)
5253 for (auto *BB : L->getBlocks()) {
5254 for (auto &I : *BB) {
5258 if (IsVectorized && I.getType()->isVectorTy())
5271 switch (ST->getProcFamily()) {
5272 case AArch64Subtarget::AppleA14:
5273 case AArch64Subtarget::AppleA15:
5274 case AArch64Subtarget::AppleA16:
5275 case AArch64Subtarget::AppleM4:
5278 case AArch64Subtarget::Falkor:
5304 !ST->getSchedModel().isOutOfOrder()) {
5322 bool CanCreate) const {
5326 case Intrinsic::aarch64_neon_st2:
5327 case Intrinsic::aarch64_neon_st3:
5328 case Intrinsic::aarch64_neon_st4: {
5331 if (!CanCreate || !ST)
5333 unsigned NumElts = Inst->arg_size() - 1;
5334 if (ST->getNumElements() != NumElts)
5336 for (unsigned i = 0, e = NumElts; i != e; ++i) {
5342 for (unsigned i = 0, e = NumElts; i != e; ++i) {
5344 Res = Builder.CreateInsertValue(Res, L, i);
5348 case Intrinsic::aarch64_neon_ld2:
5349 case Intrinsic::aarch64_neon_ld3:
5350 case Intrinsic::aarch64_neon_ld4:
5351 if (Inst->getType() == ExpectedType)
5362 case Intrinsic::aarch64_neon_ld2:
5363 case Intrinsic::aarch64_neon_ld3:
5364 case Intrinsic::aarch64_neon_ld4:
5365 Info.ReadMem = true;
5366 Info.WriteMem = false;
5369 case Intrinsic::aarch64_neon_st2:
5370 case Intrinsic::aarch64_neon_st3:
5371 case Intrinsic::aarch64_neon_st4:
5372 Info.ReadMem = false;
5373 Info.WriteMem = true;
5381 case Intrinsic::aarch64_neon_ld2:
5382 case Intrinsic::aarch64_neon_st2:
5383 Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
5385 case Intrinsic::aarch64_neon_ld3:
5386 case Intrinsic::aarch64_neon_st3:
5387 Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
5389 case Intrinsic::aarch64_neon_ld4:
5390 case Intrinsic::aarch64_neon_st4:
5391 Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
5403 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
5404 bool Considerable = false;
5405 AllowPromotionWithoutCommonHeader = false;
5408 Type *ConsideredSExtType =
5410 if (I.getType() != ConsideredSExtType)
5414 for (const User *U : I.users()) {
5416 Considerable = true;
5420 if (GEPInst->getNumOperands() > 2) {
5421 AllowPromotionWithoutCommonHeader = true;
5426 return Considerable;
5474 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
5484 return LegalizationCost + 2;
5494 LegalizationCost *= LT.first - 1;
5497 int ISD = TLI->InstructionOpcodeToISD(Opcode);
5506 return LegalizationCost + 2;
5514 std::optional<FastMathFlags> FMF,
5530 return BaseCost + FixedVTy->getNumElements();
5533 if (Opcode != Instruction::FAdd)
5547 MVT MTy = LT.second;
5548 int ISD = TLI->InstructionOpcodeToISD(Opcode);
5596 MTy.isVector() && (EltTy->isFloatTy() || EltTy->isDoubleTy() ||
5597 (EltTy->isHalfTy() && ST->hasFullFP16()))) {
5599 if (ValTy->getElementCount().getFixedValue() >= 2 && NElts >= 2 &&
5609 return (LT.first - 1) + Log2_32(NElts);
5614 return (LT.first - 1) + Entry->Cost;
5626 if (LT.first != 1) {
5632 ExtraCost *= LT.first - 1;
5635 auto Cost = ValVTy->getElementType()->isIntegerTy(1) ? 2 : Entry->Cost;
5636 return Cost + ExtraCost;
5644 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *VecTy,
5646 EVT VecVT = TLI->getValueType(DL, VecTy);
5647 EVT ResVT = TLI->getValueType(DL, ResTy);
5657 if (((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
5659 ((LT.second == MVT::v4i16 || LT.second == MVT::v8i16) &&
5661 ((LT.second == MVT::v2i32 || LT.second == MVT::v4i32) &&
5663 return (LT.first - 1) * 2 + 2;
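For instance, if an extended-add reduction over i8 elements legalizes so that LT.first == 2 (two legal vector halves), the formula on line 5663 gives (2 - 1) * 2 + 2 = 4, i.e. one extra pair of widening adds for the second half on top of the base cost of 2. This arithmetic is only an illustration of how the formula scales with legalization, not a measured cost.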
5674 EVT VecVT = TLI->getValueType(DL, VecTy);
5675 EVT ResVT = TLI->getValueType(DL, ResTy);
5678 RedOpcode == Instruction::Add) {
5684 if ((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
5686 return LT.first + 2;
5721 EVT PromotedVT = LT.second.getScalarType() == MVT::i1
5722 ? TLI->getPromotedVTForPredicate(EVT(LT.second))
5736 if (LT.second.getScalarType() == MVT::i1) {
5745 assert(Entry && "Illegal Type for Splice");
5746 LegalizationCost += Entry->Cost;
5747 return LegalizationCost * LT.first;
5751 unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
5760 if (VF.isFixed() && !ST->isSVEorStreamingSVEAvailable() &&
5761 (!ST->isNeonAvailable() || !ST->hasDotProd()))
5764 if ((Opcode != Instruction::Add && Opcode != Instruction::Sub) ||
5769 (!BinOp || (OpBExtend != TTI::PR_None && InputTypeB)) &&
5770 "Unexpected values for OpBExtend or InputTypeB");
5774 if (BinOp && (*BinOp != Instruction::Mul || InputTypeA != InputTypeB))
5777 bool IsUSDot = OpBExtend != TTI::PR_None && OpAExtend != OpBExtend;
5778 if (IsUSDot && !ST->hasMatMulInt8())
5790 auto TC = TLI->getTypeConversion(AccumVectorType->getContext(),
5799 if (TLI->getTypeAction(AccumVectorType->getContext(), TC.second) !=
5805 std::pair<InstructionCost, MVT> AccumLT =
5807 std::pair<InstructionCost, MVT> InputLT =
5820 if (ST->isSVEorStreamingSVEAvailable() && !IsUSDot) {
5822 if (AccumLT.second.getScalarType() == MVT::i64 &&
5823 InputLT.second.getScalarType() == MVT::i16)
5826 if (AccumLT.second.getScalarType() == MVT::i64 &&
5827 InputLT.second.getScalarType() == MVT::i8)
5837 if (ST->isSVEorStreamingSVEAvailable() ||
5838 (AccumLT.second.isFixedLengthVector() && ST->isNeonAvailable() &&
5839 ST->hasDotProd())) {
5840 if (AccumLT.second.getScalarType() == MVT::i32 &&
5841 InputLT.second.getScalarType() == MVT::i8)
5857 "Expected the Mask to match the return size if given");
5859 "Expected the same scalar types");
5865 LT.second.getScalarSizeInBits() * Mask.size() > 128 &&
5866 SrcTy->getScalarSizeInBits() == LT.second.getScalarSizeInBits() &&
5867 Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {
5875 return std::max<InstructionCost>(1, LT.first / 4);
5883 Mask, 4, SrcTy->getElementCount().getKnownMinValue() * 2) ||
5885 Mask, 3, SrcTy->getElementCount().getKnownMinValue() * 2)))
5888 unsigned TpNumElts = Mask.size();
5889 unsigned LTNumElts = LT.second.getVectorNumElements();
5890 unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;
5892 LT.second.getVectorElementCount());
5894 std::map<std::tuple<unsigned, unsigned, SmallVector<int>>, InstructionCost>
5896 for (unsigned N = 0; N < NumVecs; N++) {
5900 unsigned Source1 = -1U, Source2 = -1U;
5901 unsigned NumSources = 0;
5902 for (unsigned E = 0; E < LTNumElts; E++) {
5903 int MaskElt = (N * LTNumElts + E < TpNumElts) ? Mask[N * LTNumElts + E]
5912 unsigned Source = MaskElt / LTNumElts;
5913 if (NumSources == 0) {
5916 } else if (NumSources == 1 && Source != Source1) {
5919 } else if (NumSources >= 2 && Source != Source1 && Source != Source2) {
5925 if (Source == Source1)
5927 else if (Source == Source2)
5928 NMask.push_back(MaskElt % LTNumElts + LTNumElts);
5937 PreviousCosts.insert({std::make_tuple(Source1, Source2, NMask), 0});
5948 NTp, NTp, NMask, CostKind, 0, nullptr, Args,
5951 Result.first->second = NCost;
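The loop above splits an illegal shuffle into per-sub-vector shuffles of the legal width (LTNumElts) and classifies each one by how many distinct source sub-vectors it reads. A simplified, self-contained sketch of that classification step (plain C++, not the LLVM implementation; the helper name and the main() example are invented):

#include <cstdio>
#include <vector>

// For sub-vector N of width LTNumElts, count how many distinct source
// sub-vectors its mask elements come from.
static unsigned countSources(const std::vector<int> &Mask, unsigned LTNumElts,
                             unsigned N) {
  int Source1 = -1, Source2 = -1;
  unsigned NumSources = 0;
  for (unsigned E = 0; E != LTNumElts; ++E) {
    unsigned Idx = N * LTNumElts + E;
    if (Idx >= Mask.size() || Mask[Idx] < 0)
      continue; // out-of-range / undef lanes pick no source
    int Source = Mask[Idx] / (int)LTNumElts;
    if (NumSources == 0) {
      Source1 = Source;
      NumSources = 1;
    } else if (NumSources == 1 && Source != Source1) {
      Source2 = Source;
      NumSources = 2;
    } else if (NumSources >= 2 && Source != Source1 && Source != Source2) {
      NumSources++; // needs more than two inputs -> costed conservatively
    }
  }
  return NumSources;
}

int main() {
  // An 8-lane mask over two 4-lane legal sub-vectors: lanes 0..3 read only
  // source 0, lanes 4..7 interleave sources 0 and 1.
  std::vector<int> Mask = {0, 1, 2, 3, 0, 4, 1, 5};
  std::printf("%u %u\n", countSources(Mask, 4, 0), countSources(Mask, 4, 1));
  // prints: 1 2
}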
5965 if (IsExtractSubvector && LT.second.isFixedLengthVector()) {
5966 if (LT.second.getFixedSizeInBits() >= 128 &&
5968 LT.second.getVectorNumElements() / 2) {
5971 if (Index == (int)LT.second.getVectorNumElements() / 2)
5985 !Mask.empty() && SrcTy->getPrimitiveSizeInBits().isNonZero() &&
5986 SrcTy->getPrimitiveSizeInBits().isKnownMultipleOf(
5995 if ((ST->hasSVE2p1() || ST->hasSME2p1()) &&
5996 ST->isSVEorStreamingSVEAvailable() &&
6001 if (ST->isSVEorStreamingSVEAvailable() &&
6015 if (IsLoad && LT.second.isVector() &&
6017 LT.second.getVectorElementCount()))
6023 if (Mask.size() == 4 &&
6025 (SrcTy->getScalarSizeInBits() == 16 ||
6026 SrcTy->getScalarSizeInBits() == 32) &&
6027 all_of(Mask, [](int E) { return E < 8; }))
6031 if (!Mask.empty() && LT.second.isFixedLengthVector() &&
6034 return M.value() < 0 || M.value() == (int)M.index();
6041 if (LT.second.isFixedLengthVector() &&
6042 LT.second.getVectorNumElements() == Mask.size() &&
6044 (isZIPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
6045 isUZPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
6046 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6047 LT.second.getVectorNumElements(), 16) ||
6048 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6049 LT.second.getVectorNumElements(), 32) ||
6050 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6051 LT.second.getVectorNumElements(), 64) ||
6054 [&Mask](int M) { return M < 0 || M == Mask[0]; })))
6183 return LT.first * Entry->Cost;
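The mask predicates checked above correspond to fixed AArch64 permutes: zip1/zip2 interleave the low or high halves of two vectors, uzp1/uzp2 take the even or odd lanes, and REV reverses elements within a block. A small standalone sketch of what those masks look like for an 8-lane vector (illustrative only; the real checks are isZIPMask, isUZPMask and isREVMask):

#include <cstdio>
#include <vector>

// zip1 of two N-lane vectors: <0, N, 1, N+1, ...> (interleave the low halves).
static std::vector<int> zip1Mask(int N) {
  std::vector<int> M;
  for (int I = 0; I < N / 2; ++I) {
    M.push_back(I);
    M.push_back(I + N);
  }
  return M;
}

// uzp1 of two N-lane vectors: <0, 2, 4, ..., 2N-2> (take the even lanes).
static std::vector<int> uzp1Mask(int N) {
  std::vector<int> M;
  for (int I = 0; I < N; ++I)
    M.push_back(2 * I);
  return M;
}

// REV64 on 32-bit lanes: reverse elements within each 64-bit block,
// i.e. swap neighbouring pairs: <1, 0, 3, 2, ...>.
static std::vector<int> rev64Mask32(int N) {
  std::vector<int> M;
  for (int I = 0; I < N; I += 2) {
    M.push_back(I + 1);
    M.push_back(I);
  }
  return M;
}

int main() {
  for (int E : zip1Mask(8))
    std::printf("%d ", E); // 0 8 1 9 2 10 3 11
  std::printf("\n");
  (void)uzp1Mask(8);
  (void)rev64Mask32(8);
}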
6192 LT.second.getSizeInBits() <= 128 && SubTp) {
6194 if (SubLT.second.isVector()) {
6195 int NumElts = LT.second.getVectorNumElements();
6196 int NumSubElts = SubLT.second.getVectorNumElements();
6197 if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
6203 if (IsExtractSubvector)
6239 return ST->useFixedOverScalableIfEqualCost();
6243 return ST->getEpilogueVectorizationMinVF();
6278 unsigned NumInsns = 0;
6280 NumInsns += BB->sizeWithoutDebug();
6290 int64_t Scale, unsigned AddrSpace) const {
6318 if (I->getOpcode() == Instruction::Or &&
6323 if (I->getOpcode() == Instruction::Add ||
6324 I->getOpcode() == Instruction::Sub)
6349 return all_equal(Shuf->getShuffleMask());
6356 bool AllowSplat = false) {
6361 auto areTypesHalfed = [](Value *FullV, Value *HalfV) {
6362 auto *FullTy = FullV->getType();
6363 auto *HalfTy = HalfV->getType();
6365 2 * HalfTy->getPrimitiveSizeInBits().getFixedValue();
6368 auto extractHalf = [](Value *FullV, Value *HalfV) {
6371 return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
6375 Value *S1Op1 = nullptr, *S2Op1 = nullptr;
6389 if ((S1Op1 && (!areTypesHalfed(S1Op1, Op1) || !extractHalf(S1Op1, Op1))) ||
6390 (S2Op1 && (!areTypesHalfed(S2Op1, Op2) || !extractHalf(S2Op1, Op2))))
6404 if ((M1Start != 0 && M1Start != (NumElements / 2)) ||
6405 (M2Start != 0 && M2Start != (NumElements / 2)))
6407 if (S1Op1 && S2Op1 && M1Start != M2Start)
6417 return Ext->getType()->getScalarSizeInBits() ==
6418 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
6432 Value *VectorOperand = nullptr;
6449 if (!GEP || GEP->getNumOperands() != 2)
6453 Value *Offsets = GEP->getOperand(1);
6456 if (Base->getType()->isVectorTy() || !Offsets->getType()->isVectorTy())
6462 if (OffsetsInst->getType()->getScalarSizeInBits() > 32 &&
6463 OffsetsInst->getOperand(0)->getType()->getScalarSizeInBits() <= 32)
6464 Ops.push_back(&GEP->getOperandUse(1));
6498 switch (II->getIntrinsicID()) {
6499 case Intrinsic::aarch64_neon_smull:
6500 case Intrinsic::aarch64_neon_umull:
6503 Ops.push_back(&II->getOperandUse(0));
6504 Ops.push_back(&II->getOperandUse(1));
6509 case Intrinsic::fma:
6510 case Intrinsic::fmuladd:
6516 case Intrinsic::aarch64_neon_sqdmull:
6517 case Intrinsic::aarch64_neon_sqdmulh:
6518 case Intrinsic::aarch64_neon_sqrdmulh:
6521 Ops.push_back(&II->getOperandUse(0));
6523 Ops.push_back(&II->getOperandUse(1));
6524 return !Ops.empty();
6525 case Intrinsic::aarch64_neon_fmlal:
6526 case Intrinsic::aarch64_neon_fmlal2:
6527 case Intrinsic::aarch64_neon_fmlsl:
6528 case Intrinsic::aarch64_neon_fmlsl2:
6531 Ops.push_back(&II->getOperandUse(1));
6533 Ops.push_back(&II->getOperandUse(2));
6534 return !Ops.empty();
6535 case Intrinsic::aarch64_sve_ptest_first:
6536 case Intrinsic::aarch64_sve_ptest_last:
6538 if (IIOp->getIntrinsicID() == Intrinsic::aarch64_sve_ptrue)
6539 Ops.push_back(&II->getOperandUse(0));
6540 return !Ops.empty();
6541 case Intrinsic::aarch64_sme_write_horiz:
6542 case Intrinsic::aarch64_sme_write_vert:
6543 case Intrinsic::aarch64_sme_writeq_horiz:
6544 case Intrinsic::aarch64_sme_writeq_vert: {
6546 if (!Idx || Idx->getOpcode() != Instruction::Add)
6548 Ops.push_back(&II->getOperandUse(1));
6551 case Intrinsic::aarch64_sme_read_horiz:
6552 case Intrinsic::aarch64_sme_read_vert:
6553 case Intrinsic::aarch64_sme_readq_horiz:
6554 case Intrinsic::aarch64_sme_readq_vert:
6555 case Intrinsic::aarch64_sme_ld1b_vert:
6556 case Intrinsic::aarch64_sme_ld1h_vert:
6557 case Intrinsic::aarch64_sme_ld1w_vert:
6558 case Intrinsic::aarch64_sme_ld1d_vert:
6559 case Intrinsic::aarch64_sme_ld1q_vert:
6560 case Intrinsic::aarch64_sme_st1b_vert:
6561 case Intrinsic::aarch64_sme_st1h_vert:
6562 case Intrinsic::aarch64_sme_st1w_vert:
6563 case Intrinsic::aarch64_sme_st1d_vert:
6564 case Intrinsic::aarch64_sme_st1q_vert:
6565 case Intrinsic::aarch64_sme_ld1b_horiz:
6566 case Intrinsic::aarch64_sme_ld1h_horiz:
6567 case Intrinsic::aarch64_sme_ld1w_horiz:
6568 case Intrinsic::aarch64_sme_ld1d_horiz:
6569 case Intrinsic::aarch64_sme_ld1q_horiz:
6570 case Intrinsic::aarch64_sme_st1b_horiz:
6571 case Intrinsic::aarch64_sme_st1h_horiz:
6572 case Intrinsic::aarch64_sme_st1w_horiz:
6573 case Intrinsic::aarch64_sme_st1d_horiz:
6574 case Intrinsic::aarch64_sme_st1q_horiz: {
6576 if (!Idx || Idx->getOpcode() != Instruction::Add)
6578 Ops.push_back(&II->getOperandUse(3));
6581 case Intrinsic::aarch64_neon_pmull:
6584 Ops.push_back(&II->getOperandUse(0));
6585 Ops.push_back(&II->getOperandUse(1));
6587 case Intrinsic::aarch64_neon_pmull64:
6589 II->getArgOperand(1)))
6591 Ops.push_back(&II->getArgOperandUse(0));
6592 Ops.push_back(&II->getArgOperandUse(1));
6594 case Intrinsic::masked_gather:
6597 Ops.push_back(&II->getArgOperandUse(0));
6599 case Intrinsic::masked_scatter:
6602 Ops.push_back(&II->getArgOperandUse(1));
6609 auto ShouldSinkCondition = [](Value *Cond,
6614 if (II->getIntrinsicID() != Intrinsic::vector_reduce_or ||
6618 Ops.push_back(&II->getOperandUse(0));
6622 switch (I->getOpcode()) {
6623 case Instruction::GetElementPtr:
6624 case Instruction::Add:
6625 case Instruction::Sub:
6627 for (unsigned Op = 0; Op < I->getNumOperands(); ++Op) {
6629 Ops.push_back(&I->getOperandUse(Op));
6634 case Instruction::Select: {
6635 if (!ShouldSinkCondition(I->getOperand(0), Ops))
6638 Ops.push_back(&I->getOperandUse(0));
6641 case Instruction::Br: {
6648 Ops.push_back(&I->getOperandUse(0));
6655 if (!I->getType()->isVectorTy())
6658 switch (I->getOpcode()) {
6659 case Instruction::Sub:
6660 case Instruction::Add: {
6669 Ops.push_back(&Ext1->getOperandUse(0));
6670 Ops.push_back(&Ext2->getOperandUse(0));
6673 Ops.push_back(&I->getOperandUse(0));
6674 Ops.push_back(&I->getOperandUse(1));
6678 case Instruction::Or: {
6681 if (ST->hasNEON()) {
6695 if (I->getParent() != MainAnd->getParent() ||
6700 if (I->getParent() != IA->getParent() ||
6701 I->getParent() != IB->getParent())
6706 Ops.push_back(&I->getOperandUse(0));
6707 Ops.push_back(&I->getOperandUse(1));
6716 case Instruction::Mul: {
6717 auto ShouldSinkSplatForIndexedVariant = [](Value *V) {
6720 if (Ty->isScalableTy())
6724 return Ty->getScalarSizeInBits() == 16 || Ty->getScalarSizeInBits() == 32;
6727 int NumZExts = 0, NumSExts = 0;
6728 for (auto &Op : I->operands()) {
6735 auto *ExtOp = Ext->getOperand(0);
6736 if (isSplatShuffle(ExtOp) && ShouldSinkSplatForIndexedVariant(ExtOp))
6737 Ops.push_back(&Ext->getOperandUse(0));
6745 if (Ext->getOperand(0)->getType()->getScalarSizeInBits() * 2 <
6746 I->getType()->getScalarSizeInBits())
6783 if (!ElementConstant || !ElementConstant->isZero())
6786 unsigned Opcode = OperandInstr->getOpcode();
6787 if (Opcode == Instruction::SExt)
6789 else if (Opcode == Instruction::ZExt)
6794 unsigned Bitwidth = I->getType()->getScalarSizeInBits();
6804 Ops.push_back(&Insert->getOperandUse(1));
6810 if (!Ops.empty() && (NumSExts == 2 || NumZExts == 2))
6814 if (!ShouldSinkSplatForIndexedVariant(I))
6819 Ops.push_back(&I->getOperandUse(0));
6821 Ops.push_back(&I->getOperandUse(1));
6823 return !Ops.empty();
6825 case Instruction::FMul: {
6827 if (I->getType()->isScalableTy())
6836 Ops.push_back(&I->getOperandUse(0));
6838 Ops.push_back(&I->getOperandUse(1));
6839 return !Ops.empty();
static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static Error reportError(StringRef Message)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Cost tables and simple lookup functions.
This file defines the DenseMap class.
static Value * getCondition(Instruction *I)
const HexagonInstrInfo * TII
This file provides the interface for the instcombine pass implementation.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
This file defines the LoopVectorizationLegality class.
static const Function * getCalledFunction(const Value *V)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > & Cond
static uint64_t getBits(uint64_t Val, int Start, int End)
static unsigned getNumElements(Type *Ty)
static SymbolRef::Type getType(const Symbol *Sym)
This file describes how to lower LLVM code to machine code.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
unsigned getVectorInsertExtractBaseCost() const
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const override
unsigned getMaxInterleaveFactor(ElementCount VF) const override
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const override
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const override
bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, Type *Src) const
InstructionCost getIntImmCost(int64_t Val) const
Calculate the cost of materializing a 64-bit value.
bool prefersVectorizedAddressing() const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const override
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override
bool isElementTypeLegalForScalableVector(Type *Ty) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
std::optional< InstructionCost > getFP16BF16PromoteCost(Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, bool IncludeTrunc, std::function< InstructionCost(Type *)> InstCost) const
FP16 and BF16 operations are lowered to fptrunc(op(fpext, fpext) if the architecture features are not...
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const override
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind) const override
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const override
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
bool useNeonVector(const Type *Ty) const
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index, TTI::TargetCostKind CostKind) const override
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const override
bool areInlineCompatible(const Function *Caller, const Function *Callee) const override
unsigned getMaxNumElements(ElementCount VF) const
Try to return an estimate cost factor that can be used as a multiplier when scalarizing an operation ...
bool shouldTreatInstructionLikeSelect(const Instruction *I) const override
bool isMultiversionedFunction(const Function &F) const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const override
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
bool isLegalMaskedGatherScatter(Type *DataType) const
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const override
See if I should be considered for address type promotion.
APInt getFeatureMask(const Function &F) const override
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
bool areTypesABICompatible(const Function *Caller, const Function *Callee, ArrayRef< Type * > Types) const override
bool enableScalableVectorization() const override
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType, bool CanCreate=true) const override
bool hasKnownLowerThroughputFromSchedulingModel(unsigned Opcode1, unsigned Opcode2) const
Check whether Opcode1 has less throughput according to the scheduling model than Opcode2.
unsigned getEpilogueVectorizationMinVF() const override
InstructionCost getSpliceCost(VectorType *Tp, int Index, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, TTI::TargetCostKind CostKind) const
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const override
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
bool preferFixedOverScalableIfEqualCost(bool IsEpilogue) const override
Class for arbitrary precision integers.
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
unsigned popcount() const
Count the number of bits set.
unsigned countLeadingOnes() const
void negate()
Negate this APInt in place.
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
unsigned logBase2() const
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind) const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
bool isTypeLegal(Type *Ty) const override
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind) const override
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
static bool isIntPredicate(Predicate P)
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
const APInt & getValue() const
Return the constant as an APInt value reference.
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
This provides a helper for copying FMF from an instruction or setting specified flags.
Convenience struct for specifying and reasoning about fast-math flags.
bool allowContract() const
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
CallInst * CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name="")
Create a call to the vector.insert intrinsic.
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Type * getDoubleTy()
Fetch the type representing a 64-bit floating point value.
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
LLVM_ABI CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Type * getHalfTy()
Fetch the type representing a 16-bit floating point value.
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateBinOpFMF(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
LLVM_ABI CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Type * getFloatTy()
Fetch the type representing a 32-bit floating point value.
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
LLVM_ABI Value * CreateElementCount(Type *Ty, ElementCount EC)
Create an expression which evaluates to the number of elements in EC at runtime.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
This instruction inserts a single (scalar) element into a VectorType value.
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
The core instruction combiner logic.
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
static InstructionCost getInvalid(CostType Val=0)
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
bool hasGroups() const
Returns true if we have any interleave groups.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
iterator_range< block_iterator > blocks() const
RecurrenceSet & getFixedOrderRecurrences()
Return the fixed-order recurrences found in the loop.
DominatorTree * getDominatorTree() const
PredicatedScalarEvolution * getPredicatedScalarEvolution() const
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
Represents a single loop in the control flow graph.
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Type * getRecurrenceType() const
Returns the type of the recurrence.
RecurKind getRecurrenceKind() const
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
This class represents an analyzed expression in the program.
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasNonStreamingInterfaceAndBody() const
bool hasStreamingCompatibleInterface() const
bool hasStreamingInterfaceOrBody() const
bool isSMEABIRoutine() const
bool hasStreamingBody() const
void set(unsigned M, bool Enable=true)
SMECallAttrs is a utility class to hold the SMEAttrs for a callsite.
bool requiresPreservingZT0() const
bool requiresSMChange() const
bool requiresLazySave() const
bool requiresPreservingAllZAState() const
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
static ScalableVectorType * getDoubleElementsVectorType(ScalableVectorType *VTy)
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value,...
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns the upper bound of the loop trip count as a normal unsigned value.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getSymbolicMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEV that is greater than or equal to (i.e.
This instruction constructs a fixed permutation of two input vectors.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static LLVM_ABI bool isExtractSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &Index)
Return true if this shuffle mask is an extract subvector mask.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Class to represent struct types.
TargetInstrInfo - Interface to description of machine instruction set.
std::pair< LegalizeTypeAction, EVT > LegalizeKind
LegalizeKind holds the legalization kind that needs to happen to EVT in order to type-legalize it.
const RTLIB::RuntimeLibcallsInfo & getRuntimeLibcallsInfo() const
Primary interface to the complete machine description for the target machine.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Type * getElementType() const
constexpr ScalarTy getFixedValue() const
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
static constexpr unsigned SVEBitsPerBlock
LLVM_ABI APInt getFMVPriority(ArrayRef< StringRef > Features)
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
@ C
The default llvm calling convention, compatible with C.
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types a...
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ SIGN_EXTEND
Conversion operators.
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_nonnegative > m_NonNegative()
Match an integer or vector of non-negative values.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
IntrinsicID_match m_VScale()
Matches a call to llvm.vscale().
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
FunctionAddr VTableAddr Value
std::optional< unsigned > isDUPQMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPQMask - matches a splat of equivalent lanes within segments of a given number of elements.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
TailFoldingOpts
An enum to describe what types of loops we should attempt to tail-fold: Disabled: None Reductions: Lo...
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
bool isDUPFirstSegmentMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPFirstSegmentMask - matches a splat of the first 128b segment.
TypeConversionCostTblEntryT< unsigned > TypeConversionCostTblEntry
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
LLVM_ABI std::optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for loop.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool MaskedValueIsZero(const Value *V, const APInt &Mask, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if 'V & Mask' is known to be zero.
unsigned M1(unsigned Val)
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
unsigned getPerfectShuffleCost(llvm::ArrayRef< int > M)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool isUZPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for uzp1 or uzp2 masks of the form: <0, 2, 4, 6, 8, 10, 12, 14> or <1,...
bool isREVMask(ArrayRef< int > M, unsigned EltSize, unsigned NumElts, unsigned BlockSize)
isREVMask - Check if a vector shuffle corresponds to a REV instruction with the specified blocksize.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
bool isZIPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for zip1 or zip2 masks of the form: <0, 8, 1, 9, 2, 10, 3, 11> or <4, 12,...
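A hedged sketch of checking a concrete mask against one of these AArch64 shuffle-mask predicates; it assumes the declarations are visible as in the backend sources (AArch64PerfectShuffle.h):

#include "AArch64PerfectShuffle.h"   // assumed location of the declarations
#include "llvm/ADT/ArrayRef.h"

using namespace llvm;

// <0, 2, 4, 6> gathers the even lanes of a two-vector, 4-element shuffle,
// the uzp1-style pattern; WhichResult distinguishes uzp1 from uzp2.
static bool matchesEvenLaneUnzip() {
  const int Mask[] = {0, 2, 4, 6};
  unsigned WhichResult = 0;
  return isUZPMask(Mask, /*NumElts=*/4, WhichResult);
}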
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ Or
Bitwise or logical OR of integers.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ Xor
Bitwise or logical XOR of integers.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
DWARFExpression::Operation Op
CostTblEntryT< unsigned > CostTblEntry
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
unsigned getNumElementsFromSVEPredPattern(unsigned Pattern)
Return the number of active elements for VL1 to VL256 predicate pattern, zero for all other patterns.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
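A small sketch combining this with getLoadStorePointerOperand listed earlier; describeMemAccess is a hypothetical helper:

#include "llvm/IR/Instructions.h"

using namespace llvm;

// Report the pointer operand and accessed type of a load or store; returns
// nullptr (and leaves AccessTy untouched) for any other instruction.
static const Value *describeMemAccess(const Instruction *I, Type *&AccessTy) {
  const Value *Ptr = getLoadStorePointerOperand(I);
  if (Ptr)
    AccessTy = getLoadStoreType(I);
  return Ptr;
}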
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
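Two one-liners showing the typical use of is_contained and all_equal; the helper names are illustrative:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"

using namespace llvm;

// True if the shuffle mask already references the given source lane.
static bool usesLane(ArrayRef<int> Mask, int Lane) {
  return is_contained(Mask, Lane);
}

// True if both operands select the same source lane (e.g. a DUP candidate).
static bool isSameLane(int LaneA, int LaneB) {
  return all_equal({LaneA, LaneB});
}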
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
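A sketch tying this to the other bit-math helpers above (Log2_64 is the 64-bit sibling of Log2_32); note that NextPowerOf2 is strictly greater than its argument, so a value that is already a power of two must be kept as-is explicitly:

#include "llvm/Support/MathExtras.h"
#include <cstdint>

using namespace llvm;

// Round a count up to a power of two and return its log2.
static unsigned log2RoundedUp(uint64_t N) {
  uint64_t P = isPowerOf2_64(N) ? N : NextPowerOf2(N);
  return Log2_64(P);
}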
unsigned getMatchingIROpode() const
bool inactiveLanesAreUnused() const
bool inactiveLanesAreNotDefined() const
bool hasMatchingUndefIntrinsic() const
static SVEIntrinsicInfo defaultMergingUnaryNarrowingTopOp()
static SVEIntrinsicInfo defaultZeroingOp()
bool hasGoverningPredicate() const
SVEIntrinsicInfo & setOperandIdxInactiveLanesTakenFrom(unsigned Index)
static SVEIntrinsicInfo defaultMergingOp(Intrinsic::ID IID=Intrinsic::not_intrinsic)
SVEIntrinsicInfo & setOperandIdxWithNoActiveLanes(unsigned Index)
unsigned getOperandIdxWithNoActiveLanes() const
SVEIntrinsicInfo & setInactiveLanesAreUnused()
SVEIntrinsicInfo & setInactiveLanesAreNotDefined()
SVEIntrinsicInfo & setGoverningPredicateOperandIdx(unsigned Index)
bool inactiveLanesTakenFromOperand() const
static SVEIntrinsicInfo defaultUndefOp()
bool hasOperandWithNoActiveLanes() const
Intrinsic::ID getMatchingUndefIntrinsic() const
SVEIntrinsicInfo & setResultIsZeroInitialized()
static SVEIntrinsicInfo defaultMergingUnaryOp()
SVEIntrinsicInfo & setMatchingUndefIntrinsic(Intrinsic::ID IID)
unsigned getGoverningPredicateOperandIdx() const
bool hasMatchingIROpode() const
bool resultIsZeroInitialized() const
SVEIntrinsicInfo & setMatchingIROpcode(unsigned Opcode)
unsigned getOperandIdxInactiveLanesTakenFrom() const
static SVEIntrinsicInfo defaultVoidOp(unsigned GPIndex)
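The SVEIntrinsicInfo members above form a small builder-style helper that is local to this file; the fragment below is only a hedged illustration of how such a descriptor could be put together and queried, not a claim about which intrinsics the file actually registers:

// Fragment assumed to live inside this file, where SVEIntrinsicInfo is
// defined. Pairing aarch64_sve_fmul's _u (undef) variant with the FMul
// opcode is illustrative.
SVEIntrinsicInfo Info =
    SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fmul_u)
        .setMatchingIROpcode(Instruction::FMul);

if (Info.hasGoverningPredicate()) {
  unsigned GPIdx = Info.getGoverningPredicateOperandIdx();
  (void)GPIdx; // merging ops take their predicate as one of the operands
}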
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
uint64_t getScalarSizeInBits() const
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isFixedLengthVector() const
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
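A short sketch of the EVT query API; isSmallFixedVector is a hypothetical helper and not a legality check:

#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Type.h"

using namespace llvm;

// True if Ty maps to a simple, fixed-width vector EVT of at most 128 bits.
static bool isSmallFixedVector(Type *Ty) {
  EVT VT = EVT::getEVT(Ty, /*HandleUnknown=*/true);
  if (!VT.isSimple() || !VT.isFixedLengthVector())
    return false;
  return VT.getSizeInBits().getFixedValue() <= 128;
}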
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Machine model for scheduling, bundling, and heuristics.
static LLVM_ABI double getReciprocalThroughput(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
Information about a load/store intrinsic defined by the target.
InterleavedAccessInfo * IAI
LoopVectorizationLegality * LVL
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...