29#include "llvm/IR/IntrinsicsAMDGPU.h"
37#define DEBUG_TYPE "AMDGPUtti"
40 "amdgpu-unroll-threshold-private",
41 cl::desc(
"Unroll threshold for AMDGPU if private memory used in a loop"),
45 "amdgpu-unroll-threshold-local",
46 cl::desc(
"Unroll threshold for AMDGPU if local memory used in a loop"),
50 "amdgpu-unroll-threshold-if",
51 cl::desc(
"Unroll threshold increment for AMDGPU for each if statement inside loop"),
55 "amdgpu-unroll-runtime-local",
56 cl::desc(
"Allow runtime unroll for AMDGPU if local memory used in a loop"),
60 "amdgpu-unroll-max-block-to-analyze",
61 cl::desc(
"Inner loop block size threshold to analyze in unroll for AMDGPU"),
66 cl::desc(
"Cost of alloca argument"));
74 cl::desc(
"Maximum alloca size to use for inline cost"));
79 cl::desc(
"Maximum number of BBs allowed in a function after inlining"
80 " (compile time constraint)"));
84 "amdgpu-memcpy-loop-unroll",
85 cl::desc(
"Unroll factor (affecting 4x32-bit operations) to use for memory "
86 "operations when lowering statically-sized memcpy, memmove, or"
96 for (
const Value *V :
I->operand_values()) {
101 return SubLoop->contains(PHI); }))
111 TargetTriple(TM->getTargetTriple()),
113 TLI(ST->getTargetLowering()) {}
118 const Function &
F = *L->getHeader()->getParent();
120 F.getFnAttributeAsParsedInteger(
"amdgpu-unroll-threshold", 300);
121 UP.
MaxCount = std::numeric_limits<unsigned>::max();
134 const unsigned MaxAlloca = (256 - 16) * 4;
140 if (
MDNode *LoopUnrollThreshold =
142 if (LoopUnrollThreshold->getNumOperands() == 2) {
144 LoopUnrollThreshold->getOperand(1));
145 if (MetaThresholdValue) {
151 ThresholdPrivate = std::min(ThresholdPrivate, UP.
Threshold);
152 ThresholdLocal = std::min(ThresholdLocal, UP.
Threshold);
157 unsigned MaxBoost = std::max(ThresholdPrivate, ThresholdLocal);
160 unsigned LocalGEPsSeen = 0;
163 return SubLoop->contains(BB); }))
176 if ((L->contains(Succ0) && L->isLoopExiting(Succ0)) ||
177 (L->contains(Succ1) && L->isLoopExiting(Succ1)))
183 << *L <<
" due to " << *Br <<
'\n');
195 unsigned AS =
GEP->getAddressSpace();
196 unsigned Threshold = 0;
198 Threshold = ThresholdPrivate;
200 Threshold = ThresholdLocal;
208 const Value *Ptr =
GEP->getPointerOperand();
214 if (!AllocaSize || AllocaSize->getFixedValue() > MaxAlloca)
223 if (LocalGEPsSeen > 1 || L->getLoopDepth() > 2 ||
228 << *L <<
" due to LDS use.\n");
233 bool HasLoopDef =
false;
236 if (!Inst || L->isLoopInvariant(
Op))
240 return SubLoop->contains(Inst); }))
264 << *L <<
" due to " << *
GEP <<
'\n');
292 AMDGPU::FeatureEnableLoadStoreOpt, AMDGPU::FeatureEnableSIScheduler,
293 AMDGPU::FeatureEnableUnsafeDSOffsetFolding, AMDGPU::FeatureUseFlatForGlobal,
294 AMDGPU::FeatureUnalignedScratchAccess, AMDGPU::FeatureUnalignedAccessMode,
296 AMDGPU::FeatureAutoWaitcntBeforeBarrier,
299 AMDGPU::FeatureSGPRInitBug, AMDGPU::FeatureXNACK,
300 AMDGPU::FeatureTrapHandler,
304 AMDGPU::FeatureSRAMECC,
307 AMDGPU::FeatureFastFMAF32, AMDGPU::FeatureHalfRate64Ops};
312 TLI(ST->getTargetLowering()), CommonTTI(TM,
F),
313 IsGraphics(
AMDGPU::isGraphics(
F.getCallingConv())) {
316 HasFP64FP16Denormals =
321 return !
F || !ST->isSingleLaneExecution(*
F);
353 if (Opcode == Instruction::Load || Opcode == Instruction::Store)
354 return 32 * 4 / ElemWidth;
357 return (ElemWidth == 8 && ST->has16BitInsts()) ? 4
358 : (ElemWidth == 16 && ST->has16BitInsts()) ? 2
359 : (ElemWidth == 32 && ST->hasPackedFP32Ops()) ? 2
364 unsigned ChainSizeInBytes,
366 unsigned VecRegBitWidth = VF * LoadSize;
369 return 128 / LoadSize;
375 unsigned ChainSizeInBytes,
377 unsigned VecRegBitWidth = VF * StoreSize;
378 if (VecRegBitWidth > 128)
379 return 128 / StoreSize;
395 return 8 * ST->getMaxPrivateElementSize();
403 unsigned AddrSpace)
const {
408 return (Alignment >= 4 || ST->hasUnalignedScratchAccessEnabled()) &&
409 ChainSizeInBytes <= ST->getMaxPrivateElementSize();
416 unsigned AddrSpace)
const {
422 unsigned AddrSpace)
const {
432 unsigned DestAddrSpace,
Align SrcAlign,
Align DestAlign,
433 std::optional<uint32_t> AtomicElementSize)
const {
435 if (AtomicElementSize)
449 unsigned I32EltsInVector = 4;
459 unsigned RemainingBytes,
unsigned SrcAddrSpace,
unsigned DestAddrSpace,
461 std::optional<uint32_t> AtomicCpySize)
const {
465 OpsOut, Context, RemainingBytes, SrcAddrSpace, DestAddrSpace, SrcAlign,
466 DestAlign, AtomicCpySize);
469 while (RemainingBytes >= 16) {
471 RemainingBytes -= 16;
475 while (RemainingBytes >= 8) {
481 while (RemainingBytes >= 4) {
487 while (RemainingBytes >= 2) {
493 while (RemainingBytes) {
511 case Intrinsic::amdgcn_ds_ordered_add:
512 case Intrinsic::amdgcn_ds_ordered_swap: {
515 if (!Ordering || !Volatile)
518 unsigned OrderingVal = Ordering->getZExtValue();
525 Info.WriteMem =
true;
526 Info.IsVolatile = !Volatile->isZero();
540 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
541 int ISD = TLI->InstructionOpcodeToISD(Opcode);
545 unsigned NElts = LT.second.isVector() ?
546 LT.second.getVectorNumElements() : 1;
555 return get64BitInstrCost(
CostKind) * LT.first * NElts;
557 if (ST->has16BitInsts() && SLT == MVT::i16)
558 NElts = (NElts + 1) / 2;
561 return getFullRateInstrCost() * LT.first * NElts;
567 if (SLT == MVT::i64) {
569 return 2 * getFullRateInstrCost() * LT.first * NElts;
572 if (ST->has16BitInsts() && SLT == MVT::i16)
573 NElts = (NElts + 1) / 2;
575 return LT.first * NElts * getFullRateInstrCost();
577 const int QuarterRateCost = getQuarterRateInstrCost(
CostKind);
578 if (SLT == MVT::i64) {
579 const int FullRateCost = getFullRateInstrCost();
580 return (4 * QuarterRateCost + (2 * 2) * FullRateCost) * LT.first * NElts;
583 if (ST->has16BitInsts() && SLT == MVT::i16)
584 NElts = (NElts + 1) / 2;
587 return QuarterRateCost * NElts * LT.first;
595 const int OPC = TLI->InstructionOpcodeToISD(
FAdd->getOpcode());
597 if (ST->hasMadMacF32Insts() && SLT == MVT::f32 && !HasFP32Denormals)
599 if (ST->has16BitInsts() && SLT == MVT::f16 && !HasFP64FP16Denormals)
612 if (ST->hasPackedFP32Ops() && SLT == MVT::f32)
613 NElts = (NElts + 1) / 2;
614 if (ST->hasBF16PackedInsts() && SLT == MVT::bf16)
615 NElts = (NElts + 1) / 2;
617 return LT.first * NElts * get64BitInstrCost(
CostKind);
619 if (ST->has16BitInsts() && SLT == MVT::f16)
620 NElts = (NElts + 1) / 2;
622 if (SLT == MVT::f32 || SLT == MVT::f16 || SLT == MVT::bf16)
623 return LT.first * NElts * getFullRateInstrCost();
629 if (SLT == MVT::f64) {
634 if (!ST->hasUsableDivScaleConditionOutput())
635 Cost += 3 * getFullRateInstrCost();
637 return LT.first *
Cost * NElts;
642 if ((SLT == MVT::f32 && !HasFP32Denormals) ||
643 (SLT == MVT::f16 && ST->has16BitInsts())) {
644 return LT.first * getTransInstrCost(
CostKind) * NElts;
648 if (SLT == MVT::f16 && ST->has16BitInsts()) {
654 int Cost = 4 * getFullRateInstrCost() + 2 * getTransInstrCost(
CostKind);
655 return LT.first *
Cost * NElts;
662 int Cost = getTransInstrCost(
CostKind) + getFullRateInstrCost();
663 return LT.first *
Cost * NElts;
666 if (SLT == MVT::f32 || SLT == MVT::f16) {
668 int Cost = (SLT == MVT::f16 ? 14 : 10) * getFullRateInstrCost() +
671 if (!HasFP32Denormals) {
673 Cost += 2 * getFullRateInstrCost();
676 return LT.first * NElts *
Cost;
682 return TLI->isFNegFree(SLT) ? 0 : NElts;
696 case Intrinsic::fmuladd:
697 case Intrinsic::copysign:
698 case Intrinsic::minimumnum:
699 case Intrinsic::maximumnum:
700 case Intrinsic::canonicalize:
702 case Intrinsic::round:
703 case Intrinsic::uadd_sat:
704 case Intrinsic::usub_sat:
705 case Intrinsic::sadd_sat:
706 case Intrinsic::ssub_sat:
717 switch (ICA.
getID()) {
718 case Intrinsic::fabs:
721 case Intrinsic::amdgcn_workitem_id_x:
722 case Intrinsic::amdgcn_workitem_id_y:
723 case Intrinsic::amdgcn_workitem_id_z:
727 case Intrinsic::amdgcn_workgroup_id_x:
728 case Intrinsic::amdgcn_workgroup_id_y:
729 case Intrinsic::amdgcn_workgroup_id_z:
730 case Intrinsic::amdgcn_lds_kernel_id:
731 case Intrinsic::amdgcn_dispatch_ptr:
732 case Intrinsic::amdgcn_dispatch_id:
733 case Intrinsic::amdgcn_implicitarg_ptr:
734 case Intrinsic::amdgcn_queue_ptr:
746 case Intrinsic::exp2:
747 case Intrinsic::exp10: {
749 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(RetTy);
752 LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
754 if (SLT == MVT::f64) {
756 if (IID == Intrinsic::exp)
758 else if (IID == Intrinsic::exp10)
764 if (SLT == MVT::f32) {
765 unsigned NumFullRateOps = 0;
767 unsigned NumTransOps = 1;
773 NumFullRateOps = ST->hasFastFMAF32() ? 13 : 17;
775 if (IID == Intrinsic::exp) {
778 }
else if (IID == Intrinsic::exp10) {
784 if (HasFP32Denormals)
789 NumTransOps * getTransInstrCost(
CostKind);
790 return LT.first * NElts *
Cost;
796 case Intrinsic::log2:
797 case Intrinsic::log10: {
798 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(RetTy);
801 LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
803 if (SLT == MVT::f32) {
804 unsigned NumFullRateOps = 0;
806 if (IID == Intrinsic::log2) {
814 NumFullRateOps = ST->hasFastFMAF32() ? 8 : 11;
817 if (HasFP32Denormals)
821 NumFullRateOps * getFullRateInstrCost() + getTransInstrCost(
CostKind);
822 return LT.first * NElts *
Cost;
828 case Intrinsic::cos: {
829 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(RetTy);
832 LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
834 if (SLT == MVT::f32) {
836 unsigned NumFullRateOps = ST->hasTrigReducedRange() ? 2 : 1;
839 NumFullRateOps * getFullRateInstrCost() + getTransInstrCost(
CostKind);
840 return LT.first * NElts *
Cost;
845 case Intrinsic::sqrt: {
846 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(RetTy);
849 LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
851 if (SLT == MVT::f32) {
852 unsigned NumFullRateOps = 0;
856 NumFullRateOps = HasFP32Denormals ? 17 : 16;
860 NumFullRateOps * getFullRateInstrCost() + getTransInstrCost(
CostKind);
861 return LT.first * NElts *
Cost;
873 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(RetTy);
875 unsigned NElts = LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
877 if ((ST->hasVOP3PInsts() &&
878 (SLT == MVT::f16 || SLT == MVT::i16 ||
879 (SLT == MVT::bf16 && ST->hasBF16PackedInsts()))) ||
880 (ST->hasPackedFP32Ops() && SLT == MVT::f32))
881 NElts = (NElts + 1) / 2;
884 unsigned InstRate = getQuarterRateInstrCost(
CostKind);
886 switch (ICA.
getID()) {
888 case Intrinsic::fmuladd:
889 if (SLT == MVT::f64) {
890 InstRate = get64BitInstrCost(
CostKind);
894 if ((SLT == MVT::f32 && ST->hasFastFMAF32()) || SLT == MVT::f16)
895 InstRate = getFullRateInstrCost();
897 InstRate = ST->hasFastFMAF32() ? getHalfRateInstrCost(
CostKind)
898 : getQuarterRateInstrCost(
CostKind);
901 case Intrinsic::copysign:
902 return NElts * getFullRateInstrCost();
903 case Intrinsic::minimumnum:
904 case Intrinsic::maximumnum: {
916 SLT == MVT::f64 ? get64BitInstrCost(
CostKind) : getFullRateInstrCost();
917 InstRate = BaseRate *
NumOps;
920 case Intrinsic::canonicalize: {
922 SLT == MVT::f64 ? get64BitInstrCost(
CostKind) : getFullRateInstrCost();
925 case Intrinsic::uadd_sat:
926 case Intrinsic::usub_sat:
927 case Intrinsic::sadd_sat:
928 case Intrinsic::ssub_sat: {
929 if (SLT == MVT::i16 || SLT == MVT::i32)
930 InstRate = getFullRateInstrCost();
932 static const auto ValidSatTys = {MVT::v2i16, MVT::v4i16};
939 if (SLT == MVT::i16 || SLT == MVT::i32)
940 InstRate = 2 * getFullRateInstrCost();
946 return LT.first * NElts * InstRate;
952 assert((
I ==
nullptr ||
I->getOpcode() == Opcode) &&
953 "Opcode should reflect passed instruction.");
956 const int CBrCost = SCost ? 5 : 7;
958 case Instruction::UncondBr:
960 return SCost ? 1 : 4;
961 case Instruction::CondBr:
965 case Instruction::Switch: {
969 return (
SI ? (
SI->getNumCases() + 1) : 4) * (CBrCost + 1);
971 case Instruction::Ret:
972 return SCost ? 1 : 10;
979 std::optional<FastMathFlags> FMF,
984 EVT OrigTy = TLI->getValueType(
DL, Ty);
991 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
992 return LT.first * getFullRateInstrCost();
999 EVT OrigTy = TLI->getValueType(
DL, Ty);
1006 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
1007 return LT.first * getHalfRateInstrCost(
CostKind);
1014 case Instruction::ExtractElement:
1015 case Instruction::InsertElement: {
1019 if (EltSize == 16 && Index == 0 && ST->has16BitInsts())
1030 return Index == ~0u ? 2 : 0;
1045 if (Indices.
size() > 1)
1051 TLI->ParseConstraints(
DL, ST->getRegisterInfo(), *CI);
1053 const int TargetOutputIdx = Indices.
empty() ? -1 : Indices[0];
1056 for (
auto &TC : TargetConstraints) {
1061 if (TargetOutputIdx != -1 && TargetOutputIdx != OutputIdx++)
1064 TLI->ComputeConstraintToUse(TC,
SDValue());
1067 TRI, TC.ConstraintCode, TC.ConstraintVT).second;
1071 if (!RC || !
TRI->isSGPRClass(RC))
1101bool GCNTTIImpl::isSourceOfDivergence(
const Value *V)
const {
1125 case Intrinsic::read_register:
1127 case Intrinsic::amdgcn_addrspacecast_nonnull: {
1129 Intrinsic->getOperand(0)->getType()->getPointerAddressSpace();
1130 unsigned DstAS =
Intrinsic->getType()->getPointerAddressSpace();
1133 ST->hasGloballyAddressableScratch();
1135 case Intrinsic::amdgcn_workitem_id_y:
1136 case Intrinsic::amdgcn_workitem_id_z: {
1141 *
F, IID == Intrinsic::amdgcn_workitem_id_y ? 1 : 2);
1142 return !HasUniformYZ && (!ThisDimSize || *ThisDimSize != 1);
1151 if (CI->isInlineAsm())
1166 ST->hasGloballyAddressableScratch();
1172bool GCNTTIImpl::isAlwaysUniform(
const Value *V)
const {
1177 if (CI->isInlineAsm())
1195 bool XDimDoesntResetWithinWaves =
false;
1198 XDimDoesntResetWithinWaves = ST->hasWavefrontsEvenlySplittingXDim(*
F);
1200 using namespace llvm::PatternMatch;
1206 return C >= ST->getWavefrontSizeLog2() && XDimDoesntResetWithinWaves;
1213 ST->getWavefrontSizeLog2() &&
1214 XDimDoesntResetWithinWaves;
1229 case Intrinsic::amdgcn_if:
1230 case Intrinsic::amdgcn_else: {
1231 ArrayRef<unsigned> Indices = ExtValue->
getIndices();
1232 return Indices.
size() == 1 && Indices[0] == 1;
1249 case Intrinsic::amdgcn_is_shared:
1250 case Intrinsic::amdgcn_is_private:
1251 case Intrinsic::amdgcn_flat_atomic_fmax_num:
1252 case Intrinsic::amdgcn_flat_atomic_fmin_num:
1253 case Intrinsic::amdgcn_load_to_lds:
1254 case Intrinsic::amdgcn_make_buffer_rsrc:
1264 Value *NewV)
const {
1265 auto IntrID =
II->getIntrinsicID();
1267 case Intrinsic::amdgcn_is_shared:
1268 case Intrinsic::amdgcn_is_private: {
1269 unsigned TrueAS = IntrID == Intrinsic::amdgcn_is_shared ?
1277 case Intrinsic::amdgcn_flat_atomic_fmax_num:
1278 case Intrinsic::amdgcn_flat_atomic_fmin_num: {
1279 Type *DestTy =
II->getType();
1286 M,
II->getIntrinsicID(), {DestTy, SrcTy, DestTy});
1287 II->setArgOperand(0, NewV);
1288 II->setCalledFunction(NewDecl);
1291 case Intrinsic::amdgcn_load_to_lds: {
1296 II->setArgOperand(0, NewV);
1297 II->setCalledFunction(NewDecl);
1300 case Intrinsic::amdgcn_make_buffer_rsrc: {
1302 Type *DstTy =
II->getType();
1305 M,
II->getIntrinsicID(), {DstTy, SrcTy});
1306 II->setArgOperand(0, NewV);
1307 II->setCalledFunction(NewDecl);
1328 unsigned ScalarSize =
DL.getTypeSizeInBits(SrcTy->getElementType());
1330 (ScalarSize == 16 || ScalarSize == 8)) {
1343 unsigned NumSrcElts = SrcVecTy->getNumElements();
1344 if (ST->hasVOP3PInsts() && ScalarSize == 16 && NumSrcElts == 2 &&
1350 unsigned EltsPerReg = 32 / ScalarSize;
1358 return divideCeil(DstVecTy->getNumElements(), EltsPerReg);
1361 if (Index % EltsPerReg == 0)
1364 return divideCeil(DstVecTy->getNumElements(), EltsPerReg);
1370 unsigned NumDstElts = DstVecTy->getNumElements();
1372 unsigned EndIndex = Index + NumInsertElts;
1373 unsigned BeginSubIdx = Index % EltsPerReg;
1374 unsigned EndSubIdx = EndIndex % EltsPerReg;
1377 if (BeginSubIdx != 0) {
1385 if (EndIndex < NumDstElts && BeginSubIdx < EndSubIdx)
1394 unsigned NumElts = DstVecTy->getNumElements();
1398 unsigned EltsFromLHS = NumElts - Index;
1399 bool LHSIsAligned = (Index % EltsPerReg) == 0;
1400 bool RHSIsAligned = (EltsFromLHS % EltsPerReg) == 0;
1401 if (LHSIsAligned && RHSIsAligned)
1403 if (LHSIsAligned && !RHSIsAligned)
1404 return divideCeil(NumElts, EltsPerReg) - (EltsFromLHS / EltsPerReg);
1405 if (!LHSIsAligned && RHSIsAligned)
1413 if (!Mask.empty()) {
1423 for (
unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx += EltsPerReg) {
1426 for (
unsigned I = 0;
I < EltsPerReg && DstIdx +
I < Mask.size(); ++
I) {
1427 int SrcIdx = Mask[DstIdx +
I];
1431 if (SrcIdx < (
int)NumSrcElts) {
1432 Reg = SrcIdx / EltsPerReg;
1433 if (SrcIdx % EltsPerReg !=
I)
1436 Reg = NumSrcElts + (SrcIdx - NumSrcElts) / EltsPerReg;
1437 if ((SrcIdx - NumSrcElts) % EltsPerReg !=
I)
1443 if (Regs.
size() >= 2)
1463 for (
auto &
Op :
I->operands()) {
1476 if (OpInst->getType()->isVectorTy() && OpInst->getNumOperands() > 1) {
1478 if (VecOpInst && VecOpInst->
hasOneUse())
1483 OpInst->getOperand(0),
1484 OpInst->getOperand(1)) == 0) {
1493 unsigned EltSize =
DL.getTypeSizeInBits(
1498 if (EltSize < 16 || !ST->has16BitInsts())
1501 int NumSubElts, SubIndex;
1502 if (Shuffle->changesLength()) {
1503 if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding()) {
1508 if ((Shuffle->isExtractSubvectorMask(SubIndex) ||
1509 Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex)) &&
1510 !(SubIndex & 0x1)) {
1516 if (Shuffle->isReverse() || Shuffle->isZeroEltSplat() ||
1517 Shuffle->isSingleSource()) {
1524 return !
Ops.empty();
1535 const FeatureBitset &CallerBits = CallerST->getFeatureBits();
1536 const FeatureBitset &CalleeBits = CalleeST->getFeatureBits();
1538 FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
1539 FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
1540 if ((RealCallerBits & RealCalleeBits) != RealCalleeBits)
1550 if (Callee->hasFnAttribute(Attribute::AlwaysInline) ||
1551 Callee->hasFnAttribute(Attribute::InlineHint))
1557 if (Callee->size() == 1)
1559 size_t BBSize = Caller->size() + Callee->size() - 1;
1569 const int NrOfSGPRUntilSpill = 26;
1570 const int NrOfVGPRUntilSpill = 32;
1574 unsigned adjustThreshold = 0;
1580 for (
auto ArgVT : ValueVTs) {
1584 SGPRsInUse += CCRegNum;
1586 VGPRsInUse += CCRegNum;
1596 ArgStackCost +=
const_cast<GCNTTIImpl *
>(TTIImpl)->getMemoryOpCost(
1599 ArgStackCost +=
const_cast<GCNTTIImpl *
>(TTIImpl)->getMemoryOpCost(
1605 adjustThreshold += std::max(0, SGPRsInUse - NrOfSGPRUntilSpill) *
1607 adjustThreshold += std::max(0, VGPRsInUse - NrOfVGPRUntilSpill) *
1609 return adjustThreshold;
1618 unsigned AllocaSize = 0;
1625 unsigned AddrSpace = Ty->getAddressSpace();
1635 AllocaSize +=
Size->getFixedValue();
1679 static_assert(InlinerVectorBonusPercent == 0,
"vector bonus assumed to be 0");
1683 return BB.getTerminator()->getNumSuccessors() > 1;
1686 Threshold += Threshold / 2;
1694 unsigned AllocaThresholdBonus =
1695 (Threshold * ArgAllocaSize->getFixedValue()) / AllocaSize;
1697 return AllocaThresholdBonus;
1703 CommonTTI.getUnrollingPreferences(L, SE, UP, ORE);
1708 CommonTTI.getPeelingPreferences(L, SE, PP);
1712 return getQuarterRateInstrCost(
CostKind);
1716 return ST->hasFullRate64Ops()
1717 ? getFullRateInstrCost()
1718 : ST->hasHalfRate64Ops() ? getHalfRateInstrCost(
CostKind)
1719 : getQuarterRateInstrCost(
CostKind);
1722std::pair<InstructionCost, MVT>
1723GCNTTIImpl::getTypeLegalizationCost(
Type *Ty)
const {
1725 auto Size =
DL.getTypeSizeInBits(Ty);
1737 return ST->hasPrefetch() ? 128 : 0;
1748 LB.
push_back({
"amdgpu-max-num-workgroups[0]", MaxNumWorkgroups[0]});
1749 LB.push_back({
"amdgpu-max-num-workgroups[1]", MaxNumWorkgroups[1]});
1750 LB.push_back({
"amdgpu-max-num-workgroups[2]", MaxNumWorkgroups[2]});
1751 std::pair<unsigned, unsigned> FlatWorkGroupSize =
1752 ST->getFlatWorkGroupSizes(
F);
1753 LB.push_back({
"amdgpu-flat-work-group-size[0]", FlatWorkGroupSize.first});
1754 LB.push_back({
"amdgpu-flat-work-group-size[1]", FlatWorkGroupSize.second});
1755 std::pair<unsigned, unsigned> WavesPerEU = ST->getWavesPerEU(
F);
1756 LB.push_back({
"amdgpu-waves-per-eu[0]", WavesPerEU.first});
1757 LB.push_back({
"amdgpu-waves-per-eu[1]", WavesPerEU.second});
1762 if (!ST->hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
1769 Attribute IEEEAttr =
F->getFnAttribute(
"amdgpu-ieee");
1784 if ((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
1785 VecTy->getElementType()->isIntegerTy(8)) {
1796 if (VecTy->getElementType()->isIntegerTy(8)) {
1808 case Intrinsic::amdgcn_wave_shuffle:
1815 if (isAlwaysUniform(V))
1818 if (isSourceOfDivergence(V))
1826 bool HasBaseReg, int64_t Scale,
1827 unsigned AddrSpace)
const {
1828 if (HasBaseReg && Scale != 0) {
1832 if (getST()->hasScaleOffset() && Ty && Ty->isSized() &&
1852 unsigned EffInsnsA =
A.Insns +
A.ScaleCost;
1853 unsigned EffInsnsB =
B.Insns +
B.ScaleCost;
1855 return std::tie(EffInsnsA,
A.NumIVMuls,
A.AddRecCost,
A.NumBaseAdds,
1856 A.SetupCost,
A.ImmCost,
A.NumRegs) <
1857 std::tie(EffInsnsB,
B.NumIVMuls,
B.AddRecCost,
B.NumBaseAdds,
1858 B.SetupCost,
B.ImmCost,
B.NumRegs);
1875 case Intrinsic::amdgcn_wave_shuffle:
1878 return UniformArgs[0] || UniformArgs[1];
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Provides AMDGPU specific target descriptions.
Base class for AMDGPU specific classes of TargetSubtarget.
The AMDGPU TargetMachine interface definition for hw codegen targets.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Register const TargetRegisterInfo * TRI
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
static unsigned getNumElements(Type *Ty)
This file implements the SmallBitVector class.
std::optional< unsigned > getReqdWorkGroupSize(const Function &F, unsigned Dim) const
bool hasWavefrontsEvenlySplittingXDim(const Function &F, bool REquiresUniformYZ=false) const
uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override
AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
an instruction to allocate memory on the stack
LLVM_ABI bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
This class represents an incoming formal argument to a Function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
LLVM_ABI bool getValueAsBool() const
Return the attribute's value as a boolean.
bool isValid() const
Return true if the attribute is any kind of attribute.
LLVM Basic Block Representation.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
unsigned getNumberOfParts(Type *Tp) const override
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
bool isInlineAsm() const
Check if this call is an inline asm statement.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
CallingConv::ID getCallingConv() const
Value * getArgOperand(unsigned i) const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
unsigned getArgOperandNo(const Use *U) const
Given a use for a arg operand, get the arg operand number that corresponds to it.
This class represents a function call, abstracting a target machine's calling convention.
Conditional Branch instruction.
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
constexpr bool isScalar() const
Exactly one element.
Convenience struct for specifying and reasoning about fast-math flags.
Container class for subtarget features.
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
Account for loads of i8 vector types to have reduced cost.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
void collectKernelLaunchBounds(const Function &F, SmallVectorImpl< std::pair< StringRef, int64_t > > &LB) const override
bool isUniform(const Instruction *I, const SmallBitVector &UniformArgs) const override
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const override
bool isInlineAsmSourceOfDivergence(const CallInst *CI, ArrayRef< unsigned > Indices={}) const
Analyze if the results of inline asm are divergent.
bool isReadRegisterSourceOfDivergence(const IntrinsicInst *ReadReg) const
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override
unsigned getNumberOfRegisters(unsigned RCID) const override
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const override
unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const override
bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
bool isLSRCostLess(const TTI::LSRCost &A, const TTI::LSRCost &B) const override
bool shouldPrefetchAddressSpace(unsigned AS) const override
InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
bool hasBranchDivergence(const Function *F=nullptr) const override
Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const override
unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const override
unsigned getMaxInterleaveFactor(ElementCount VF) const override
void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicCpySize) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
Get intrinsic cost based on arguments.
unsigned getInliningThresholdMultiplier() const override
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const override
unsigned getPrefetchDistance() const override
How much before a load we should place the prefetch instruction.
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
KnownIEEEMode fpenvIEEEMode(const Instruction &I) const
Return KnownIEEEMode::On if we know if the use context can assume "amdgpu-ieee"="true" and KnownIEEEM...
unsigned adjustInliningThreshold(const CallBase *CB) const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Whether it is profitable to sink the operands of an Instruction I to the basic block of I.
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override
bool areInlineCompatible(const Function *Caller, const Function *Callee) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
Try to calculate op costs for min/max reduction operations.
bool shouldDropLSRSolutionIfLessProfitable() const override
int getInliningLastCallToStaticBonus() const override
bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const override
unsigned getNumberOfParts(Type *Tp) const override
When counting parts on AMD GPUs, account for i8s being grouped together under a single i32 value.
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
unsigned getMinVectorRegisterBitWidth() const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind Vector) const override
bool isNumRegsMajorCostOfLSR() const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicElementSize) const override
uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override
InstructionUniformity getInstructionUniformity(const Value *V) const override
An instruction for type-safe pointer arithmetic to access elements of arrays and structs.
static InstructionCost getInvalid(CostType Val=0)
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full range of operator support required for arithmetic and comparisons.
LLVM_ABI bool hasApproxFunc() const LLVM_READONLY
Determine whether the approximate-math-functions flag is set.
LLVM_ABI bool hasAllowContract() const LLVM_READONLY
Determine whether the allow-contract flag is set.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
FastMathFlags getFlags() const
Type * getReturnType() const
const IntrinsicInst * getInst() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Represents a single loop in the control flow graph.
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
A Module instance is used to store all the information related to an LLVM module.
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain targets require unusual breakdowns of certain types.
The main scalar evolution driver.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is small.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
StringRef - Represent a constant reference to a string, i.e.
std::vector< AsmOperandInfo > AsmOperandInfoVector
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInfo-derived member variable.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVMContext & getContext() const
All values hold a context through their type.
Base class of all SIMD vector types.
constexpr ScalarTy getFixedValue() const
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ BUFFER_STRIDED_POINTER
Address space for 192-bit fat buffer pointers with an additional index.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ BUFFER_FAT_POINTER
Address space for 160-bit buffer fat pointers.
@ PRIVATE_ADDRESS
Address space for private memory.
@ BUFFER_RESOURCE
Address space for 128-bit buffer resources.
LLVM_READNONE constexpr bool isShader(CallingConv::ID CC)
bool isFlatGlobalAddrSpace(unsigned AS)
bool isArgPassedInSGPR(const Argument *A)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
bool isExtendedGlobalAddrSpace(unsigned AS)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types and value types.
@ ADD
Simple integer binary arithmetic operators.
@ FADD
Simple binary floating point operators.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SHL
Shift and rotation operations.
@ AND
Bitwise operators - logical and, logical or, logical xor.
LLVM_ABI int getInstrCost()
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
bool match(Val *V, const Pattern &P)
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
specific_fpval m_FPOne()
Match a float 1.0 or vector with all elements equal to 1.0.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
m_Intrinsic_Ty< Opnd0 >::Ty m_FAbs(const Opnd0 &Op0)
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract_or_null(Y &&MD)
Extract a Value from Metadata, allowing null.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< EVT > *MemVTs=nullptr, SmallVectorImpl< TypeSize > *Offsets=nullptr, TypeSize StartingOffset=TypeSize::getZero())
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual underlying non-aggregate types that comprise it.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI MDNode * findOptionMDForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for a loop.
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOne bit sets.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal.address from the specified value V, returning the original object being addressed.
InstructionUniformity
Enum describing how instructions behave with respect to uniformity and divergence,...
@ AlwaysUniform
The result values are always uniform.
@ NeverUniform
The result values can never be assumed to be uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
@ Custom
The result values require a custom uniformity check.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static constexpr DenormalMode getPreserveSign()
uint64_t getScalarSizeInBits() const
Information about a load/store intrinsic defined by the target.
bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const
const unsigned PragmaCount
const bool PragmaEnableUnroll