#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-codegenprepare"
47 "amdgpu-codegenprepare-widen-constant-loads",
48 cl::desc(
"Widen sub-dword constant address space loads in AMDGPUCodeGenPrepare"),
53 BreakLargePHIs(
"amdgpu-codegenprepare-break-large-phis",
54 cl::desc(
"Break large PHI nodes for DAGISel"),
58 ForceBreakLargePHIs(
"amdgpu-codegenprepare-force-break-large-phis",
59 cl::desc(
"For testing purposes, always break large "
60 "PHIs even if it isn't profitable."),
64 "amdgpu-codegenprepare-break-large-phis-threshold",
65 cl::desc(
"Minimum type size in bits for breaking large PHI nodes"),
69 "amdgpu-codegenprepare-mul24",
70 cl::desc(
"Introduce mul24 intrinsics in AMDGPUCodeGenPrepare"),
76 "amdgpu-codegenprepare-expand-div64",
77 cl::desc(
"Expand 64-bit division in AMDGPUCodeGenPrepare"),
84 "amdgpu-codegenprepare-disable-idiv-expansion",
85 cl::desc(
"Prevent expanding integer division in AMDGPUCodeGenPrepare"),
91 "amdgpu-codegenprepare-disable-fdiv-expansion",
92 cl::desc(
"Prevent expanding floating point division in AMDGPUCodeGenPrepare"),
class AMDGPUCodeGenPrepareImpl
    : public InstVisitor<AMDGPUCodeGenPrepareImpl, bool> {

  const bool HasFP32DenormalFlush;
  bool FlowChanged = false;
  mutable Function *SqrtF32 = nullptr;
  mutable Function *LdexpF32 = nullptr;

        DT(DT), UA(UA), DL(F.getDataLayout()),

        F.getParent(), Intrinsic::amdgcn_sqrt, {Type::getFloatTy(Ctx)});

        F.getParent(), Intrinsic::ldexp,
        {Type::getFloatTy(Ctx), Type::getInt32Ty(Ctx)});
  bool canBreakPHINode(const PHINode &I);

  bool isLegalFloatingTy(const Type *T) const;

  bool canIgnoreDenormalInput(const Value *V, const Instruction *CtxI) const {
    return HasFP32DenormalFlush ||

  unsigned numBitsUnsigned(Value *Op) const;

  unsigned numBitsSigned(Value *Op) const;

                          unsigned MaxDivBits, bool Signed) const;

                       bool IsDiv, bool IsSigned) const;

                           bool IsDiv, bool IsSigned) const;

  bool canWidenScalarExtLoad(LoadInst &I) const;

                 float ReqdAccuracy) const;

                              float ReqdAccuracy) const;

  std::pair<Value *, Value *> getFrexpResults(IRBuilder<> &Builder,

                          bool IsNegative) const;

                         bool IsNegative) const;

    if (!ExpandDiv64InIR)

  StringRef getPassName() const override { return "AMDGPU IR optimizations"; }
bool AMDGPUCodeGenPrepareImpl::run() {
  BreakPhiNodesCache.clear();
  bool MadeChange = false;

  while (!DeadVals.empty()) {
bool AMDGPUCodeGenPrepareImpl::isLegalFloatingTy(const Type *Ty) const {

bool AMDGPUCodeGenPrepareImpl::canWidenScalarExtLoad(LoadInst &I) const {
  Type *Ty = I.getType();
  int TySize = DL.getTypeSizeInBits(Ty);
  Align Alignment = DL.getValueOrABITypeAlignment(I.getAlign(), Ty);

  return I.isSimple() && TySize < 32 && Alignment >= 4 && UA.isUniform(&I);
unsigned AMDGPUCodeGenPrepareImpl::numBitsUnsigned(Value *Op) const {

unsigned AMDGPUCodeGenPrepareImpl::numBitsSigned(Value *Op) const {
  for (int I = 0, E = VT->getNumElements(); I != E; ++I)
    Values.push_back(Builder.CreateExtractElement(V, I));

  if (!Ty->isVectorTy()) {

  for (int I = 0, E = Values.size(); I != E; ++I)
    NewVal = Builder.CreateInsertElement(NewVal, Values[I], I);
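// replaceMulWithMul24 (below): when both operands of an integer multiply are
// known to need at most 24 bits (numBitsUnsigned / numBitsSigned) and the
// subtarget has the matching mul24 instruction, the multiply is rewritten as
// amdgcn.mul.u24 / amdgcn.mul.i24 on i32 operands, per vector element, and the
// result is extended or truncated back to the original type. Illustrative IR
// shape (an assumed example, not taken from a test):
//   %m = mul i32 %a, %b            ; %a, %b proven to fit in 24 bits
//   ==>
//   %m = call i32 @llvm.amdgcn.mul.u24(i32 %a, i32 %b)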
bool AMDGPUCodeGenPrepareImpl::replaceMulWithMul24(BinaryOperator &I) const {
  if (I.getOpcode() != Instruction::Mul)

  Type *Ty = I.getType();

  Builder.SetCurrentDebugLocation(I.getDebugLoc());

  unsigned LHSBits = 0, RHSBits = 0;
  bool IsSigned = false;

  if (ST.hasMulU24() && (LHSBits = numBitsUnsigned(LHS)) <= 24 &&
      (RHSBits = numBitsUnsigned(RHS)) <= 24) {

  } else if (ST.hasMulI24() && (LHSBits = numBitsSigned(LHS)) <= 24 &&
             (RHSBits = numBitsSigned(RHS)) <= 24) {

  IntegerType *I32Ty = Builder.getInt32Ty();
  IntegerType *IntrinTy = Size > 32 ? Builder.getInt64Ty() : I32Ty;
  Type *DstTy = LHSVals[0]->getType();

  for (int I = 0, E = LHSVals.size(); I != E; ++I) {
    Value *LHS = IsSigned ? Builder.CreateSExtOrTrunc(LHSVals[I], I32Ty)
                          : Builder.CreateZExtOrTrunc(LHSVals[I], I32Ty);
    Value *RHS = IsSigned ? Builder.CreateSExtOrTrunc(RHSVals[I], I32Ty)
                          : Builder.CreateZExtOrTrunc(RHSVals[I], I32Ty);

        IsSigned ? Intrinsic::amdgcn_mul_i24 : Intrinsic::amdgcn_mul_u24;

    Result = IsSigned ? Builder.CreateSExtOrTrunc(Result, DstTy)
                      : Builder.CreateZExtOrTrunc(Result, DstTy);

  I.replaceAllUsesWith(NewVal);
  DeadVals.push_back(&I);
bool AMDGPUCodeGenPrepareImpl::foldBinOpIntoSelect(BinaryOperator &BO) const {

  if (!CBO || !CT || !CF)

  Builder.setFastMathFlags(FPOp->getFastMathFlags());

  DeadVals.push_back(&BO);

  DeadVals.push_back(CastOp);
  DeadVals.push_back(Sel);
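// The frexp-based helpers below split a value into mantissa and exponent so
// the hardware reciprocal runs on a well-scaled mantissa, and the result is
// rescaled with ldexp. Roughly (a sketch of the idea, not the literal code):
//   {mant, exp} = frexp(Src)
//   rcp         = amdgcn.rcp(mant)
//   result      = ldexp(rcp, -exp)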
std::pair<Value *, Value *>
AMDGPUCodeGenPrepareImpl::getFrexpResults(IRBuilder<> &Builder,
  Type *Ty = Src->getType();

                 : Builder.CreateExtractValue(Frexp, {1});
  return {FrexpMant, FrexpExp};

                                            bool IsNegative) const {

  auto [FrexpMant, FrexpExp] = getFrexpResults(Builder, Src);

  return Builder.CreateCall(getLdexpF32(), {Rcp, ScaleFactor});

                                          FastMathFlags FMF) const {

  auto [FrexpMantRHS, FrexpExpRHS] = getFrexpResults(Builder, RHS);

  auto [FrexpMantLHS, FrexpExpLHS] = getFrexpResults(Builder, LHS);

                                               FastMathFlags FMF) const {
  Type *Ty = Src->getType();

      Builder.CreateFCmpOLT(Src, ConstantFP::get(Ty, SmallestNormal));

  Value *InputScaleFactor =

  Value *OutputScaleFactor =

  return Builder.CreateCall(getLdexpF32(), {Sqrt, OutputScaleFactor});
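// emitRsqIEEE1ULP (below) stays accurate for denormal inputs by pre-scaling:
// an input below the smallest normalized value is multiplied by 2^24 before
// amdgcn.rsq, and since rsq(x * 2^24) == rsq(x) * 2^-12 the result is
// multiplied by 2^12 afterwards (negated for the -1/sqrt(x) form) to undo the
// scale.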
  Type *Ty = Src->getType();

      Builder.CreateFCmpOLT(Src, ConstantFP::get(Ty, SmallestNormal));
  Constant *One = ConstantFP::get(Ty, 1.0);
  Constant *InputScale = ConstantFP::get(Ty, 0x1.0p+24);
      ConstantFP::get(Ty, IsNegative ? -0x1.0p+12 : 0x1.0p+12);

  Value *InputScaleFactor = Builder.CreateSelect(NeedScale, InputScale, One);

  Value *ScaledInput = Builder.CreateFMul(Src, InputScaleFactor);
  Value *Rsq = Builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_rsq, ScaledInput);
  Value *OutputScaleFactor = Builder.CreateSelect(
      NeedScale, OutputScale, IsNegative ? ConstantFP::get(Ty, -1.0) : One);

  return Builder.CreateFMul(Rsq, OutputScaleFactor);
                                        FastMathFlags SqrtFMF,
                                        FastMathFlags DivFMF,
                                        const Instruction *CtxI,
                                        bool IsNegative) const {

  bool MaybePosInf = !SqrtFMF.noInfs() && !DivFMF.noInfs();
  bool MaybeZero = !DivFMF.noInfs();

  DenormalMode DenormMode;

  if (Interested != fcNone) {

    DenormMode = F.getDenormalMode(X->getType()->getFltSemantics());

  if (MaybeZero || MaybePosInf) {

    if (MaybePosInf && MaybeZero) {

      if (DenormMode.Input != DenormalMode::DenormalModeKind::Dynamic) {

    } else if (MaybeZero) {

  Value *E =
      Builder.CreateFMA(NegXY0, Y0, ConstantFP::get(X->getType(), 1.0));

                                    ConstantFP::get(X->getType(), 0.5));

  return Builder.CreateFMA(Y0E, EFMA, IsNegative ? NegY0 : Y0);
bool AMDGPUCodeGenPrepareImpl::canOptimizeWithRsq(FastMathFlags DivFMF,
                                                  FastMathFlags SqrtFMF) const {

Value *AMDGPUCodeGenPrepareImpl::optimizeWithRsq(
    const FastMathFlags SqrtFMF, const Instruction *CtxI) const {

  bool IsNegative = false;

  IRBuilder<>::FastMathFlagGuard Guard(Builder);

      canIgnoreDenormalInput(Den, CtxI)) {

  return emitRsqF64(Builder, Den, SqrtFMF, DivFMF, CtxI, IsNegative);
                                                Value *Den, FastMathFlags FMF,
                                                const Instruction *CtxI) const {

  bool IsNegative = false;

    if (HasFP32DenormalFlush || FMF.approxFunc()) {

    return emitRcpIEEE1ULP(Builder, Src, IsNegative);

  if (HasFP32DenormalFlush || FMF.approxFunc()) {

    Value *Recip = emitRcpIEEE1ULP(Builder, Den, false);
Value *AMDGPUCodeGenPrepareImpl::optimizeWithFDivFast(
  if (ReqdAccuracy < 2.5f)

  bool NumIsOne = false;
    if (CNum->isExactlyValue(+1.0) || CNum->isExactlyValue(-1.0))

  if (!HasFP32DenormalFlush && !NumIsOne)

  return Builder.CreateIntrinsic(Intrinsic::amdgcn_fdiv_fast, {Num, Den});
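// visitFDivElement picks one expansion per scalar element, in decreasing order
// of preference: an rsq-based form when the denominator is a suitable sqrt,
// then a plain rcp, then amdgcn.fdiv.fast when the accuracy requirement allows
// it, and finally the frexp/ldexp-scaled division that handles denormal
// inputs.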
Value *AMDGPUCodeGenPrepareImpl::visitFDivElement(
    FastMathFlags SqrtFMF, Value *RsqOp, const Instruction *FDivInst,
    float ReqdDivAccuracy) const {

        optimizeWithRsq(Builder, Num, RsqOp, DivFMF, SqrtFMF, FDivInst);

  Value *Rcp = optimizeWithRcp(Builder, Num, Den, DivFMF, FDivInst);

  Value *FDivFast = optimizeWithFDivFast(Builder, Num, Den, ReqdDivAccuracy);

  return emitFrexpDiv(Builder, Num, Den, DivFMF);
bool AMDGPUCodeGenPrepareImpl::visitFDiv(BinaryOperator &FDiv) {
  if (DisableFDivExpand)

  FastMathFlags SqrtFMF;

  Value *RsqOp = nullptr;
  if (DenII && DenII->getIntrinsicID() == Intrinsic::sqrt &&
      DenII->hasOneUse()) {
    SqrtFMF = SqrtOp->getFastMathFlags();
    if (canOptimizeWithRsq(DivFMF, SqrtFMF))
      RsqOp = SqrtOp->getOperand(0);

  if (!IsFloat && !RsqOp)

  const bool AllowInaccurateRcp = DivFMF.approxFunc();
  if (!RsqOp && AllowInaccurateRcp)

  if (IsFloat && ReqdAccuracy < 1.0f)

  for (int I = 0, E = NumVals.size(); I != E; ++I) {
    Value *NumElt = NumVals[I];
    Value *DenElt = DenVals[I];
    Value *RsqDenElt = RsqOp ? RsqDenVals[I] : nullptr;

        visitFDivElement(Builder, NumElt, DenElt, DivFMF, SqrtFMF, RsqDenElt,

      NewEltInst->copyMetadata(FDiv);

    ResultVals[I] = NewElt;

  DeadVals.push_back(&FDiv);
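// getMul64 forms the full product of two 32-bit values by widening both to
// i64, multiplying, and splitting the result back into {low, high} 32-bit
// halves (the high half via a logical shift right by 32).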
  Value *LHS_EXT64 = Builder.CreateZExt(LHS, I64Ty);
  Value *RHS_EXT64 = Builder.CreateZExt(RHS, I64Ty);
  Value *MUL64 = Builder.CreateMul(LHS_EXT64, RHS_EXT64);
  Value *Lo = Builder.CreateTrunc(MUL64, I32Ty);
  Value *Hi = Builder.CreateLShr(MUL64, Builder.getInt64(32));
  Hi = Builder.CreateTrunc(Hi, I32Ty);
  return std::pair(Lo, Hi);
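// getDivNumBits estimates how many bits a division actually needs, using
// sign-bit counts for the signed case and leading-zero information for the
// unsigned case on both operands; if the estimate exceeds MaxDivBits the
// caller falls back to a wider expansion.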
unsigned AMDGPUCodeGenPrepareImpl::getDivNumBits(BinaryOperator &I, Value *Num,
                                                 unsigned MaxDivBits,
                                                 bool IsSigned) const {

    unsigned DivBits = SSBits - RHSSignBits + 1;
    if (DivBits > MaxDivBits)

    unsigned SignBits = std::min(LHSSignBits, RHSSignBits);
    DivBits = SSBits - SignBits + 1;

    unsigned DivBits = SSBits - RHSSignBits;
    if (DivBits > MaxDivBits)

    unsigned SignBits = std::min(LHSSignBits, RHSSignBits);
    DivBits = SSBits - SignBits;
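// expandDivRem24: when getDivNumBits shows at most 24 significant bits, the
// integer division is expanded through 32-bit float arithmetic (the quotient
// is estimated with a reciprocal multiply and refined with an fma/fmad
// correction step), and the narrow result is then sign- or zero-extended back
// to 32 bits with the shl/ashr or mask sequence below.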
                                           BinaryOperator &I, Value *Num,
                                           Value *Den, bool IsDiv,
                                           bool IsSigned) const {
  unsigned DivBits = getDivNumBits(I, Num, Den, 24, IsSigned);

  return expandDivRem24Impl(Builder, I, Num, Den, DivBits, IsDiv, IsSigned);

Value *AMDGPUCodeGenPrepareImpl::expandDivRem24Impl(
    unsigned DivBits, bool IsDiv, bool IsSigned) const {

  ConstantInt *One = Builder.getInt32(1);

                              {FQNeg->getType()}, {FQNeg, FB, FA}, FQ);

  if (DivBits != 0 && DivBits < 32) {

      int InRegBits = 32 - DivBits;

      Res = Builder.CreateShl(Res, InRegBits);

      ConstantInt *TruncMask =
          Builder.getInt32((UINT64_C(1) << DivBits) - 1);
      Res = Builder.CreateAnd(Res, TruncMask);
bool AMDGPUCodeGenPrepareImpl::divHasSpecialOptimization(BinaryOperator &I,

    if (C->getType()->getScalarSizeInBits() <= 32)

    if (BinOpDen->getOpcode() == Instruction::Shl &&

  return Builder.CreateAShr(V, Builder.getInt32(31));

  assert(Opc == Instruction::URem || Opc == Instruction::UDiv ||
         Opc == Instruction::SRem || Opc == Instruction::SDiv);

  if (divHasSpecialOptimization(I, X, Y))

  bool IsDiv = Opc == Instruction::UDiv || Opc == Instruction::SDiv;
  bool IsSigned = Opc == Instruction::SRem || Opc == Instruction::SDiv;

  Type *Ty = X->getType();

  if (Value *Res = expandDivRem24(Builder, I, X, Y, IsDiv, IsSigned)) {

  ConstantInt *One = Builder.getInt32(1);

  Value *Sign = nullptr;

  Sign = IsDiv ? Builder.CreateXor(SignX, SignY) : SignX;
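// shrinkDivRem64 narrows a 64-bit div/rem to the 24-bit or 32-bit expansion
// when getDivNumBits proves at most 32 bits are significant; divisions that
// cannot be narrowed are queued in Div64ToExpand and expanded later by
// expandDivRem64, which only runs when amdgpu-codegenprepare-expand-div64 is
// enabled.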
                                                 BinaryOperator &I, Value *Num,
  if (!ExpandDiv64InIR && divHasSpecialOptimization(I, Num, Den))

  bool IsDiv = Opc == Instruction::SDiv || Opc == Instruction::UDiv;
  bool IsSigned = Opc == Instruction::SDiv || Opc == Instruction::SRem;

  unsigned NumDivBits = getDivNumBits(I, Num, Den, 32, IsSigned);
  if (NumDivBits > 32)

  Value *Narrowed = nullptr;
  if (NumDivBits <= 24) {
    Narrowed = expandDivRem24Impl(Builder, I, Num, Den, NumDivBits,
  } else if (NumDivBits <= 32) {
    Narrowed = expandDivRem32(Builder, I, Num, Den);

void AMDGPUCodeGenPrepareImpl::expandDivRem64(BinaryOperator &I) const {

  if (Opc == Instruction::UDiv || Opc == Instruction::SDiv) {

  if (Opc == Instruction::URem || Opc == Instruction::SRem) {
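// tryNarrowMathIfNoOverflow handles only Add and Mul: it computes the maximum
// bits the result can need, rounds that up with bit_ceil (at least 8), asks
// the DataLayout for the smallest legal integer type of that width, and only
// rewrites when that type is narrower than the original and the estimated
// cost of the narrow op plus re-extension (NewCost) beats the original
// (OldCost).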
bool AMDGPUCodeGenPrepareImpl::tryNarrowMathIfNoOverflow(Instruction *I) {
  unsigned Opc = I->getOpcode();
  Type *OldType = I->getType();

  if (Opc != Instruction::Add && Opc != Instruction::Mul)

  if (Opc != Instruction::Add && Opc != Instruction::Mul)
        "Instruction::Mul.");

  MaxBitsNeeded = std::max<unsigned>(bit_ceil(MaxBitsNeeded), 8);
  Type *NewType = DL.getSmallestLegalIntType(I->getContext(), MaxBitsNeeded);

  if (NewBit >= OrigBit)

  int NumOfNonConstOps = 2;

    NumOfNonConstOps = 1;

  if (NewCost >= OldCost)

  DeadVals.push_back(I);
bool AMDGPUCodeGenPrepareImpl::visitBinaryOperator(BinaryOperator &I) {
  if (foldBinOpIntoSelect(I))

  if (UseMul24Intrin && replaceMulWithMul24(I))

  if (tryNarrowMathIfNoOverflow(&I))

  Type *Ty = I.getType();
  Value *NewDiv = nullptr;

  if ((Opc == Instruction::URem || Opc == Instruction::UDiv ||
       Opc == Instruction::SRem || Opc == Instruction::SDiv) &&
      !DisableIDivExpand) {
    Value *Num = I.getOperand(0);
    Value *Den = I.getOperand(1);

      for (unsigned N = 0, E = VT->getNumElements(); N != E; ++N) {

        if (ScalarSize <= 32) {
          NewElt = expandDivRem32(Builder, I, NumEltN, DenEltN);

          NewElt = shrinkDivRem64(Builder, I, NumEltN, DenEltN);

          NewEltI->copyIRFlags(&I);

      if (ScalarSize <= 32)
        NewDiv = expandDivRem32(Builder, I, Num, Den);

        NewDiv = shrinkDivRem64(Builder, I, Num, Den);

      I.replaceAllUsesWith(NewDiv);
      DeadVals.push_back(&I);

  if (ExpandDiv64InIR) {
    for (BinaryOperator *Div : Div64ToExpand) {
      expandDivRem64(*Div);
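// visitLoadInst widens a simple, uniform sub-dword load from constant address
// space to 32 bits (see canWidenScalarExtLoad and the widen-constant-loads
// option). Existing !range metadata is dropped when its lower bound is zero
// and otherwise rewritten for the widened type.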
bool AMDGPUCodeGenPrepareImpl::visitLoadInst(LoadInst &I) {
      canWidenScalarExtLoad(I)) {

    if (auto *Range = WidenLoad->getMetadata(LLVMContext::MD_range)) {
      ConstantInt *Lower =

      if (Lower->isNullValue()) {
        WidenLoad->setMetadata(LLVMContext::MD_range, nullptr);

        WidenLoad->setMetadata(LLVMContext::MD_range,

    int TySize = DL.getTypeSizeInBits(I.getType());

    DeadVals.push_back(&I);
bool AMDGPUCodeGenPrepareImpl::visitSelectInst(SelectInst &I) {

  Value *Fract = nullptr;
  if (Pred == FCmpInst::FCMP_UNO && TrueVal == CmpVal && IIFalse &&
      CmpVal == matchFractPat(*IIFalse)) {

    Fract = applyFractPat(Builder, CmpVal);
  } else if (Pred == FCmpInst::FCMP_ORD && FalseVal == CmpVal && IITrue &&
             CmpVal == matchFractPat(*IITrue)) {

    Fract = applyFractPat(Builder, CmpVal);

  I.replaceAllUsesWith(Fract);
  DeadVals.push_back(&I);
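// The helpers below implement large-PHI breaking (controlled by the
// -amdgpu-codegenprepare-break-large-phis options). isInterestingPHIIncomingValue
// looks for incoming values that are cheap to slice, e.g. insertelement chains
// that cover every lane; canBreakPHINode walks the connected PHI graph and
// caches the decision, requiring roughly two thirds of the worklist to be
// breakable; visitPHINode then slices a qualifying vector PHI into 32-bit
// sub-vectors (for 8/16-bit elements) plus scalar slices, creates one new PHI
// per slice, and rebuilds the original vector with insertvector/insertelement.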
  return IA && IB && IA->getParent() == IB->getParent();

  const Value *CurVal = V;

    BitVector EltsCovered(FVT->getNumElements());

      if (!Idx || Idx->getZExtValue() >= FVT->getNumElements())

      const auto *VecSrc = IE->getOperand(0);

      EltsCovered.set(Idx->getZExtValue());

      if (EltsCovered.all())

  const auto [It, Inserted] = SeenPHIs.insert(&I);

  for (const Value *Inc : I.incoming_values()) {

  for (const User *U : I.users()) {

bool AMDGPUCodeGenPrepareImpl::canBreakPHINode(const PHINode &I) {
  if (const auto It = BreakPhiNodesCache.find(&I);
      It != BreakPhiNodesCache.end())

  SmallPtrSet<const PHINode *, 8> WorkList;

  for (const PHINode *WLP : WorkList) {
    assert(BreakPhiNodesCache.count(WLP) == 0);

  const auto Threshold = (alignTo(WorkList.size() * 2, 3) / 3);
  unsigned NumBreakablePHIs = 0;
  bool CanBreak = false;
  for (const PHINode *Cur : WorkList) {

    if (++NumBreakablePHIs >= Threshold) {

  for (const PHINode *Cur : WorkList)
    BreakPhiNodesCache[Cur] = CanBreak;
  Value *&Res = SlicedVals[{BB, Inc}];

    B.SetCurrentDebugLocation(IncInst->getDebugLoc());

    Res = B.CreateShuffleVector(Inc, Mask, NewValName);

    Res = B.CreateExtractElement(Inc, Idx, NewValName);
bool AMDGPUCodeGenPrepareImpl::visitPHINode(PHINode &I) {
      DL.getTypeSizeInBits(FVT) <= BreakLargePHIsThreshold)

  if (!ForceBreakLargePHIs && !canBreakPHINode(I))

  std::vector<VectorSlice> Slices;

  const unsigned EltSize = DL.getTypeSizeInBits(EltTy);

  if (EltSize == 8 || EltSize == 16) {
    const unsigned SubVecSize = (32 / EltSize);

    for (unsigned End = alignDown(NumElts, SubVecSize); Idx < End;
      Slices.emplace_back(SubVecTy, Idx, SubVecSize);

  for (; Idx < NumElts; ++Idx)
    Slices.emplace_back(EltTy, Idx, 1);

  assert(Slices.size() > 1);

  B.SetCurrentDebugLocation(I.getDebugLoc());

  unsigned IncNameSuffix = 0;
  for (VectorSlice &S : Slices) {
    B.SetInsertPoint(I.getParent()->getFirstNonPHIIt());
    S.NewPHI = B.CreatePHI(S.Ty, I.getNumIncomingValues());

    for (const auto &[Idx, BB] : enumerate(I.blocks())) {
      S.NewPHI->addIncoming(S.getSlicedVal(BB, I.getIncomingValue(Idx),
                                           "largephi.extractslice" +
                                               std::to_string(IncNameSuffix++)),

  unsigned NameSuffix = 0;
  for (VectorSlice &S : Slices) {
    const auto ValName = "largephi.insertslice" + std::to_string(NameSuffix++);

      Vec = B.CreateInsertVector(FVT, Vec, S.NewPHI, S.Idx, ValName);

      Vec = B.CreateInsertElement(Vec, S.NewPHI, S.Idx, ValName);

  I.replaceAllUsesWith(Vec);
  DeadVals.push_back(&I);
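// isPtrKnownNeverNull reports whether the source pointer of an addrspacecast
// can ever equal the null value of its address space, using !nonnull load
// metadata or known bits checked against the target's null value (0 or -1).
// visitAddrSpaceCastInst uses it to replace a provably non-null cast with the
// amdgcn.addrspacecast.nonnull intrinsic so the lowering can skip the null
// check.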
      Load && Load->hasMetadata(LLVMContext::MD_nonnull))

  assert(SrcPtrKB.getBitWidth() == DL.getPointerSizeInBits(AS));
  assert((NullVal == 0 || NullVal == -1) &&
         "don't know how to check for this null value!");
  return NullVal ? !SrcPtrKB.getMaxValue().isAllOnes() : SrcPtrKB.isNonZero();
bool AMDGPUCodeGenPrepareImpl::visitAddrSpaceCastInst(AddrSpaceCastInst &I) {

  if (I.getType()->isVectorTy())

  const unsigned SrcAS = I.getSrcAddressSpace();
  const unsigned DstAS = I.getDestAddressSpace();

  bool CanLower = false;

  auto *Intrin = B.CreateIntrinsic(
      I.getType(), Intrinsic::amdgcn_addrspacecast_nonnull, {I.getOperand(0)});
  I.replaceAllUsesWith(Intrin);
  DeadVals.push_back(&I);
bool AMDGPUCodeGenPrepareImpl::visitIntrinsicInst(IntrinsicInst &I) {
  switch (I.getIntrinsicID()) {
  case Intrinsic::minnum:
  case Intrinsic::minimumnum:
  case Intrinsic::minimum:
    return visitFMinLike(I);
  case Intrinsic::sqrt:
    return visitSqrt(I);
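// matchFractPat recognizes the fract idiom: a minnum/minimum/minimumnum of
// (x - floor(x)) against the largest float strictly below 1.0; applyFractPat
// then rewrites each element with the amdgcn.fract intrinsic. This is a
// summary of the intent; the exact guards live in the lines elided from this
// listing.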
Value *AMDGPUCodeGenPrepareImpl::matchFractPat(IntrinsicInst &I) {

  if (IID != Intrinsic::minnum && IID != Intrinsic::minimum &&
      IID != Intrinsic::minimumnum)

  Type *Ty = I.getType();

  Value *Arg0 = I.getArgOperand(0);
  Value *Arg1 = I.getArgOperand(1);

  One.convert(C->getSemantics(), APFloat::rmNearestTiesToEven, &LosesInfo);

  for (unsigned I = 0, E = FractVals.size(); I != E; ++I) {
bool AMDGPUCodeGenPrepareImpl::visitFMinLike(IntrinsicInst &I) {
  Value *FractArg = matchFractPat(I);

  FastMathFlags FMF = I.getFastMathFlags();

  Value *Fract = applyFractPat(Builder, FractArg);

  I.replaceAllUsesWith(Fract);
  DeadVals.push_back(&I);
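// visitSqrt handles f32 sqrt calls whose permitted error is at least 1 ulp.
// When denormal inputs can be flushed (canIgnoreDenormalInput) each element
// becomes a direct call to amdgcn.sqrt; otherwise the transform only fires
// when 2 ulp is acceptable and uses the emitSqrtIEEE2ULP expansion, which
// pre-scales denormal inputs.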
bool AMDGPUCodeGenPrepareImpl::visitSqrt(IntrinsicInst &Sqrt) {

  if (ReqdAccuracy < 1.0f)

  bool CanTreatAsDAZ = canIgnoreDenormalInput(SrcVal, &Sqrt);

  if (!CanTreatAsDAZ && ReqdAccuracy < 2.0f)

  for (int I = 0, E = SrcVals.size(); I != E; ++I) {

      ResultVals[I] = Builder.CreateCall(getSqrtF32(), SrcVals[I]);

      ResultVals[I] = emitSqrtIEEE2ULP(Builder, SrcVals[I], SqrtFMF);

  DeadVals.push_back(&Sqrt);
bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
  if (skipFunction(F))

  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();

  const AMDGPUTargetMachine &TM = TPC->getTM<AMDGPUTargetMachine>();
  const TargetLibraryInfo *TLI =
      &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  AssumptionCache *AC =
      &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
  const DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
      getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
  return AMDGPUCodeGenPrepareImpl(F, TM, TLI, AC, DT, UA).run();

  AMDGPUCodeGenPrepareImpl Impl(F, ATM, TLI, AC, DT, UA);

  if (!Impl.FlowChanged)

                    "AMDGPU IR optimizations", false, false)

char AMDGPUCodeGenPrepare::ID = 0;

  return new AMDGPUCodeGenPrepare();