24#include "llvm/IR/IntrinsicsDirectX.h"
34#define DEBUG_TYPE "dxil-intrinsic-expansion"
49 if (IsRaw && M->getTargetTriple().getDXILVersion() >
VersionTuple(1, 2))
58 if (M->getTargetTriple().getDXILVersion() >=
VersionTuple(1, 9))
73 ConstantInt::get(IType, 0x7c00))
74 : ConstantInt::get(IType, 0x7c00);
81 ConstantInt::get(IType, 0xfc00))
82 : ConstantInt::get(IType, 0xfc00);
84 Value *IVal = Builder.CreateBitCast(Val, PosInf->
getType());
85 Value *B1 = Builder.CreateICmpEQ(IVal, PosInf);
86 Value *B2 = Builder.CreateICmpEQ(IVal, NegInf);
87 Value *B3 = Builder.CreateOr(B1, B2);
93 if (M->getTargetTriple().getDXILVersion() >=
VersionTuple(1, 9))
109 ConstantInt::get(IType, 0x7c00))
110 : ConstantInt::get(IType, 0x7c00);
116 ConstantInt::get(IType, 0x3ff))
117 : ConstantInt::get(IType, 0x3ff);
124 ConstantInt::get(IType, 0))
125 : ConstantInt::get(IType, 0);
127 Value *IVal = Builder.CreateBitCast(Val, ExpBitMask->
getType());
128 Value *Exp = Builder.CreateAnd(IVal, ExpBitMask);
129 Value *B1 = Builder.CreateICmpEQ(Exp, ExpBitMask);
131 Value *Sig = Builder.CreateAnd(IVal, SigBitMask);
132 Value *B2 = Builder.CreateICmpNE(Sig, Zero);
133 Value *B3 = Builder.CreateAnd(B1, B2);
139 if (M->getTargetTriple().getDXILVersion() >=
VersionTuple(1, 9))
155 ConstantInt::get(IType, 0x7c00))
156 : ConstantInt::get(IType, 0x7c00);
158 Value *IVal = Builder.CreateBitCast(Val, ExpBitMask->
getType());
159 Value *Exp = Builder.CreateAnd(IVal, ExpBitMask);
160 Value *B1 = Builder.CreateICmpNE(Exp, ExpBitMask);
166 if (M->getTargetTriple().getDXILVersion() >=
VersionTuple(1, 9))
182 ConstantInt::get(IType, 0x7c00))
183 : ConstantInt::get(IType, 0x7c00);
189 ConstantInt::get(IType, 0))
190 : ConstantInt::get(IType, 0);
192 Value *IVal = Builder.CreateBitCast(Val, ExpBitMask->
getType());
193 Value *Exp = Builder.CreateAnd(IVal, ExpBitMask);
194 Value *NotAllZeroes = Builder.CreateICmpNE(Exp, Zero);
195 Value *NotAllOnes = Builder.CreateICmpNE(Exp, ExpBitMask);
196 Value *B1 = Builder.CreateAnd(NotAllZeroes, NotAllOnes);
201 switch (
F.getIntrinsicID()) {
202 case Intrinsic::assume:
204 case Intrinsic::atan2:
205 case Intrinsic::fshl:
206 case Intrinsic::fshr:
208 case Intrinsic::is_fpclass:
210 case Intrinsic::log10:
212 case Intrinsic::powi:
213 case Intrinsic::dx_all:
214 case Intrinsic::dx_any:
215 case Intrinsic::dx_cross:
216 case Intrinsic::dx_uclamp:
217 case Intrinsic::dx_sclamp:
218 case Intrinsic::dx_nclamp:
219 case Intrinsic::dx_degrees:
220 case Intrinsic::dx_isinf:
221 case Intrinsic::dx_isnan:
222 case Intrinsic::dx_lerp:
223 case Intrinsic::dx_normalize:
224 case Intrinsic::dx_fdot:
225 case Intrinsic::dx_sdot:
226 case Intrinsic::dx_udot:
227 case Intrinsic::dx_sign:
228 case Intrinsic::dx_step:
229 case Intrinsic::dx_radians:
230 case Intrinsic::dx_interlocked_add:
231 case Intrinsic::dx_interlocked_or:
232 case Intrinsic::usub_sat:
233 case Intrinsic::vector_reduce_add:
234 case Intrinsic::vector_reduce_fadd:
235 case Intrinsic::matrix_multiply:
236 case Intrinsic::matrix_transpose:
238 case Intrinsic::dx_resource_load_rawbuffer:
240 F.getParent(),
F.getReturnType()->getStructElementType(0),
242 case Intrinsic::dx_resource_load_typedbuffer:
244 F.getParent(),
F.getReturnType()->getStructElementType(0),
246 case Intrinsic::dx_resource_store_rawbuffer:
248 F.getParent(),
F.getFunctionType()->getParamType(3),
true);
249 case Intrinsic::dx_resource_store_typedbuffer:
251 F.getParent(),
F.getFunctionType()->getParamType(2),
false);
259 Type *Ty =
A->getType();
263 Value *Cmp = Builder.CreateICmpULT(
A,
B,
"usub.cmp");
264 Value *
Sub = Builder.CreateSub(
A,
B,
"usub.sub");
265 Value *Zero = ConstantInt::get(Ty, 0);
266 return Builder.CreateSelect(Cmp, Zero,
Sub,
"usub.sat");
270 assert(IntrinsicId == Intrinsic::vector_reduce_add ||
271 IntrinsicId == Intrinsic::vector_reduce_fadd);
274 bool IsFAdd = (IntrinsicId == Intrinsic::vector_reduce_fadd);
277 Type *Ty =
X->getType();
279 unsigned XVecSize = XVec->getNumElements();
280 Value *Sum = Builder.CreateExtractElement(
X,
static_cast<uint64_t>(0));
286 Sum = Builder.CreateFAdd(Sum, StartValue);
290 for (
unsigned I = 1;
I < XVecSize;
I++) {
291 Value *Elt = Builder.CreateExtractElement(
X,
I);
293 Sum = Builder.CreateFAdd(Sum, Elt);
295 Sum = Builder.CreateAdd(Sum, Elt);
304 Type *Ty =
X->getType();
310 ConstantInt::get(EltTy, 0))
311 : ConstantInt::get(EltTy, 0);
312 auto *V = Builder.CreateSub(Zero,
X);
313 return Builder.CreateIntrinsic(Ty, Intrinsic::smax, {
X, V},
nullptr,
327 Value *op0_x = Builder.CreateExtractElement(op0, (
uint64_t)0,
"x0");
328 Value *op0_y = Builder.CreateExtractElement(op0, 1,
"x1");
329 Value *op0_z = Builder.CreateExtractElement(op0, 2,
"x2");
331 Value *op1_x = Builder.CreateExtractElement(op1, (
uint64_t)0,
"y0");
332 Value *op1_y = Builder.CreateExtractElement(op1, 1,
"y1");
333 Value *op1_z = Builder.CreateExtractElement(op1, 2,
"y2");
336 Value *xy = Builder.CreateFMul(x0, y1);
337 Value *yx = Builder.CreateFMul(y0, x1);
338 return Builder.CreateFSub(xy, yx, Orig->
getName());
341 Value *yz_zy = MulSub(op0_y, op0_z, op1_y, op1_z);
342 Value *zx_xz = MulSub(op0_z, op0_x, op1_z, op1_x);
343 Value *xy_yx = MulSub(op0_x, op0_y, op1_x, op1_y);
346 cross = Builder.CreateInsertElement(cross, yz_zy, (
uint64_t)0);
347 cross = Builder.CreateInsertElement(cross, zx_xz, 1);
348 cross = Builder.CreateInsertElement(cross, xy_yx, 2);
356 Type *ATy =
A->getType();
357 [[maybe_unused]]
Type *BTy =
B->getType();
367 int NumElts = AVec->getNumElements();
370 DotIntrinsic = Intrinsic::dx_dot2;
373 DotIntrinsic = Intrinsic::dx_dot3;
376 DotIntrinsic = Intrinsic::dx_dot4;
380 "Invalid dot product input vector: length is outside 2-4");
385 for (
int I = 0;
I < NumElts; ++
I)
386 Args.push_back(Builder.CreateExtractElement(
A, Builder.getInt32(
I)));
387 for (
int I = 0;
I < NumElts; ++
I)
388 Args.push_back(Builder.CreateExtractElement(
B, Builder.getInt32(
I)));
389 return Builder.CreateIntrinsic(ATy->
getScalarType(), DotIntrinsic, Args,
404 assert(DotIntrinsic == Intrinsic::dx_sdot ||
405 DotIntrinsic == Intrinsic::dx_udot);
408 Type *ATy =
A->getType();
409 [[maybe_unused]]
Type *BTy =
B->getType();
419 Intrinsic::ID MadIntrinsic = DotIntrinsic == Intrinsic::dx_sdot
421 : Intrinsic::dx_umad;
424 Result = Builder.CreateMul(Elt0, Elt1);
425 for (
unsigned I = 1;
I < AVec->getNumElements();
I++) {
426 Elt0 = Builder.CreateExtractElement(
A,
I);
427 Elt1 = Builder.CreateExtractElement(
B,
I);
428 Result = Builder.CreateIntrinsic(Result->getType(), MadIntrinsic,
438 Type *Ty =
X->getType();
446 Value *NewX = Builder.CreateFMul(Log2eConst,
X);
447 CallInst *Exp2Call = Builder.CreateIntrinsicWithoutFolding(
448 Ty, Intrinsic::exp2, {NewX},
nullptr,
"dx.exp2");
460 switch (TCI->getZExtValue()) {
474 Type *FTy =
F->getType();
475 unsigned FNumElem = 0;
481 Type *ElemTy = FVecTy->getElementType();
482 FNumElem = FVecTy->getNumElements();
483 BitWidth = ElemTy->getPrimitiveSizeInBits();
490 Value *FBitCast = Builder.CreateBitCast(
F, BitCastTy);
491 switch (TCI->getZExtValue()) {
498 Value *NegZeroSplat = Builder.CreateVectorSplat(FNumElem, NegZero);
500 Builder.CreateICmpEQ(FBitCast, NegZeroSplat,
"is.fpclass.negzero");
502 RetVal = Builder.CreateICmpEQ(FBitCast, NegZero,
"is.fpclass.negzero");
514 Type *Ty =
X->getType();
519 if (IntrinsicId == Intrinsic::dx_any)
520 return Builder.CreateOr(Result, Elt);
521 assert(IntrinsicId == Intrinsic::dx_all);
522 return Builder.CreateAnd(Result, Elt);
525 Value *Result =
nullptr;
526 if (!Ty->isVectorTy()) {
528 ? Builder.CreateFCmpUNE(
X, ConstantFP::get(EltTy, 0))
529 : Builder.CreateICmpNE(
X, ConstantInt::get(EltTy, 0));
534 ? Builder.CreateFCmpUNE(
537 ConstantFP::get(EltTy, 0)))
538 : Builder.CreateICmpNE(
541 ConstantInt::get(EltTy, 0)));
542 Result = Builder.CreateExtractElement(
Cond, (
uint64_t)0);
543 for (
unsigned I = 1;
I < XVec->getNumElements();
I++) {
544 Value *Elt = Builder.CreateExtractElement(
Cond,
I);
545 Result = ApplyOp(IntrinsicId, Result, Elt);
556 auto *V = Builder.CreateFSub(
Y,
X);
557 V = Builder.CreateFMul(S, V);
558 return Builder.CreateFAdd(
X, V,
"dx.lerp");
565 Type *Ty =
X->getType();
571 ConstantFP::get(EltTy, LogConstVal))
572 : ConstantFP::get(EltTy, LogConstVal);
573 CallInst *Log2Call = Builder.CreateIntrinsicWithoutFolding(
574 Ty, Intrinsic::log2, {
X},
nullptr,
"elt.log2");
577 return Builder.CreateFMul(Ln2Const, Log2Call);
594 const APFloat &fpVal = constantFP->getValueAPF();
598 return Builder.CreateFDiv(
X,
X);
606 const APFloat &fpVal = constantFP->getValueAPF();
611 Value *Multiplicand = Builder.CreateIntrinsic(EltTy, Intrinsic::dx_rsqrt,
613 nullptr,
"dx.rsqrt");
615 Value *MultiplicandVec =
616 Builder.CreateVectorSplat(XVec->getNumElements(), Multiplicand);
617 return Builder.CreateFMul(
X, MultiplicandVec);
623 Type *Ty =
X->getType();
627 Value *Tan = Builder.CreateFDiv(
Y,
X);
629 CallInst *Atan = Builder.CreateIntrinsicWithoutFolding(
630 Ty, Intrinsic::atan, {Tan},
nullptr,
"Elt.Atan");
638 Constant *Zero = ConstantFP::get(Ty, 0);
639 Value *AtanAddPi = Builder.CreateFAdd(Atan, Pi);
640 Value *AtanSubPi = Builder.CreateFSub(Atan, Pi);
643 Value *Result = Atan;
644 Value *XLt0 = Builder.CreateFCmpOLT(
X, Zero);
645 Value *XEq0 = Builder.CreateFCmpOEQ(
X, Zero);
646 Value *YGe0 = Builder.CreateFCmpOGE(
Y, Zero);
647 Value *YLt0 = Builder.CreateFCmpOLT(
Y, Zero);
650 Value *XLt0AndYGe0 = Builder.CreateAnd(XLt0, YGe0);
651 Result = Builder.CreateSelect(XLt0AndYGe0, AtanAddPi, Result);
654 Value *XLt0AndYLt0 = Builder.CreateAnd(XLt0, YLt0);
655 Result = Builder.CreateSelect(XLt0AndYLt0, AtanSubPi, Result);
658 Value *XEq0AndYLt0 = Builder.CreateAnd(XEq0, YLt0);
659 Result = Builder.CreateSelect(XEq0AndYLt0, NegHalfPi, Result);
662 Value *XEq0AndYGe0 = Builder.CreateAnd(XEq0, YGe0);
663 Result = Builder.CreateSelect(XEq0AndYGe0, HalfPi, Result);
668template <
bool LeftFunnel>
677 unsigned BitWidth = Ty->getScalarSizeInBits();
679 "Can't use Mask to compute modulo and inverse");
694 Constant *Mask = ConstantInt::get(Ty, Ty->getScalarSizeInBits() - 1);
699 Value *MaskedShift = Builder.CreateAnd(Shift, Mask);
704 Value *NotShift = Builder.CreateNot(Shift);
705 Value *InverseShift = Builder.CreateAnd(NotShift, Mask);
707 Constant *One = ConstantInt::get(Ty, 1);
712 ShiftedA = Builder.CreateShl(
A, MaskedShift);
713 Value *ShiftB1 = Builder.CreateLShr(
B, One);
714 ShiftedB = Builder.CreateLShr(ShiftB1, InverseShift);
716 Value *ShiftA1 = Builder.CreateShl(
A, One);
717 ShiftedA = Builder.CreateShl(ShiftA1, InverseShift);
718 ShiftedB = Builder.CreateLShr(
B, MaskedShift);
721 Value *Result = Builder.CreateOr(ShiftedA, ShiftedB);
729 Type *Ty =
X->getType();
732 if (IntrinsicId == Intrinsic::powi)
733 Y = Builder.CreateSIToFP(
Y, Ty);
736 Builder.CreateIntrinsic(Ty, Intrinsic::log2, {
X},
nullptr,
"elt.log2");
737 auto *
Mul = Builder.CreateFMul(Log2Call,
Y);
738 CallInst *Exp2Call = Builder.CreateIntrinsicWithoutFolding(
739 Ty, Intrinsic::exp2, {
Mul},
nullptr,
"elt.exp2");
749 Type *Ty =
X->getType();
752 Constant *One = ConstantFP::get(Ty->getScalarType(), 1.0);
753 Constant *Zero = ConstantFP::get(Ty->getScalarType(), 0.0);
756 if (Ty != Ty->getScalarType()) {
764 return Builder.CreateSelect(
Cond, Zero, One);
769 Type *Ty =
X->getType();
772 return Builder.CreateFMul(
X, PiOver180);
784 return Builder.CreateAtomicRMW(
Op, Ptr, Val,
MaybeAlign(),
795 "Only expand double or int64 scalars or vectors");
796 bool IsVector =
false;
797 unsigned ExtractNum = 2;
799 ExtractNum = 2 * VT->getNumElements();
801 assert(IsRaw || ExtractNum == 4 &&
"TypedBufferLoad vector must be size 2");
810 while (ExtractNum > 0) {
811 unsigned LoadNum = std::min(ExtractNum, 4u);
815 Intrinsic::ID LoadIntrinsic = Intrinsic::dx_resource_load_typedbuffer;
818 LoadIntrinsic = Intrinsic::dx_resource_load_rawbuffer;
819 Value *Tmp = Builder.getInt32(4 *
Base * 2);
820 Args.push_back(Builder.CreateAdd(Orig->
getOperand(2), Tmp));
823 Value *Load = Builder.CreateIntrinsic(LoadType, LoadIntrinsic, Args);
827 Value *Extract = Builder.CreateExtractValue(Load, {0});
830 for (
unsigned I = 0;
I < LoadNum; ++
I)
832 Builder.CreateExtractElement(Extract, Builder.getInt32(
I)));
835 for (
unsigned I = 0;
I < LoadNum;
I += 2) {
836 Value *Combined =
nullptr;
839 Combined = Builder.CreateIntrinsic(
840 Builder.getDoubleTy(), Intrinsic::dx_asdouble,
841 {ExtractElements[I], ExtractElements[I + 1]});
846 Builder.CreateZExt(ExtractElements[
I], Builder.getInt64Ty());
848 Builder.CreateZExt(ExtractElements[
I + 1], Builder.getInt64Ty());
850 Value *ShiftedHi = Builder.CreateShl(
Hi, Builder.getInt64(32));
852 Combined = Builder.CreateOr(
Lo, ShiftedHi);
856 Result = Builder.CreateInsertElement(Result, Combined,
857 Builder.getInt32((
I / 2) +
Base));
862 ExtractNum -= LoadNum;
866 Value *CheckBit =
nullptr;
877 if (Indices[0] == 0) {
879 EVI->replaceAllUsesWith(Result);
882 assert(Indices[0] == 1 &&
"Unexpected type for typedbufferload");
887 for (
Value *L : Loads)
888 CheckBits.
push_back(Builder.CreateExtractValue(L, {1}));
889 CheckBit = Builder.CreateAnd(CheckBits);
891 EVI->replaceAllUsesWith(CheckBit);
893 EVI->eraseFromParent();
902 unsigned ValIndex = IsRaw ? 3 : 2;
907 "Only expand double or int64 scalars or vectors");
910 bool IsVector =
false;
911 unsigned ExtractNum = 2;
914 VecLen = VT->getNumElements();
915 assert(IsRaw || VecLen == 2 &&
"TypedBufferStore vector must be size 2");
916 ExtractNum = VecLen * 2;
925 Type *SplitElementTy = Int32Ty;
929 Value *LowBits =
nullptr;
930 Value *HighBits =
nullptr;
934 Value *Split = Builder.CreateIntrinsic(SplitTy, Intrinsic::dx_splitdouble,
936 LowBits = Builder.CreateExtractValue(Split, 0);
937 HighBits = Builder.CreateExtractValue(Split, 1);
941 Constant *ShiftAmt = Builder.getInt64(32);
947 LowBits = Builder.CreateTrunc(InputVal, SplitElementTy);
948 Value *ShiftedVal = Builder.CreateLShr(InputVal, ShiftAmt);
949 HighBits = Builder.CreateTrunc(ShiftedVal, SplitElementTy);
954 for (
unsigned I = 0;
I < VecLen; ++
I) {
956 Mask.push_back(
I + VecLen);
958 Val = Builder.CreateShuffleVector(LowBits, HighBits, Mask);
960 Val = Builder.CreateInsertElement(Val, LowBits, Builder.getInt32(0));
961 Val = Builder.CreateInsertElement(Val, HighBits, Builder.getInt32(1));
968 while (ExtractNum > 0) {
969 unsigned StoreNum = std::min(ExtractNum, 4u);
971 Intrinsic::ID StoreIntrinsic = Intrinsic::dx_resource_store_typedbuffer;
974 StoreIntrinsic = Intrinsic::dx_resource_store_rawbuffer;
976 Args.push_back(Builder.CreateAdd(Orig->
getOperand(2), Tmp));
980 for (
unsigned I = 0;
I < StoreNum; ++
I) {
981 Mask.push_back(
Base +
I);
986 SubVal = Builder.CreateShuffleVector(Val, Mask);
988 Args.push_back(SubVal);
990 Builder.CreateIntrinsic(Builder.getVoidTy(), StoreIntrinsic, Args);
992 ExtractNum -= StoreNum;
1000 if (ClampIntrinsic == Intrinsic::dx_uclamp)
1001 return Intrinsic::umax;
1002 if (ClampIntrinsic == Intrinsic::dx_sclamp)
1003 return Intrinsic::smax;
1004 assert(ClampIntrinsic == Intrinsic::dx_nclamp);
1005 return Intrinsic::maxnum;
1009 if (ClampIntrinsic == Intrinsic::dx_uclamp)
1010 return Intrinsic::umin;
1011 if (ClampIntrinsic == Intrinsic::dx_sclamp)
1012 return Intrinsic::smin;
1013 assert(ClampIntrinsic == Intrinsic::dx_nclamp);
1014 return Intrinsic::minnum;
1022 Type *Ty =
X->getType();
1024 auto *MaxCall = Builder.CreateIntrinsic(Ty,
getMaxForClamp(ClampIntrinsic),
1025 {
X, Min},
nullptr,
"dx.max");
1026 return Builder.CreateIntrinsic(Ty,
getMinForClamp(ClampIntrinsic),
1027 {MaxCall, Max},
nullptr,
"dx.min");
1032 Type *Ty =
X->getType();
1035 return Builder.CreateFMul(
X, DegreesRatio);
1040 Type *Ty =
X->getType();
1050 GT = Builder.CreateFCmpOLT(Zero,
X);
1051 LT = Builder.CreateFCmpOLT(
X, Zero);
1054 GT = Builder.CreateICmpSLT(Zero,
X);
1055 LT = Builder.CreateICmpSLT(
X, Zero);
1058 Value *ZextGT = Builder.CreateZExt(GT, RetTy);
1059 Value *ZextLT = Builder.CreateZExt(LT, RetTy);
1061 return Builder.CreateSub(ZextGT, ZextLT);
1076 Type *EltTy = RetTy->getElementType();
1087 unsigned LHSSize = LHSRows * LHSCols;
1088 unsigned RHSSize = LHSCols * RHSCols;
1091 for (
unsigned I = 0;
I < LHSSize; ++
I)
1092 LHSElts[
I] = Builder.CreateExtractElement(
LHS,
I);
1093 for (
unsigned I = 0;
I < RHSSize; ++
I)
1094 RHSElts[
I] = Builder.CreateExtractElement(
RHS,
I);
1099 bool UseScalarFP = IsFP && (EltTy->
isDoubleTy() || LHSCols == 1);
1100 if (IsFP && !UseScalarFP) {
1103 FloatDotID = Intrinsic::dx_dot2;
1106 FloatDotID = Intrinsic::dx_dot3;
1109 FloatDotID = Intrinsic::dx_dot4;
1113 "Invalid matrix inner dimension for dot product: must be 2-4");
1118 for (
unsigned C = 0;
C < RHSCols; ++
C) {
1119 for (
unsigned R = 0; R < LHSRows; ++R) {
1122 for (
unsigned K = 0; K < LHSCols; ++K) {
1123 RowElts.
push_back(LHSElts[K * LHSRows + R]);
1130 Dot = Builder.CreateFMul(RowElts[0], ColElts[0]);
1131 for (
unsigned K = 1; K < LHSCols; ++K)
1132 Dot = Builder.CreateIntrinsic(EltTy, Intrinsic::fmuladd,
1133 {RowElts[K], ColElts[K], Dot});
1137 Args.append(RowElts.
begin(), RowElts.
end());
1138 Args.append(ColElts.
begin(), ColElts.
end());
1139 Dot = Builder.CreateIntrinsic(EltTy, FloatDotID, Args);
1142 Dot = Builder.CreateMul(RowElts[0], ColElts[0]);
1143 for (
unsigned K = 1; K < LHSCols; ++K)
1144 Dot = Builder.CreateIntrinsic(EltTy, Intrinsic::dx_imad,
1145 {RowElts[K], ColElts[K], Dot});
1147 unsigned ResIdx =
C * LHSRows + R;
1148 Result = Builder.CreateInsertElement(Result, Dot, ResIdx);
1162 unsigned NumElts = Rows * Cols;
1164 for (
unsigned I = 0;
I < NumElts; ++
I)
1165 Mask[
I] = (
I % Cols) * Rows + (
I / Cols);
1168 return Builder.CreateShuffleVector(Mat, Mask);
1172 Value *Result =
nullptr;
1174 switch (IntrinsicId) {
1175 case Intrinsic::abs:
1178 case Intrinsic::assume:
1181 case Intrinsic::atan2:
1184 case Intrinsic::fshl:
1187 case Intrinsic::fshr:
1190 case Intrinsic::exp:
1193 case Intrinsic::is_fpclass:
1196 case Intrinsic::log:
1199 case Intrinsic::log10:
1202 case Intrinsic::pow:
1203 case Intrinsic::powi:
1206 case Intrinsic::dx_all:
1207 case Intrinsic::dx_any:
1210 case Intrinsic::dx_cross:
1213 case Intrinsic::dx_uclamp:
1214 case Intrinsic::dx_sclamp:
1215 case Intrinsic::dx_nclamp:
1218 case Intrinsic::dx_degrees:
1221 case Intrinsic::dx_isinf:
1224 case Intrinsic::dx_isnan:
1227 case Intrinsic::dx_lerp:
1230 case Intrinsic::dx_normalize:
1233 case Intrinsic::dx_fdot:
1236 case Intrinsic::dx_sdot:
1237 case Intrinsic::dx_udot:
1240 case Intrinsic::dx_sign:
1243 case Intrinsic::dx_step:
1246 case Intrinsic::dx_radians:
1249 case Intrinsic::dx_interlocked_add:
1252 case Intrinsic::dx_interlocked_or:
1255 case Intrinsic::dx_resource_load_rawbuffer:
1259 case Intrinsic::dx_resource_store_rawbuffer:
1263 case Intrinsic::dx_resource_load_typedbuffer:
1267 case Intrinsic::dx_resource_store_typedbuffer:
1271 case Intrinsic::usub_sat:
1274 case Intrinsic::vector_reduce_add:
1275 case Intrinsic::vector_reduce_fadd:
1278 case Intrinsic::matrix_multiply:
1281 case Intrinsic::matrix_transpose:
1297 bool IntrinsicExpanded =
false;
1304 if (
F.user_empty() && IntrinsicExpanded)
1305 F.eraseFromParent();
1324 "DXIL Intrinsic Expansion",
false,
false)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static Value * expand16BitIsNormal(CallInst *Orig)
static Value * expandNormalizeIntrinsic(CallInst *Orig)
static bool expandIntrinsic(Function &F, CallInst *Orig)
static Value * expandClampIntrinsic(CallInst *Orig, Intrinsic::ID ClampIntrinsic)
static Value * expand16BitIsInf(CallInst *Orig)
static bool expansionIntrinsics(Module &M)
static Value * expand16BitIsFinite(CallInst *Orig)
static Value * expandLerpIntrinsic(CallInst *Orig)
static Value * expandCrossIntrinsic(CallInst *Orig)
static Value * expandUsubSat(CallInst *Orig)
static Value * expandAnyOrAllIntrinsic(CallInst *Orig, Intrinsic::ID IntrinsicId)
static Value * expandMatrixTranspose(CallInst *Orig)
static Value * expandVecReduceAdd(CallInst *Orig, Intrinsic::ID IntrinsicId)
static Value * expandAtan2Intrinsic(CallInst *Orig)
static Value * expandLog10Intrinsic(CallInst *Orig)
static Intrinsic::ID getMinForClamp(Intrinsic::ID ClampIntrinsic)
static Value * expandStepIntrinsic(CallInst *Orig)
static Value * expandIntegerDotIntrinsic(CallInst *Orig, Intrinsic::ID DotIntrinsic)
static bool expandBufferStoreIntrinsic(CallInst *Orig, bool IsRaw)
static Value * expandLogIntrinsic(CallInst *Orig, float LogConstVal=numbers::ln2f)
static Value * expandDegreesIntrinsic(CallInst *Orig)
static Value * expandPowIntrinsic(CallInst *Orig, Intrinsic::ID IntrinsicId)
static bool resourceAccessNeeds64BitExpansion(Module *M, Type *OverloadTy, bool IsRaw)
static Value * expandExpIntrinsic(CallInst *Orig)
static Value * expand16BitIsNaN(CallInst *Orig)
static Value * expandSignIntrinsic(CallInst *Orig)
static Intrinsic::ID getMaxForClamp(Intrinsic::ID ClampIntrinsic)
static Value * expandAbs(CallInst *Orig)
static Value * expandFloatDotIntrinsic(CallInst *Orig, Value *A, Value *B)
static Value * expandRadiansIntrinsic(CallInst *Orig)
static Value * expandInterlockedIntrinsic(CallInst *Orig, AtomicRMWInst::BinOp Op)
static bool isIntrinsicExpansion(Function &F)
static bool expandBufferLoadIntrinsic(CallInst *Orig, bool IsRaw)
static Value * expandMatrixMultiply(CallInst *Orig)
static Value * expandIsFPClass(CallInst *Orig)
static Value * expandFunnelShiftIntrinsic(CallInst *Orig)
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > & Cond
static unsigned getNumElements(Type *Ty)
This file defines the SmallVector class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
bool runOnModule(Module &M) override
runOnModule - Virtual method overridden by subclasses to process the module being operated on.
DXILIntrinsicExpansionLegacy()
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
void setAttributes(AttributeList A)
Set the attributes for this call.
Value * getArgOperand(unsigned i) const
FunctionType * getFunctionType() const
AttributeList getAttributes() const
Return the attributes for this call.
This class represents a function call, abstracting a target machine's calling convention.
void setTailCall(bool IsTc=true)
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
This is an important base class in LLVM.
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &)
static constexpr ElementCount getFixed(ScalarTy MinVal)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Type * getParamType(unsigned i) const
Parameter type accessors.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
A Module instance is used to store all the information related to an LLVM module.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI Type * getStructElementType(unsigned N) const
bool isVectorTy() const
True if this is an instance of VectorType.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
Represents a version number in the form major[.minor[.subminor[.build]]].
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
ModulePass * createDXILIntrinsicExpansionLegacyPass()
Pass to expand intrinsic operations that lack DXIL opCodes.
@ Sub
Subtraction of integers.
DWARFExpression::Operation Op
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.