LLVM 23.0.0git
DXILIntrinsicExpansion.cpp
Go to the documentation of this file.
1//===- DXILIntrinsicExpansion.cpp - Prepare LLVM Module for DXIL encoding--===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file This file contains expansions for intrinsics that don't have
10/// opcodes in DirectX Intermediate Language (DXIL).
11//===----------------------------------------------------------------------===//
12
14#include "DirectX.h"
15#include "llvm/ADT/STLExtras.h"
17#include "llvm/CodeGen/Passes.h"
18#include "llvm/IR/Constants.h"
19#include "llvm/IR/IRBuilder.h"
20#include "llvm/IR/InstrTypes.h"
21#include "llvm/IR/Instruction.h"
23#include "llvm/IR/Intrinsics.h"
24#include "llvm/IR/IntrinsicsDirectX.h"
26#include "llvm/IR/Module.h"
27#include "llvm/IR/PassManager.h"
28#include "llvm/IR/Type.h"
29#include "llvm/Pass.h"
33
34#define DEBUG_TYPE "dxil-intrinsic-expansion"
35
36using namespace llvm;
37
39
40public:
41 bool runOnModule(Module &M) override;
43
44 static char ID; // Pass identification.
45};
46
47static bool resourceAccessNeeds64BitExpansion(Module *M, Type *OverloadTy,
48 bool IsRaw) {
49 if (IsRaw && M->getTargetTriple().getDXILVersion() > VersionTuple(1, 2))
50 return false;
51
52 Type *ScalarTy = OverloadTy->getScalarType();
53 return ScalarTy->isDoubleTy() || ScalarTy->isIntegerTy(64);
54}
55
57 Module *M = Orig->getModule();
58 if (M->getTargetTriple().getDXILVersion() >= VersionTuple(1, 9))
59 return nullptr;
60
61 Value *Val = Orig->getOperand(0);
62 Type *ValTy = Val->getType();
63 if (!ValTy->getScalarType()->isHalfTy())
64 return nullptr;
65
66 IRBuilder<> Builder(Orig);
67 Type *IType = Type::getInt16Ty(M->getContext());
68 Constant *PosInf =
69 ValTy->isVectorTy()
73 ConstantInt::get(IType, 0x7c00))
74 : ConstantInt::get(IType, 0x7c00);
75
76 Constant *NegInf =
77 ValTy->isVectorTy()
81 ConstantInt::get(IType, 0xfc00))
82 : ConstantInt::get(IType, 0xfc00);
83
84 Value *IVal = Builder.CreateBitCast(Val, PosInf->getType());
85 Value *B1 = Builder.CreateICmpEQ(IVal, PosInf);
86 Value *B2 = Builder.CreateICmpEQ(IVal, NegInf);
87 Value *B3 = Builder.CreateOr(B1, B2);
88 return B3;
89}
90
92 Module *M = Orig->getModule();
93 if (M->getTargetTriple().getDXILVersion() >= VersionTuple(1, 9))
94 return nullptr;
95
96 Value *Val = Orig->getOperand(0);
97 Type *ValTy = Val->getType();
98 if (!ValTy->getScalarType()->isHalfTy())
99 return nullptr;
100
101 IRBuilder<> Builder(Orig);
102 Type *IType = Type::getInt16Ty(M->getContext());
103
104 Constant *ExpBitMask =
105 ValTy->isVectorTy()
109 ConstantInt::get(IType, 0x7c00))
110 : ConstantInt::get(IType, 0x7c00);
111 Constant *SigBitMask =
112 ValTy->isVectorTy()
116 ConstantInt::get(IType, 0x3ff))
117 : ConstantInt::get(IType, 0x3ff);
118
119 Constant *Zero =
120 ValTy->isVectorTy()
124 ConstantInt::get(IType, 0))
125 : ConstantInt::get(IType, 0);
126
127 Value *IVal = Builder.CreateBitCast(Val, ExpBitMask->getType());
128 Value *Exp = Builder.CreateAnd(IVal, ExpBitMask);
129 Value *B1 = Builder.CreateICmpEQ(Exp, ExpBitMask);
130
131 Value *Sig = Builder.CreateAnd(IVal, SigBitMask);
132 Value *B2 = Builder.CreateICmpNE(Sig, Zero);
133 Value *B3 = Builder.CreateAnd(B1, B2);
134 return B3;
135}
136
138 Module *M = Orig->getModule();
139 if (M->getTargetTriple().getDXILVersion() >= VersionTuple(1, 9))
140 return nullptr;
141
142 Value *Val = Orig->getOperand(0);
143 Type *ValTy = Val->getType();
144 if (!ValTy->getScalarType()->isHalfTy())
145 return nullptr;
146
147 IRBuilder<> Builder(Orig);
148 Type *IType = Type::getInt16Ty(M->getContext());
149
150 Constant *ExpBitMask =
151 ValTy->isVectorTy()
155 ConstantInt::get(IType, 0x7c00))
156 : ConstantInt::get(IType, 0x7c00);
157
158 Value *IVal = Builder.CreateBitCast(Val, ExpBitMask->getType());
159 Value *Exp = Builder.CreateAnd(IVal, ExpBitMask);
160 Value *B1 = Builder.CreateICmpNE(Exp, ExpBitMask);
161 return B1;
162}
163
165 Module *M = Orig->getModule();
166 if (M->getTargetTriple().getDXILVersion() >= VersionTuple(1, 9))
167 return nullptr;
168
169 Value *Val = Orig->getOperand(0);
170 Type *ValTy = Val->getType();
171 if (!ValTy->getScalarType()->isHalfTy())
172 return nullptr;
173
174 IRBuilder<> Builder(Orig);
175 Type *IType = Type::getInt16Ty(M->getContext());
176
177 Constant *ExpBitMask =
178 ValTy->isVectorTy()
182 ConstantInt::get(IType, 0x7c00))
183 : ConstantInt::get(IType, 0x7c00);
184 Constant *Zero =
185 ValTy->isVectorTy()
189 ConstantInt::get(IType, 0))
190 : ConstantInt::get(IType, 0);
191
192 Value *IVal = Builder.CreateBitCast(Val, ExpBitMask->getType());
193 Value *Exp = Builder.CreateAnd(IVal, ExpBitMask);
194 Value *NotAllZeroes = Builder.CreateICmpNE(Exp, Zero);
195 Value *NotAllOnes = Builder.CreateICmpNE(Exp, ExpBitMask);
196 Value *B1 = Builder.CreateAnd(NotAllZeroes, NotAllOnes);
197 return B1;
198}
199
201 switch (F.getIntrinsicID()) {
202 case Intrinsic::assume:
203 case Intrinsic::abs:
204 case Intrinsic::atan2:
205 case Intrinsic::fshl:
206 case Intrinsic::fshr:
207 case Intrinsic::exp:
208 case Intrinsic::is_fpclass:
209 case Intrinsic::log:
210 case Intrinsic::log10:
211 case Intrinsic::pow:
212 case Intrinsic::powi:
213 case Intrinsic::dx_all:
214 case Intrinsic::dx_any:
215 case Intrinsic::dx_cross:
216 case Intrinsic::dx_uclamp:
217 case Intrinsic::dx_sclamp:
218 case Intrinsic::dx_nclamp:
219 case Intrinsic::dx_degrees:
220 case Intrinsic::dx_isinf:
221 case Intrinsic::dx_isnan:
222 case Intrinsic::dx_lerp:
223 case Intrinsic::dx_normalize:
224 case Intrinsic::dx_fdot:
225 case Intrinsic::dx_sdot:
226 case Intrinsic::dx_udot:
227 case Intrinsic::dx_sign:
228 case Intrinsic::dx_step:
229 case Intrinsic::dx_radians:
230 case Intrinsic::dx_interlocked_add:
231 case Intrinsic::dx_interlocked_or:
232 case Intrinsic::usub_sat:
233 case Intrinsic::vector_reduce_add:
234 case Intrinsic::vector_reduce_fadd:
235 case Intrinsic::matrix_multiply:
236 case Intrinsic::matrix_transpose:
237 return true;
238 case Intrinsic::dx_resource_load_rawbuffer:
240 F.getParent(), F.getReturnType()->getStructElementType(0),
241 /*IsRaw*/ true);
242 case Intrinsic::dx_resource_load_typedbuffer:
244 F.getParent(), F.getReturnType()->getStructElementType(0),
245 /*IsRaw*/ false);
246 case Intrinsic::dx_resource_store_rawbuffer:
248 F.getParent(), F.getFunctionType()->getParamType(3), /*IsRaw*/ true);
249 case Intrinsic::dx_resource_store_typedbuffer:
251 F.getParent(), F.getFunctionType()->getParamType(2), /*IsRaw*/ false);
252 }
253 return false;
254}
255
257 Value *A = Orig->getArgOperand(0);
258 Value *B = Orig->getArgOperand(1);
259 Type *Ty = A->getType();
260
261 IRBuilder<> Builder(Orig);
262
263 Value *Cmp = Builder.CreateICmpULT(A, B, "usub.cmp");
264 Value *Sub = Builder.CreateSub(A, B, "usub.sub");
265 Value *Zero = ConstantInt::get(Ty, 0);
266 return Builder.CreateSelect(Cmp, Zero, Sub, "usub.sat");
267}
268
269static Value *expandVecReduceAdd(CallInst *Orig, Intrinsic::ID IntrinsicId) {
270 assert(IntrinsicId == Intrinsic::vector_reduce_add ||
271 IntrinsicId == Intrinsic::vector_reduce_fadd);
272
273 IRBuilder<> Builder(Orig);
274 bool IsFAdd = (IntrinsicId == Intrinsic::vector_reduce_fadd);
275
276 Value *X = Orig->getOperand(IsFAdd ? 1 : 0);
277 Type *Ty = X->getType();
278 auto *XVec = dyn_cast<FixedVectorType>(Ty);
279 unsigned XVecSize = XVec->getNumElements();
280 Value *Sum = Builder.CreateExtractElement(X, static_cast<uint64_t>(0));
281
282 // Handle the initial start value for floating-point addition.
283 if (IsFAdd) {
284 Constant *StartValue = dyn_cast<Constant>(Orig->getOperand(0));
285 if (StartValue && !StartValue->isNullValue())
286 Sum = Builder.CreateFAdd(Sum, StartValue);
287 }
288
289 // Accumulate the remaining vector elements.
290 for (unsigned I = 1; I < XVecSize; I++) {
291 Value *Elt = Builder.CreateExtractElement(X, I);
292 if (IsFAdd)
293 Sum = Builder.CreateFAdd(Sum, Elt);
294 else
295 Sum = Builder.CreateAdd(Sum, Elt);
296 }
297
298 return Sum;
299}
300
301static Value *expandAbs(CallInst *Orig) {
302 Value *X = Orig->getOperand(0);
303 IRBuilder<> Builder(Orig);
304 Type *Ty = X->getType();
305 Type *EltTy = Ty->getScalarType();
306 Constant *Zero = Ty->isVectorTy()
310 ConstantInt::get(EltTy, 0))
311 : ConstantInt::get(EltTy, 0);
312 auto *V = Builder.CreateSub(Zero, X);
313 return Builder.CreateIntrinsic(Ty, Intrinsic::smax, {X, V}, nullptr,
314 "dx.max");
315}
316
318
319 VectorType *VT = cast<VectorType>(Orig->getType());
321 reportFatalUsageError("return vector must have exactly 3 elements");
322
323 Value *op0 = Orig->getOperand(0);
324 Value *op1 = Orig->getOperand(1);
325 IRBuilder<> Builder(Orig);
326
327 Value *op0_x = Builder.CreateExtractElement(op0, (uint64_t)0, "x0");
328 Value *op0_y = Builder.CreateExtractElement(op0, 1, "x1");
329 Value *op0_z = Builder.CreateExtractElement(op0, 2, "x2");
330
331 Value *op1_x = Builder.CreateExtractElement(op1, (uint64_t)0, "y0");
332 Value *op1_y = Builder.CreateExtractElement(op1, 1, "y1");
333 Value *op1_z = Builder.CreateExtractElement(op1, 2, "y2");
334
335 auto MulSub = [&](Value *x0, Value *y0, Value *x1, Value *y1) -> Value * {
336 Value *xy = Builder.CreateFMul(x0, y1);
337 Value *yx = Builder.CreateFMul(y0, x1);
338 return Builder.CreateFSub(xy, yx, Orig->getName());
339 };
340
341 Value *yz_zy = MulSub(op0_y, op0_z, op1_y, op1_z);
342 Value *zx_xz = MulSub(op0_z, op0_x, op1_z, op1_x);
343 Value *xy_yx = MulSub(op0_x, op0_y, op1_x, op1_y);
344
345 Value *cross = PoisonValue::get(VT);
346 cross = Builder.CreateInsertElement(cross, yz_zy, (uint64_t)0);
347 cross = Builder.CreateInsertElement(cross, zx_xz, 1);
348 cross = Builder.CreateInsertElement(cross, xy_yx, 2);
349 return cross;
350}
351
352// Create appropriate DXIL float dot intrinsic for the given A and B operands
353// The appropriate opcode will be determined by the size of the operands
354// The dot product is placed in the position indicated by Orig
356 Type *ATy = A->getType();
357 [[maybe_unused]] Type *BTy = B->getType();
358 assert(ATy->isVectorTy() && BTy->isVectorTy());
359
360 IRBuilder<> Builder(Orig);
361
362 auto *AVec = dyn_cast<FixedVectorType>(ATy);
363
365
366 Intrinsic::ID DotIntrinsic = Intrinsic::dx_dot4;
367 int NumElts = AVec->getNumElements();
368 switch (NumElts) {
369 case 2:
370 DotIntrinsic = Intrinsic::dx_dot2;
371 break;
372 case 3:
373 DotIntrinsic = Intrinsic::dx_dot3;
374 break;
375 case 4:
376 DotIntrinsic = Intrinsic::dx_dot4;
377 break;
378 default:
380 "Invalid dot product input vector: length is outside 2-4");
381 return nullptr;
382 }
383
385 for (int I = 0; I < NumElts; ++I)
386 Args.push_back(Builder.CreateExtractElement(A, Builder.getInt32(I)));
387 for (int I = 0; I < NumElts; ++I)
388 Args.push_back(Builder.CreateExtractElement(B, Builder.getInt32(I)));
389 return Builder.CreateIntrinsic(ATy->getScalarType(), DotIntrinsic, Args,
390 nullptr, "dot");
391}
392
393// Create the appropriate DXIL float dot intrinsic for the operands of Orig
394// The appropriate opcode will be determined by the size of the operands
395// The dot product is placed in the position indicated by Orig
397 return expandFloatDotIntrinsic(Orig, Orig->getOperand(0),
398 Orig->getOperand(1));
399}
400
401// Expand integer dot product to multiply and add ops
403 Intrinsic::ID DotIntrinsic) {
404 assert(DotIntrinsic == Intrinsic::dx_sdot ||
405 DotIntrinsic == Intrinsic::dx_udot);
406 Value *A = Orig->getOperand(0);
407 Value *B = Orig->getOperand(1);
408 Type *ATy = A->getType();
409 [[maybe_unused]] Type *BTy = B->getType();
410 assert(ATy->isVectorTy() && BTy->isVectorTy());
411
412 IRBuilder<> Builder(Orig);
413
414 auto *AVec = dyn_cast<FixedVectorType>(ATy);
415
417
418 Value *Result;
419 Intrinsic::ID MadIntrinsic = DotIntrinsic == Intrinsic::dx_sdot
420 ? Intrinsic::dx_imad
421 : Intrinsic::dx_umad;
422 Value *Elt0 = Builder.CreateExtractElement(A, (uint64_t)0);
423 Value *Elt1 = Builder.CreateExtractElement(B, (uint64_t)0);
424 Result = Builder.CreateMul(Elt0, Elt1);
425 for (unsigned I = 1; I < AVec->getNumElements(); I++) {
426 Elt0 = Builder.CreateExtractElement(A, I);
427 Elt1 = Builder.CreateExtractElement(B, I);
428 Result = Builder.CreateIntrinsic(Result->getType(), MadIntrinsic,
429 ArrayRef<Value *>{Elt0, Elt1, Result},
430 nullptr, "dx.mad");
431 }
432 return Result;
433}
434
436 Value *X = Orig->getOperand(0);
437 IRBuilder<> Builder(Orig);
438 Type *Ty = X->getType();
439 Type *EltTy = Ty->getScalarType();
440 Constant *Log2eConst =
441 Ty->isVectorTy() ? ConstantVector::getSplat(
444 ConstantFP::get(EltTy, numbers::log2ef))
445 : ConstantFP::get(EltTy, numbers::log2ef);
446 Value *NewX = Builder.CreateFMul(Log2eConst, X);
447 CallInst *Exp2Call = Builder.CreateIntrinsicWithoutFolding(
448 Ty, Intrinsic::exp2, {NewX}, nullptr, "dx.exp2");
449 Exp2Call->setTailCall(Orig->isTailCall());
450 Exp2Call->setAttributes(Orig->getAttributes());
451 return Exp2Call;
452}
453
455 Value *T = Orig->getArgOperand(1);
456 auto *TCI = dyn_cast<ConstantInt>(T);
457
458 // These FPClassTest cases have DXIL opcodes, so they will be handled in
459 // DXIL Op Lowering instead for all non f16 cases.
460 switch (TCI->getZExtValue()) {
462 return expand16BitIsInf(Orig);
464 return expand16BitIsNaN(Orig);
466 return expand16BitIsNormal(Orig);
468 return expand16BitIsFinite(Orig);
469 }
470
471 IRBuilder<> Builder(Orig);
472
473 Value *F = Orig->getArgOperand(0);
474 Type *FTy = F->getType();
475 unsigned FNumElem = 0; // 0 => F is not a vector
476
477 unsigned BitWidth; // Bit width of F or the ElemTy of F
478 Type *BitCastTy; // An IntNTy of the same bitwidth as F or ElemTy of F
479
480 if (auto *FVecTy = dyn_cast<FixedVectorType>(FTy)) {
481 Type *ElemTy = FVecTy->getElementType();
482 FNumElem = FVecTy->getNumElements();
483 BitWidth = ElemTy->getPrimitiveSizeInBits();
484 BitCastTy = FixedVectorType::get(Builder.getIntNTy(BitWidth), FNumElem);
485 } else {
487 BitCastTy = Builder.getIntNTy(BitWidth);
488 }
489
490 Value *FBitCast = Builder.CreateBitCast(F, BitCastTy);
491 switch (TCI->getZExtValue()) {
493 Value *NegZero =
494 ConstantInt::get(Builder.getIntNTy(BitWidth), 1 << (BitWidth - 1),
495 /*IsSigned=*/true);
496 Value *RetVal;
497 if (FNumElem) {
498 Value *NegZeroSplat = Builder.CreateVectorSplat(FNumElem, NegZero);
499 RetVal =
500 Builder.CreateICmpEQ(FBitCast, NegZeroSplat, "is.fpclass.negzero");
501 } else
502 RetVal = Builder.CreateICmpEQ(FBitCast, NegZero, "is.fpclass.negzero");
503 return RetVal;
504 }
505 default:
506 reportFatalUsageError("Unsupported FPClassTest");
507 }
508}
509
511 Intrinsic::ID IntrinsicId) {
512 Value *X = Orig->getOperand(0);
513 IRBuilder<> Builder(Orig);
514 Type *Ty = X->getType();
515 Type *EltTy = Ty->getScalarType();
516
517 auto ApplyOp = [&Builder](Intrinsic::ID IntrinsicId, Value *Result,
518 Value *Elt) {
519 if (IntrinsicId == Intrinsic::dx_any)
520 return Builder.CreateOr(Result, Elt);
521 assert(IntrinsicId == Intrinsic::dx_all);
522 return Builder.CreateAnd(Result, Elt);
523 };
524
525 Value *Result = nullptr;
526 if (!Ty->isVectorTy()) {
527 Result = EltTy->isFloatingPointTy()
528 ? Builder.CreateFCmpUNE(X, ConstantFP::get(EltTy, 0))
529 : Builder.CreateICmpNE(X, ConstantInt::get(EltTy, 0));
530 } else {
531 auto *XVec = dyn_cast<FixedVectorType>(Ty);
532 Value *Cond =
533 EltTy->isFloatingPointTy()
534 ? Builder.CreateFCmpUNE(
536 ElementCount::getFixed(XVec->getNumElements()),
537 ConstantFP::get(EltTy, 0)))
538 : Builder.CreateICmpNE(
540 ElementCount::getFixed(XVec->getNumElements()),
541 ConstantInt::get(EltTy, 0)));
542 Result = Builder.CreateExtractElement(Cond, (uint64_t)0);
543 for (unsigned I = 1; I < XVec->getNumElements(); I++) {
544 Value *Elt = Builder.CreateExtractElement(Cond, I);
545 Result = ApplyOp(IntrinsicId, Result, Elt);
546 }
547 }
548 return Result;
549}
550
552 Value *X = Orig->getOperand(0);
553 Value *Y = Orig->getOperand(1);
554 Value *S = Orig->getOperand(2);
555 IRBuilder<> Builder(Orig);
556 auto *V = Builder.CreateFSub(Y, X);
557 V = Builder.CreateFMul(S, V);
558 return Builder.CreateFAdd(X, V, "dx.lerp");
559}
560
562 float LogConstVal = numbers::ln2f) {
563 Value *X = Orig->getOperand(0);
564 IRBuilder<> Builder(Orig);
565 Type *Ty = X->getType();
566 Type *EltTy = Ty->getScalarType();
567 Constant *Ln2Const =
568 Ty->isVectorTy() ? ConstantVector::getSplat(
571 ConstantFP::get(EltTy, LogConstVal))
572 : ConstantFP::get(EltTy, LogConstVal);
573 CallInst *Log2Call = Builder.CreateIntrinsicWithoutFolding(
574 Ty, Intrinsic::log2, {X}, nullptr, "elt.log2");
575 Log2Call->setTailCall(Orig->isTailCall());
576 Log2Call->setAttributes(Orig->getAttributes());
577 return Builder.CreateFMul(Ln2Const, Log2Call);
578}
582
583// Use dot product of vector operand with itself to calculate the length.
584// Divide the vector by that length to normalize it.
586 Value *X = Orig->getOperand(0);
587 Type *Ty = Orig->getType();
588 Type *EltTy = Ty->getScalarType();
589 IRBuilder<> Builder(Orig);
590
591 auto *XVec = dyn_cast<FixedVectorType>(Ty);
592 if (!XVec) {
593 if (auto *constantFP = dyn_cast<ConstantFP>(X)) {
594 const APFloat &fpVal = constantFP->getValueAPF();
595 if (fpVal.isZero())
596 reportFatalUsageError("Invalid input scalar: length is zero");
597 }
598 return Builder.CreateFDiv(X, X);
599 }
600
601 Value *DotProduct = expandFloatDotIntrinsic(Orig, X, X);
602
603 // verify that the length is non-zero
604 // (if the dot product is non-zero, then the length is non-zero)
605 if (auto *constantFP = dyn_cast<ConstantFP>(DotProduct)) {
606 const APFloat &fpVal = constantFP->getValueAPF();
607 if (fpVal.isZero())
608 reportFatalUsageError("Invalid input vector: length is zero");
609 }
610
611 Value *Multiplicand = Builder.CreateIntrinsic(EltTy, Intrinsic::dx_rsqrt,
612 ArrayRef<Value *>{DotProduct},
613 nullptr, "dx.rsqrt");
614
615 Value *MultiplicandVec =
616 Builder.CreateVectorSplat(XVec->getNumElements(), Multiplicand);
617 return Builder.CreateFMul(X, MultiplicandVec);
618}
619
621 Value *Y = Orig->getOperand(0);
622 Value *X = Orig->getOperand(1);
623 Type *Ty = X->getType();
624 IRBuilder<> Builder(Orig);
625 Builder.setFastMathFlags(Orig->getFastMathFlags());
626
627 Value *Tan = Builder.CreateFDiv(Y, X);
628
629 CallInst *Atan = Builder.CreateIntrinsicWithoutFolding(
630 Ty, Intrinsic::atan, {Tan}, nullptr, "Elt.Atan");
631 Atan->setTailCall(Orig->isTailCall());
632 Atan->setAttributes(Orig->getAttributes());
633
634 // Modify atan result based on https://en.wikipedia.org/wiki/Atan2.
635 Constant *Pi = ConstantFP::get(Ty, llvm::numbers::pi);
636 Constant *HalfPi = ConstantFP::get(Ty, llvm::numbers::pi / 2);
637 Constant *NegHalfPi = ConstantFP::get(Ty, -llvm::numbers::pi / 2);
638 Constant *Zero = ConstantFP::get(Ty, 0);
639 Value *AtanAddPi = Builder.CreateFAdd(Atan, Pi);
640 Value *AtanSubPi = Builder.CreateFSub(Atan, Pi);
641
642 // x > 0 -> atan.
643 Value *Result = Atan;
644 Value *XLt0 = Builder.CreateFCmpOLT(X, Zero);
645 Value *XEq0 = Builder.CreateFCmpOEQ(X, Zero);
646 Value *YGe0 = Builder.CreateFCmpOGE(Y, Zero);
647 Value *YLt0 = Builder.CreateFCmpOLT(Y, Zero);
648
649 // x < 0, y >= 0 -> atan + pi.
650 Value *XLt0AndYGe0 = Builder.CreateAnd(XLt0, YGe0);
651 Result = Builder.CreateSelect(XLt0AndYGe0, AtanAddPi, Result);
652
653 // x < 0, y < 0 -> atan - pi.
654 Value *XLt0AndYLt0 = Builder.CreateAnd(XLt0, YLt0);
655 Result = Builder.CreateSelect(XLt0AndYLt0, AtanSubPi, Result);
656
657 // x == 0, y < 0 -> -pi/2
658 Value *XEq0AndYLt0 = Builder.CreateAnd(XEq0, YLt0);
659 Result = Builder.CreateSelect(XEq0AndYLt0, NegHalfPi, Result);
660
661 // x == 0, y > 0 -> pi/2
662 Value *XEq0AndYGe0 = Builder.CreateAnd(XEq0, YGe0);
663 Result = Builder.CreateSelect(XEq0AndYGe0, HalfPi, Result);
664
665 return Result;
666}
667
668template <bool LeftFunnel>
670 Type *Ty = Orig->getType();
671 Value *A = Orig->getOperand(0);
672 Value *B = Orig->getOperand(1);
673 Value *Shift = Orig->getOperand(2);
674
675 IRBuilder<> Builder(Orig);
676
677 unsigned BitWidth = Ty->getScalarSizeInBits();
679 "Can't use Mask to compute modulo and inverse");
680
681 // Note: if (Shift % BitWidth) == 0 then (BitWidth - Shift) == BitWidth,
682 // shifting by the bitwidth for shl/lshr returns a poisoned result. As such,
683 // we implement the same formula as LegalizerHelper::lowerFunnelShiftAsShifts.
684 //
685 // The funnel shift is expanded like so:
686 // fshl
687 // -> msb_extract((concat(A, B) << (Shift % BitWidth)), BitWidth)
688 // -> A << (Shift % BitWidth) | B >> 1 >> (BitWidth - 1 - (Shift % BitWidth))
689 // fshr
690 // -> lsb_extract((concat(A, B) >> (Shift % BitWidth), BitWidth))
691 // -> A << 1 << (BitWidth - 1 - (Shift % BitWidth)) | B >> (Shift % BitWidth)
692
693 // (BitWidth - 1) -> Mask
694 Constant *Mask = ConstantInt::get(Ty, Ty->getScalarSizeInBits() - 1);
695
696 // Shift % BitWidth
697 // -> Shift & (BitWidth - 1)
698 // -> Shift & Mask
699 Value *MaskedShift = Builder.CreateAnd(Shift, Mask);
700
701 // (BitWidth - 1) - (Shift % BitWidth)
702 // -> ~Shift & (BitWidth - 1)
703 // -> ~Shift & Mask
704 Value *NotShift = Builder.CreateNot(Shift);
705 Value *InverseShift = Builder.CreateAnd(NotShift, Mask);
706
707 Constant *One = ConstantInt::get(Ty, 1);
708 Value *ShiftedA;
709 Value *ShiftedB;
710
711 if (LeftFunnel) {
712 ShiftedA = Builder.CreateShl(A, MaskedShift);
713 Value *ShiftB1 = Builder.CreateLShr(B, One);
714 ShiftedB = Builder.CreateLShr(ShiftB1, InverseShift);
715 } else {
716 Value *ShiftA1 = Builder.CreateShl(A, One);
717 ShiftedA = Builder.CreateShl(ShiftA1, InverseShift);
718 ShiftedB = Builder.CreateLShr(B, MaskedShift);
719 }
720
721 Value *Result = Builder.CreateOr(ShiftedA, ShiftedB);
722 return Result;
723}
724
725static Value *expandPowIntrinsic(CallInst *Orig, Intrinsic::ID IntrinsicId) {
726
727 Value *X = Orig->getOperand(0);
728 Value *Y = Orig->getOperand(1);
729 Type *Ty = X->getType();
730 IRBuilder<> Builder(Orig);
731
732 if (IntrinsicId == Intrinsic::powi)
733 Y = Builder.CreateSIToFP(Y, Ty);
734
735 Value *Log2Call =
736 Builder.CreateIntrinsic(Ty, Intrinsic::log2, {X}, nullptr, "elt.log2");
737 auto *Mul = Builder.CreateFMul(Log2Call, Y);
738 CallInst *Exp2Call = Builder.CreateIntrinsicWithoutFolding(
739 Ty, Intrinsic::exp2, {Mul}, nullptr, "elt.exp2");
740 Exp2Call->setTailCall(Orig->isTailCall());
741 Exp2Call->setAttributes(Orig->getAttributes());
742 return Exp2Call;
743}
744
746
747 Value *X = Orig->getOperand(0);
748 Value *Y = Orig->getOperand(1);
749 Type *Ty = X->getType();
750 IRBuilder<> Builder(Orig);
751
752 Constant *One = ConstantFP::get(Ty->getScalarType(), 1.0);
753 Constant *Zero = ConstantFP::get(Ty->getScalarType(), 0.0);
754 Value *Cond = Builder.CreateFCmpOLT(Y, X);
755
756 if (Ty != Ty->getScalarType()) {
757 auto *XVec = dyn_cast<FixedVectorType>(Ty);
759 ElementCount::getFixed(XVec->getNumElements()), One);
761 ElementCount::getFixed(XVec->getNumElements()), Zero);
762 }
763
764 return Builder.CreateSelect(Cond, Zero, One);
765}
766
768 Value *X = Orig->getOperand(0);
769 Type *Ty = X->getType();
770 IRBuilder<> Builder(Orig);
771 Value *PiOver180 = ConstantFP::get(Ty, llvm::numbers::pi / 180.0);
772 return Builder.CreateFMul(X, PiOver180);
773}
774
777 // Lower @llvm.dx.interlocked.OP(ptr, val) to `atomicrmw OP ptr, val
778 // monotonic`. HLSL Interlocked operations imply no fence/barrier, which maps
779 // to monotonic ordering. The instruction's result is the old value, matching
780 // the intrinsic's return value.
781 Value *Ptr = Orig->getArgOperand(0);
782 Value *Val = Orig->getArgOperand(1);
783 IRBuilder<> Builder(Orig);
784 return Builder.CreateAtomicRMW(Op, Ptr, Val, MaybeAlign(),
786}
787
788static bool expandBufferLoadIntrinsic(CallInst *Orig, bool IsRaw) {
789 IRBuilder<> Builder(Orig);
790
791 Type *BufferTy = Orig->getType()->getStructElementType(0);
792 Type *ScalarTy = BufferTy->getScalarType();
793 bool IsDouble = ScalarTy->isDoubleTy();
794 assert(IsDouble || ScalarTy->isIntegerTy(64) &&
795 "Only expand double or int64 scalars or vectors");
796 bool IsVector = false;
797 unsigned ExtractNum = 2;
798 if (auto *VT = dyn_cast<FixedVectorType>(BufferTy)) {
799 ExtractNum = 2 * VT->getNumElements();
800 IsVector = true;
801 assert(IsRaw || ExtractNum == 4 && "TypedBufferLoad vector must be size 2");
802 }
803
805 Value *Result = PoisonValue::get(BufferTy);
806 unsigned Base = 0;
807 // If we need to extract more than 4 i32; we need to break it up into
808 // more than one load. LoadNum tells us how many i32s we are loading in
809 // each load
810 while (ExtractNum > 0) {
811 unsigned LoadNum = std::min(ExtractNum, 4u);
812 Type *Ty = VectorType::get(Builder.getInt32Ty(), LoadNum, false);
813
814 Type *LoadType = StructType::get(Ty, Builder.getInt1Ty());
815 Intrinsic::ID LoadIntrinsic = Intrinsic::dx_resource_load_typedbuffer;
816 SmallVector<Value *, 3> Args = {Orig->getOperand(0), Orig->getOperand(1)};
817 if (IsRaw) {
818 LoadIntrinsic = Intrinsic::dx_resource_load_rawbuffer;
819 Value *Tmp = Builder.getInt32(4 * Base * 2);
820 Args.push_back(Builder.CreateAdd(Orig->getOperand(2), Tmp));
821 }
822
823 Value *Load = Builder.CreateIntrinsic(LoadType, LoadIntrinsic, Args);
824 Loads.push_back(Load);
825
826 // extract the buffer load's result
827 Value *Extract = Builder.CreateExtractValue(Load, {0});
828
829 SmallVector<Value *> ExtractElements;
830 for (unsigned I = 0; I < LoadNum; ++I)
831 ExtractElements.push_back(
832 Builder.CreateExtractElement(Extract, Builder.getInt32(I)));
833
834 // combine into double(s) or int64(s)
835 for (unsigned I = 0; I < LoadNum; I += 2) {
836 Value *Combined = nullptr;
837 if (IsDouble)
838 // For doubles, use dx_asdouble intrinsic
839 Combined = Builder.CreateIntrinsic(
840 Builder.getDoubleTy(), Intrinsic::dx_asdouble,
841 {ExtractElements[I], ExtractElements[I + 1]});
842 else {
843 // For int64, manually combine two int32s
844 // First, zero-extend both values to i64
845 Value *Lo =
846 Builder.CreateZExt(ExtractElements[I], Builder.getInt64Ty());
847 Value *Hi =
848 Builder.CreateZExt(ExtractElements[I + 1], Builder.getInt64Ty());
849 // Shift the high bits left by 32 bits
850 Value *ShiftedHi = Builder.CreateShl(Hi, Builder.getInt64(32));
851 // OR the high and low bits together
852 Combined = Builder.CreateOr(Lo, ShiftedHi);
853 }
854
855 if (IsVector)
856 Result = Builder.CreateInsertElement(Result, Combined,
857 Builder.getInt32((I / 2) + Base));
858 else
859 Result = Combined;
860 }
861
862 ExtractNum -= LoadNum;
863 Base += LoadNum / 2;
864 }
865
866 Value *CheckBit = nullptr;
867 for (User *U : make_early_inc_range(Orig->users())) {
868 // If it's not a ExtractValueInst, we don't know how to
869 // handle it
870 auto *EVI = dyn_cast<ExtractValueInst>(U);
871 if (!EVI)
872 llvm_unreachable("Unexpected user of typedbufferload");
873
874 ArrayRef<unsigned> Indices = EVI->getIndices();
875 assert(Indices.size() == 1);
876
877 if (Indices[0] == 0) {
878 // Use of the value(s)
879 EVI->replaceAllUsesWith(Result);
880 } else {
881 // Use of the check bit
882 assert(Indices[0] == 1 && "Unexpected type for typedbufferload");
883 // Note: This does not always match the historical behaviour of DXC.
884 // See https://github.com/microsoft/DirectXShaderCompiler/issues/7622
885 if (!CheckBit) {
886 SmallVector<Value *, 2> CheckBits;
887 for (Value *L : Loads)
888 CheckBits.push_back(Builder.CreateExtractValue(L, {1}));
889 CheckBit = Builder.CreateAnd(CheckBits);
890 }
891 EVI->replaceAllUsesWith(CheckBit);
892 }
893 EVI->eraseFromParent();
894 }
895 Orig->eraseFromParent();
896 return true;
897}
898
899static bool expandBufferStoreIntrinsic(CallInst *Orig, bool IsRaw) {
900 IRBuilder<> Builder(Orig);
901
902 unsigned ValIndex = IsRaw ? 3 : 2;
903 Type *BufferTy = Orig->getFunctionType()->getParamType(ValIndex);
904 Type *ScalarTy = BufferTy->getScalarType();
905 bool IsDouble = ScalarTy->isDoubleTy();
906 assert((IsDouble || ScalarTy->isIntegerTy(64)) &&
907 "Only expand double or int64 scalars or vectors");
908
909 // Determine if we're dealing with a vector or scalar
910 bool IsVector = false;
911 unsigned ExtractNum = 2;
912 unsigned VecLen = 0;
913 if (auto *VT = dyn_cast<FixedVectorType>(BufferTy)) {
914 VecLen = VT->getNumElements();
915 assert(IsRaw || VecLen == 2 && "TypedBufferStore vector must be size 2");
916 ExtractNum = VecLen * 2;
917 IsVector = true;
918 }
919
920 // Create the appropriate vector type for the result
921 Type *Int32Ty = Builder.getInt32Ty();
922 Type *ResultTy = VectorType::get(Int32Ty, ExtractNum, false);
923 Value *Val = PoisonValue::get(ResultTy);
924
925 Type *SplitElementTy = Int32Ty;
926 if (IsVector)
927 SplitElementTy = VectorType::get(SplitElementTy, VecLen, false);
928
929 Value *LowBits = nullptr;
930 Value *HighBits = nullptr;
931 // Split the 64-bit values into 32-bit components
932 if (IsDouble) {
933 auto *SplitTy = llvm::StructType::get(SplitElementTy, SplitElementTy);
934 Value *Split = Builder.CreateIntrinsic(SplitTy, Intrinsic::dx_splitdouble,
935 {Orig->getOperand(ValIndex)});
936 LowBits = Builder.CreateExtractValue(Split, 0);
937 HighBits = Builder.CreateExtractValue(Split, 1);
938 } else {
939 // Handle int64 type(s)
940 Value *InputVal = Orig->getOperand(ValIndex);
941 Constant *ShiftAmt = Builder.getInt64(32);
942 if (IsVector)
943 ShiftAmt =
945
946 // Split into low and high 32-bit parts
947 LowBits = Builder.CreateTrunc(InputVal, SplitElementTy);
948 Value *ShiftedVal = Builder.CreateLShr(InputVal, ShiftAmt);
949 HighBits = Builder.CreateTrunc(ShiftedVal, SplitElementTy);
950 }
951
952 if (IsVector) {
954 for (unsigned I = 0; I < VecLen; ++I) {
955 Mask.push_back(I);
956 Mask.push_back(I + VecLen);
957 }
958 Val = Builder.CreateShuffleVector(LowBits, HighBits, Mask);
959 } else {
960 Val = Builder.CreateInsertElement(Val, LowBits, Builder.getInt32(0));
961 Val = Builder.CreateInsertElement(Val, HighBits, Builder.getInt32(1));
962 }
963
964 // If we need to extract more than 4 i32; we need to break it up into
965 // more than one store. StoreNum tells us how many i32s we are storing in
966 // each store
967 unsigned Base = 0;
968 while (ExtractNum > 0) {
969 unsigned StoreNum = std::min(ExtractNum, 4u);
970
971 Intrinsic::ID StoreIntrinsic = Intrinsic::dx_resource_store_typedbuffer;
972 SmallVector<Value *, 4> Args = {Orig->getOperand(0), Orig->getOperand(1)};
973 if (IsRaw) {
974 StoreIntrinsic = Intrinsic::dx_resource_store_rawbuffer;
975 Value *Tmp = Builder.getInt32(4 * Base);
976 Args.push_back(Builder.CreateAdd(Orig->getOperand(2), Tmp));
977 }
978
980 for (unsigned I = 0; I < StoreNum; ++I) {
981 Mask.push_back(Base + I);
982 }
983
984 Value *SubVal = Val;
985 if (VecLen > 2)
986 SubVal = Builder.CreateShuffleVector(Val, Mask);
987
988 Args.push_back(SubVal);
989 // Create the final intrinsic call
990 Builder.CreateIntrinsic(Builder.getVoidTy(), StoreIntrinsic, Args);
991
992 ExtractNum -= StoreNum;
993 Base += StoreNum;
994 }
995 Orig->eraseFromParent();
996 return true;
997}
998
1000 if (ClampIntrinsic == Intrinsic::dx_uclamp)
1001 return Intrinsic::umax;
1002 if (ClampIntrinsic == Intrinsic::dx_sclamp)
1003 return Intrinsic::smax;
1004 assert(ClampIntrinsic == Intrinsic::dx_nclamp);
1005 return Intrinsic::maxnum;
1006}
1007
1009 if (ClampIntrinsic == Intrinsic::dx_uclamp)
1010 return Intrinsic::umin;
1011 if (ClampIntrinsic == Intrinsic::dx_sclamp)
1012 return Intrinsic::smin;
1013 assert(ClampIntrinsic == Intrinsic::dx_nclamp);
1014 return Intrinsic::minnum;
1015}
1016
1018 Intrinsic::ID ClampIntrinsic) {
1019 Value *X = Orig->getOperand(0);
1020 Value *Min = Orig->getOperand(1);
1021 Value *Max = Orig->getOperand(2);
1022 Type *Ty = X->getType();
1023 IRBuilder<> Builder(Orig);
1024 auto *MaxCall = Builder.CreateIntrinsic(Ty, getMaxForClamp(ClampIntrinsic),
1025 {X, Min}, nullptr, "dx.max");
1026 return Builder.CreateIntrinsic(Ty, getMinForClamp(ClampIntrinsic),
1027 {MaxCall, Max}, nullptr, "dx.min");
1028}
1029
1031 Value *X = Orig->getOperand(0);
1032 Type *Ty = X->getType();
1033 IRBuilder<> Builder(Orig);
1034 Value *DegreesRatio = ConstantFP::get(Ty, 180.0 * llvm::numbers::inv_pi);
1035 return Builder.CreateFMul(X, DegreesRatio);
1036}
1037
1039 Value *X = Orig->getOperand(0);
1040 Type *Ty = X->getType();
1041 Type *ScalarTy = Ty->getScalarType();
1042 Type *RetTy = Orig->getType();
1043 Constant *Zero = Constant::getNullValue(Ty);
1044
1045 IRBuilder<> Builder(Orig);
1046
1047 Value *GT;
1048 Value *LT;
1049 if (ScalarTy->isFloatingPointTy()) {
1050 GT = Builder.CreateFCmpOLT(Zero, X);
1051 LT = Builder.CreateFCmpOLT(X, Zero);
1052 } else {
1053 assert(ScalarTy->isIntegerTy());
1054 GT = Builder.CreateICmpSLT(Zero, X);
1055 LT = Builder.CreateICmpSLT(X, Zero);
1056 }
1057
1058 Value *ZextGT = Builder.CreateZExt(GT, RetTy);
1059 Value *ZextLT = Builder.CreateZExt(LT, RetTy);
1060
1061 return Builder.CreateSub(ZextGT, ZextLT);
1062}
1063
1064// Expand llvm.matrix.multiply by extracting row/column vectors and computing
1065// dot products.
1066// Result[r,c] = dot(row_r(LHS), col_c(RHS))
1067// Element (r,c) is at index c*NumRows + r (column-major).
1069 Value *LHS = Orig->getArgOperand(0);
1070 Value *RHS = Orig->getArgOperand(1);
1071 unsigned LHSRows = cast<ConstantInt>(Orig->getArgOperand(2))->getZExtValue();
1072 unsigned LHSCols = cast<ConstantInt>(Orig->getArgOperand(3))->getZExtValue();
1073 unsigned RHSCols = cast<ConstantInt>(Orig->getArgOperand(4))->getZExtValue();
1074
1075 auto *RetTy = cast<FixedVectorType>(Orig->getType());
1076 Type *EltTy = RetTy->getElementType();
1077 bool IsFP = EltTy->isFloatingPointTy();
1078
1079 IRBuilder<> Builder(Orig);
1080
1081 // Column-major indexing:
1082 // LHS row R, element K: index = K * LHSRows + R
1083 // RHS col C, element K: index = C * LHSCols + K
1084 Value *Result = PoisonValue::get(RetTy);
1085
1086 // Extract all scalar elements from LHS and RHS once, then reuse them.
1087 unsigned LHSSize = LHSRows * LHSCols;
1088 unsigned RHSSize = LHSCols * RHSCols;
1089 SmallVector<Value *, 16> LHSElts(LHSSize);
1090 SmallVector<Value *, 16> RHSElts(RHSSize);
1091 for (unsigned I = 0; I < LHSSize; ++I)
1092 LHSElts[I] = Builder.CreateExtractElement(LHS, I);
1093 for (unsigned I = 0; I < RHSSize; ++I)
1094 RHSElts[I] = Builder.CreateExtractElement(RHS, I);
1095
1096 // Choose the appropriate scalar-arg dot intrinsic for floats.
1097 // K=1 and double types use scalar expansion instead.
1099 bool UseScalarFP = IsFP && (EltTy->isDoubleTy() || LHSCols == 1);
1100 if (IsFP && !UseScalarFP) {
1101 switch (LHSCols) {
1102 case 2:
1103 FloatDotID = Intrinsic::dx_dot2;
1104 break;
1105 case 3:
1106 FloatDotID = Intrinsic::dx_dot3;
1107 break;
1108 case 4:
1109 FloatDotID = Intrinsic::dx_dot4;
1110 break;
1111 default:
1113 "Invalid matrix inner dimension for dot product: must be 2-4");
1114 return nullptr;
1115 }
1116 }
1117
1118 for (unsigned C = 0; C < RHSCols; ++C) {
1119 for (unsigned R = 0; R < LHSRows; ++R) {
1120 // Gather row R from LHS and column C from RHS.
1121 SmallVector<Value *, 4> RowElts, ColElts;
1122 for (unsigned K = 0; K < LHSCols; ++K) {
1123 RowElts.push_back(LHSElts[K * LHSRows + R]);
1124 ColElts.push_back(RHSElts[C * LHSCols + K]);
1125 }
1126
1127 Value *Dot;
1128 if (UseScalarFP) {
1129 // Scalar fmul+fmuladd expansion for double types and K=1.
1130 Dot = Builder.CreateFMul(RowElts[0], ColElts[0]);
1131 for (unsigned K = 1; K < LHSCols; ++K)
1132 Dot = Builder.CreateIntrinsic(EltTy, Intrinsic::fmuladd,
1133 {RowElts[K], ColElts[K], Dot});
1134 } else if (IsFP) {
1135 // Emit scalar-arg DXIL dot directly (dx.dot2/dx.dot3/dx.dot4).
1137 Args.append(RowElts.begin(), RowElts.end());
1138 Args.append(ColElts.begin(), ColElts.end());
1139 Dot = Builder.CreateIntrinsic(EltTy, FloatDotID, Args);
1140 } else {
1141 // Integer: emit multiply + imad chain.
1142 Dot = Builder.CreateMul(RowElts[0], ColElts[0]);
1143 for (unsigned K = 1; K < LHSCols; ++K)
1144 Dot = Builder.CreateIntrinsic(EltTy, Intrinsic::dx_imad,
1145 {RowElts[K], ColElts[K], Dot});
1146 }
1147 unsigned ResIdx = C * LHSRows + R;
1148 Result = Builder.CreateInsertElement(Result, Dot, ResIdx);
1149 }
1150 }
1151 return Result;
1152}
1153
1154// Expand llvm.matrix.transpose as a shufflevector that permutes elements
1155// from column-major source to column-major transposed layout.
1156// Element (r,c) at index c*Rows + r moves to index r*Cols + c.
1158 Value *Mat = Orig->getArgOperand(0);
1159 unsigned Rows = cast<ConstantInt>(Orig->getArgOperand(1))->getZExtValue();
1160 unsigned Cols = cast<ConstantInt>(Orig->getArgOperand(2))->getZExtValue();
1161
1162 unsigned NumElts = Rows * Cols;
1163 SmallVector<int, 16> Mask(NumElts);
1164 for (unsigned I = 0; I < NumElts; ++I)
1165 Mask[I] = (I % Cols) * Rows + (I / Cols);
1166
1167 IRBuilder<> Builder(Orig);
1168 return Builder.CreateShuffleVector(Mat, Mask);
1169}
1170
1171static bool expandIntrinsic(Function &F, CallInst *Orig) {
1172 Value *Result = nullptr;
1173 Intrinsic::ID IntrinsicId = F.getIntrinsicID();
1174 switch (IntrinsicId) {
1175 case Intrinsic::abs:
1176 Result = expandAbs(Orig);
1177 break;
1178 case Intrinsic::assume:
1179 Orig->eraseFromParent();
1180 return true;
1181 case Intrinsic::atan2:
1182 Result = expandAtan2Intrinsic(Orig);
1183 break;
1184 case Intrinsic::fshl:
1185 Result = expandFunnelShiftIntrinsic<true>(Orig);
1186 break;
1187 case Intrinsic::fshr:
1188 Result = expandFunnelShiftIntrinsic<false>(Orig);
1189 break;
1190 case Intrinsic::exp:
1191 Result = expandExpIntrinsic(Orig);
1192 break;
1193 case Intrinsic::is_fpclass:
1194 Result = expandIsFPClass(Orig);
1195 break;
1196 case Intrinsic::log:
1197 Result = expandLogIntrinsic(Orig);
1198 break;
1199 case Intrinsic::log10:
1200 Result = expandLog10Intrinsic(Orig);
1201 break;
1202 case Intrinsic::pow:
1203 case Intrinsic::powi:
1204 Result = expandPowIntrinsic(Orig, IntrinsicId);
1205 break;
1206 case Intrinsic::dx_all:
1207 case Intrinsic::dx_any:
1208 Result = expandAnyOrAllIntrinsic(Orig, IntrinsicId);
1209 break;
1210 case Intrinsic::dx_cross:
1211 Result = expandCrossIntrinsic(Orig);
1212 break;
1213 case Intrinsic::dx_uclamp:
1214 case Intrinsic::dx_sclamp:
1215 case Intrinsic::dx_nclamp:
1216 Result = expandClampIntrinsic(Orig, IntrinsicId);
1217 break;
1218 case Intrinsic::dx_degrees:
1219 Result = expandDegreesIntrinsic(Orig);
1220 break;
1221 case Intrinsic::dx_isinf:
1222 Result = expand16BitIsInf(Orig);
1223 break;
1224 case Intrinsic::dx_isnan:
1225 Result = expand16BitIsNaN(Orig);
1226 break;
1227 case Intrinsic::dx_lerp:
1228 Result = expandLerpIntrinsic(Orig);
1229 break;
1230 case Intrinsic::dx_normalize:
1231 Result = expandNormalizeIntrinsic(Orig);
1232 break;
1233 case Intrinsic::dx_fdot:
1234 Result = expandFloatDotIntrinsic(Orig);
1235 break;
1236 case Intrinsic::dx_sdot:
1237 case Intrinsic::dx_udot:
1238 Result = expandIntegerDotIntrinsic(Orig, IntrinsicId);
1239 break;
1240 case Intrinsic::dx_sign:
1241 Result = expandSignIntrinsic(Orig);
1242 break;
1243 case Intrinsic::dx_step:
1244 Result = expandStepIntrinsic(Orig);
1245 break;
1246 case Intrinsic::dx_radians:
1247 Result = expandRadiansIntrinsic(Orig);
1248 break;
1249 case Intrinsic::dx_interlocked_add:
1251 break;
1252 case Intrinsic::dx_interlocked_or:
1254 break;
1255 case Intrinsic::dx_resource_load_rawbuffer:
1256 if (expandBufferLoadIntrinsic(Orig, /*IsRaw*/ true))
1257 return true;
1258 break;
1259 case Intrinsic::dx_resource_store_rawbuffer:
1260 if (expandBufferStoreIntrinsic(Orig, /*IsRaw*/ true))
1261 return true;
1262 break;
1263 case Intrinsic::dx_resource_load_typedbuffer:
1264 if (expandBufferLoadIntrinsic(Orig, /*IsRaw*/ false))
1265 return true;
1266 break;
1267 case Intrinsic::dx_resource_store_typedbuffer:
1268 if (expandBufferStoreIntrinsic(Orig, /*IsRaw*/ false))
1269 return true;
1270 break;
1271 case Intrinsic::usub_sat:
1272 Result = expandUsubSat(Orig);
1273 break;
1274 case Intrinsic::vector_reduce_add:
1275 case Intrinsic::vector_reduce_fadd:
1276 Result = expandVecReduceAdd(Orig, IntrinsicId);
1277 break;
1278 case Intrinsic::matrix_multiply:
1279 Result = expandMatrixMultiply(Orig);
1280 break;
1281 case Intrinsic::matrix_transpose:
1282 Result = expandMatrixTranspose(Orig);
1283 break;
1284 }
1285 if (Result) {
1286 Orig->replaceAllUsesWith(Result);
1287 Orig->eraseFromParent();
1288 return true;
1289 }
1290 return false;
1291}
1292
1294 for (auto &F : make_early_inc_range(M.functions())) {
1295 if (!isIntrinsicExpansion(F))
1296 continue;
1297 bool IntrinsicExpanded = false;
1298 for (User *U : make_early_inc_range(F.users())) {
1299 auto *IntrinsicCall = dyn_cast<CallInst>(U);
1300 if (!IntrinsicCall)
1301 continue;
1302 IntrinsicExpanded = expandIntrinsic(F, IntrinsicCall);
1303 }
1304 if (F.user_empty() && IntrinsicExpanded)
1305 F.eraseFromParent();
1306 }
1307 return true;
1308}
1309
1316
1320
1322
1324 "DXIL Intrinsic Expansion", false, false)
1326 "DXIL Intrinsic Expansion", false, false)
1327
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define X(NUM, ENUM, NAME)
Definition ELF.h:856
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static Value * expand16BitIsNormal(CallInst *Orig)
static Value * expandNormalizeIntrinsic(CallInst *Orig)
static bool expandIntrinsic(Function &F, CallInst *Orig)
static Value * expandClampIntrinsic(CallInst *Orig, Intrinsic::ID ClampIntrinsic)
static Value * expand16BitIsInf(CallInst *Orig)
static bool expansionIntrinsics(Module &M)
static Value * expand16BitIsFinite(CallInst *Orig)
static Value * expandLerpIntrinsic(CallInst *Orig)
static Value * expandCrossIntrinsic(CallInst *Orig)
static Value * expandUsubSat(CallInst *Orig)
static Value * expandAnyOrAllIntrinsic(CallInst *Orig, Intrinsic::ID IntrinsicId)
static Value * expandMatrixTranspose(CallInst *Orig)
static Value * expandVecReduceAdd(CallInst *Orig, Intrinsic::ID IntrinsicId)
static Value * expandAtan2Intrinsic(CallInst *Orig)
static Value * expandLog10Intrinsic(CallInst *Orig)
static Intrinsic::ID getMinForClamp(Intrinsic::ID ClampIntrinsic)
static Value * expandStepIntrinsic(CallInst *Orig)
static Value * expandIntegerDotIntrinsic(CallInst *Orig, Intrinsic::ID DotIntrinsic)
static bool expandBufferStoreIntrinsic(CallInst *Orig, bool IsRaw)
static Value * expandLogIntrinsic(CallInst *Orig, float LogConstVal=numbers::ln2f)
static Value * expandDegreesIntrinsic(CallInst *Orig)
static Value * expandPowIntrinsic(CallInst *Orig, Intrinsic::ID IntrinsicId)
static bool resourceAccessNeeds64BitExpansion(Module *M, Type *OverloadTy, bool IsRaw)
static Value * expandExpIntrinsic(CallInst *Orig)
static Value * expand16BitIsNaN(CallInst *Orig)
static Value * expandSignIntrinsic(CallInst *Orig)
static Intrinsic::ID getMaxForClamp(Intrinsic::ID ClampIntrinsic)
static Value * expandAbs(CallInst *Orig)
static Value * expandFloatDotIntrinsic(CallInst *Orig, Value *A, Value *B)
static Value * expandRadiansIntrinsic(CallInst *Orig)
static Value * expandInterlockedIntrinsic(CallInst *Orig, AtomicRMWInst::BinOp Op)
static bool isIntrinsicExpansion(Function &F)
static bool expandBufferLoadIntrinsic(CallInst *Orig, bool IsRaw)
static Value * expandMatrixMultiply(CallInst *Orig)
static Value * expandIsFPClass(CallInst *Orig)
static Value * expandFunnelShiftIntrinsic(CallInst *Orig)
#define DEBUG_TYPE
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define T
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
const SmallVectorImpl< MachineOperand > & Cond
static unsigned getNumElements(Type *Ty)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
Value * RHS
Value * LHS
BinaryOperator * Mul
bool runOnModule(Module &M) override
runOnModule - Virtual method overridden by subclasses to process the module being operated on.
bool isZero() const
Definition APFloat.h:1540
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
void setAttributes(AttributeList A)
Set the attributes for this call.
Value * getArgOperand(unsigned i) const
FunctionType * getFunctionType() const
AttributeList getAttributes() const
Return the attributes for this call.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
void setTailCall(bool IsTc=true)
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
This is an important base class in LLVM.
Definition Constant.h:43
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constant.h:64
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &)
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:867
Type * getParamType(unsigned i) const
Parameter type accessors.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2848
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition Pass.h:255
ModulePass(char &pid)
Definition Pass.h:257
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:477
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI Type * getStructElementType(unsigned N) const
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:288
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:309
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:368
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
Definition Type.cpp:308
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition Type.h:144
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition Type.h:158
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:186
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:313
Value * getOperand(unsigned i) const
Definition User.h:207
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:319
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Represents a version number in the form major[.minor[.subminor[.build]]].
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
constexpr double inv_pi
constexpr float ln10f
Definition MathExtras.h:50
constexpr float log2ef
Definition MathExtras.h:51
constexpr double pi
constexpr float ln2f
Definition MathExtras.h:49
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
ModulePass * createDXILIntrinsicExpansionLegacyPass()
Pass to expand intrinsic operations that lack DXIL opCodes.
@ Sub
Subtraction of integers.
DWARFExpression::Operation Op
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106