doxygen/DXILIntrinsicExpansion_8cpp_source.html

//===- DXILIntrinsicExpansion.cpp - Prepare LLVM Module for DXIL encoding--===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

///

/// \file This file contains DXIL intrinsic expansions for those that don't have

/// opcodes in DirectX Intermediate Language (DXIL).

//===----------------------------------------------------------------------===//


#include "DXILIntrinsicExpansion.h"

#include "DirectX.h"

#include "llvm/ADT/STLExtras.h"

#include "llvm/ADT/SmallVector.h"

#include "llvm/CodeGen/Passes.h"

#include "llvm/IR/Constants.h"

#include "llvm/IR/IRBuilder.h"

#include "llvm/IR/InstrTypes.h"

#include "llvm/IR/Instruction.h"

#include "llvm/IR/Instructions.h"

#include "llvm/IR/Intrinsics.h"

#include "llvm/IR/IntrinsicsDirectX.h"

#include "llvm/IR/Module.h"

#include "llvm/IR/PassManager.h"

#include "llvm/IR/Type.h"

#include "llvm/Pass.h"

#include "llvm/Support/Casting.h"

#include "llvm/Support/ErrorHandling.h"

#include "llvm/Support/MathExtras.h"


#define DEBUG_TYPE "dxil-intrinsic-expansion"


using namespace llvm;


// Legacy pass-manager wrapper: a ModulePass whose work is done in
// runOnModule (defined out of line).
class DXILIntrinsicExpansionLegacy : public ModulePass {
public:
  // Pass identification; handed to the ModulePass base by the constructor.
  static char ID;

  DXILIntrinsicExpansionLegacy() : ModulePass(ID) {}

  // Entry point called with the module to process.
  bool runOnModule(Module &M) override;
};


// Returns true when a resource access with overload type OverloadTy has to be
// expanded because its scalar type is double or i64. Raw-buffer accesses are
// never expanded when the module targets a DXIL version newer than 1.2.
static bool resourceAccessNeeds64BitExpansion(Module *M, Type *OverloadTy,
                                              bool IsRaw) {
  if (IsRaw) {
    const VersionTuple DXILVersion = M->getTargetTriple().getDXILVersion();
    if (DXILVersion > VersionTuple(1, 2))
      return false;
  }

  Type *EltTy = OverloadTy->getScalarType();
  if (EltTy->isDoubleTy())
    return true;
  return EltTy->isIntegerTy(64);
}


// Expands an infinity test on a half (or vector-of-half) operand into integer
// compares against the raw 16-bit patterns 0x7c00 and 0xfc00. Returns nullptr,
// emitting nothing, when the module targets DXIL 1.9 or newer or when the
// operand's scalar type is not half.
static Value *expand16BitIsInf(CallInst *Orig) {
  Module *M = Orig->getModule();
  Value *Src = Orig->getOperand(0);
  Type *SrcTy = Src->getType();

  const bool SkipForVersion =
      M->getTargetTriple().getDXILVersion() >= VersionTuple(1, 9);
  if (SkipForVersion || !SrcTy->getScalarType()->isHalfTy())
    return nullptr;

  IRBuilder<> Builder(Orig);
  Type *I16Ty = Type::getInt16Ty(M->getContext());

  // Builds an i16 constant, splatted to the operand's lane count for vectors.
  auto MakeBits = [&](uint64_t Bits) -> Constant * {
    Constant *Scalar = ConstantInt::get(I16Ty, Bits);
    if (!SrcTy->isVectorTy())
      return Scalar;
    unsigned NumElts = cast<FixedVectorType>(SrcTy)->getNumElements();
    return ConstantVector::getSplat(ElementCount::getFixed(NumElts), Scalar);
  };

  Constant *PosInfBits = MakeBits(0x7c00);
  Constant *NegInfBits = MakeBits(0xfc00);

  Value *Raw = Builder.CreateBitCast(Src, PosInfBits->getType());
  Value *IsPosInf = Builder.CreateICmpEQ(Raw, PosInfBits);
  Value *IsNegInf = Builder.CreateICmpEQ(Raw, NegInfBits);
  return Builder.CreateOr(IsPosInf, IsNegInf);
}


// Expands a NaN test on a half (or vector-of-half) operand into integer
// operations: the bits under mask 0x7c00 must all be set and the bits under
// mask 0x3ff must not all be zero. Returns nullptr, emitting nothing, when the
// module targets DXIL 1.9 or newer or when the operand's scalar type is not
// half.
static Value *expand16BitIsNaN(CallInst *Orig) {
  Module *M = Orig->getModule();
  Value *Src = Orig->getOperand(0);
  Type *SrcTy = Src->getType();

  const bool SkipForVersion =
      M->getTargetTriple().getDXILVersion() >= VersionTuple(1, 9);
  if (SkipForVersion || !SrcTy->getScalarType()->isHalfTy())
    return nullptr;

  IRBuilder<> Builder(Orig);
  Type *I16Ty = Type::getInt16Ty(M->getContext());

  // Builds an i16 constant, splatted to the operand's lane count for vectors.
  auto MakeBits = [&](uint64_t Bits) -> Constant * {
    Constant *Scalar = ConstantInt::get(I16Ty, Bits);
    if (!SrcTy->isVectorTy())
      return Scalar;
    unsigned NumElts = cast<FixedVectorType>(SrcTy)->getNumElements();
    return ConstantVector::getSplat(ElementCount::getFixed(NumElts), Scalar);
  };

  Constant *ExpMask = MakeBits(0x7c00);
  Constant *SigMask = MakeBits(0x3ff);
  Constant *AllZero = MakeBits(0);

  Value *Raw = Builder.CreateBitCast(Src, ExpMask->getType());

  // Exponent field entirely ones.
  Value *ExpBits = Builder.CreateAnd(Raw, ExpMask);
  Value *ExpAllOnes = Builder.CreateICmpEQ(ExpBits, ExpMask);

  // Significand field not entirely zero.
  Value *SigBits = Builder.CreateAnd(Raw, SigMask);
  Value *SigNonZero = Builder.CreateICmpNE(SigBits, AllZero);

  return Builder.CreateAnd(ExpAllOnes, SigNonZero);
}


// Expands a finiteness test on a half (or vector-of-half) operand: the value
// is finite when the bits under mask 0x7c00 are not all set. Returns nullptr,
// emitting nothing, when the module targets DXIL 1.9 or newer or when the
// operand's scalar type is not half.
static Value *expand16BitIsFinite(CallInst *Orig) {
  Module *M = Orig->getModule();
  Value *Src = Orig->getOperand(0);
  Type *SrcTy = Src->getType();

  const bool SkipForVersion =
      M->getTargetTriple().getDXILVersion() >= VersionTuple(1, 9);
  if (SkipForVersion || !SrcTy->getScalarType()->isHalfTy())
    return nullptr;

  IRBuilder<> Builder(Orig);
  Type *I16Ty = Type::getInt16Ty(M->getContext());

  // i16 exponent mask, splatted to the operand's lane count for vectors.
  Constant *ExpMask = ConstantInt::get(I16Ty, 0x7c00);
  if (SrcTy->isVectorTy()) {
    unsigned NumElts = cast<FixedVectorType>(SrcTy)->getNumElements();
    ExpMask =
        ConstantVector::getSplat(ElementCount::getFixed(NumElts), ExpMask);
  }

  Value *Raw = Builder.CreateBitCast(Src, ExpMask->getType());
  Value *ExpBits = Builder.CreateAnd(Raw, ExpMask);
  return Builder.CreateICmpNE(ExpBits, ExpMask);
}


// Expands a "normal" test on a half (or vector-of-half) operand: the bits
// under mask 0x7c00 must be neither all zero nor all one. Returns nullptr,
// emitting nothing, when the module targets DXIL 1.9 or newer or when the
// operand's scalar type is not half.
static Value *expand16BitIsNormal(CallInst *Orig) {
  Module *M = Orig->getModule();
  Value *Src = Orig->getOperand(0);
  Type *SrcTy = Src->getType();

  const bool SkipForVersion =
      M->getTargetTriple().getDXILVersion() >= VersionTuple(1, 9);
  if (SkipForVersion || !SrcTy->getScalarType()->isHalfTy())
    return nullptr;

  IRBuilder<> Builder(Orig);
  Type *I16Ty = Type::getInt16Ty(M->getContext());

  // Builds an i16 constant, splatted to the operand's lane count for vectors.
  auto MakeBits = [&](uint64_t Bits) -> Constant * {
    Constant *Scalar = ConstantInt::get(I16Ty, Bits);
    if (!SrcTy->isVectorTy())
      return Scalar;
    unsigned NumElts = cast<FixedVectorType>(SrcTy)->getNumElements();
    return ConstantVector::getSplat(ElementCount::getFixed(NumElts), Scalar);
  };

  Constant *ExpMask = MakeBits(0x7c00);
  Constant *AllZero = MakeBits(0);

  Value *Raw = Builder.CreateBitCast(Src, ExpMask->getType());
  Value *ExpBits = Builder.CreateAnd(Raw, ExpMask);
  Value *ExpNotZero = Builder.CreateICmpNE(ExpBits, AllZero);
  Value *ExpNotOnes = Builder.CreateICmpNE(ExpBits, ExpMask);
  return Builder.CreateAnd(ExpNotZero, ExpNotOnes);
}


// Returns true when F is an intrinsic that this file expands. Most intrinsics
// are listed unconditionally; the buffer load/store intrinsics qualify only
// when resourceAccessNeeds64BitExpansion says their value type needs it.
static bool isIntrinsicExpansion(Function &F) {
  const Intrinsic::ID ID = F.getIntrinsicID();
  switch (ID) {
  case Intrinsic::assume:
  case Intrinsic::abs:
  case Intrinsic::atan2:
  case Intrinsic::fshl:
  case Intrinsic::fshr:
  case Intrinsic::exp:
  case Intrinsic::is_fpclass:
  case Intrinsic::log:
  case Intrinsic::log10:
  case Intrinsic::pow:
  case Intrinsic::powi:
  case Intrinsic::dx_all:
  case Intrinsic::dx_any:
  case Intrinsic::dx_cross:
  case Intrinsic::dx_uclamp:
  case Intrinsic::dx_sclamp:
  case Intrinsic::dx_nclamp:
  case Intrinsic::dx_degrees:
  case Intrinsic::dx_isinf:
  case Intrinsic::dx_isnan:
  case Intrinsic::dx_lerp:
  case Intrinsic::dx_normalize:
  case Intrinsic::dx_fdot:
  case Intrinsic::dx_sdot:
  case Intrinsic::dx_udot:
  case Intrinsic::dx_sign:
  case Intrinsic::dx_step:
  case Intrinsic::dx_radians:
  case Intrinsic::usub_sat:
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_fadd:
    return true;

  // Loads: the value type is element 0 of the returned struct.
  case Intrinsic::dx_resource_load_rawbuffer:
  case Intrinsic::dx_resource_load_typedbuffer: {
    Type *LoadedTy = F.getReturnType()->getStructElementType(0);
    const bool IsRaw = ID == Intrinsic::dx_resource_load_rawbuffer;
    return resourceAccessNeeds64BitExpansion(F.getParent(), LoadedTy, IsRaw);
  }

  // Stores: the value is parameter 3 for raw buffers, parameter 2 for typed.
  case Intrinsic::dx_resource_store_rawbuffer: {
    Type *StoredTy = F.getFunctionType()->getParamType(3);
    return resourceAccessNeeds64BitExpansion(F.getParent(), StoredTy,
                                             /*IsRaw*/ true);
  }
  case Intrinsic::dx_resource_store_typedbuffer: {
    Type *StoredTy = F.getFunctionType()->getParamType(2);
    return resourceAccessNeeds64BitExpansion(F.getParent(), StoredTy,
                                             /*IsRaw*/ false);
  }
  }
  return false;
}


// Expands an unsigned saturating subtract as
//   select (A u< B), 0, (A - B)
// with all instructions inserted before Orig.
static Value *expandUsubSat(CallInst *Orig) {
  IRBuilder<> Builder(Orig);

  Value *LHS = Orig->getArgOperand(0);
  Value *RHS = Orig->getArgOperand(1);

  Value *WouldWrap = Builder.CreateICmpULT(LHS, RHS, "usub.cmp");
  Value *Difference = Builder.CreateSub(LHS, RHS, "usub.sub");
  Value *Floor = ConstantInt::get(LHS->getType(), 0);
  return Builder.CreateSelect(WouldWrap, Floor, Difference, "usub.sat");
}


// Expands vector_reduce_add / vector_reduce_fadd into a chain of scalar
// extractelement + add/fadd instructions inserted before Orig.
//
// For vector_reduce_fadd, operand 0 is the start value and operand 1 the
// vector; for vector_reduce_add, operand 0 is the vector. The start value is
// folded into the sum unless it is a constant zero.
static Value *expandVecReduceAdd(CallInst *Orig, Intrinsic::ID IntrinsicId) {
  assert(IntrinsicId == Intrinsic::vector_reduce_add ||
         IntrinsicId == Intrinsic::vector_reduce_fadd);

  IRBuilder<> Builder(Orig);
  bool IsFAdd = (IntrinsicId == Intrinsic::vector_reduce_fadd);

  Value *X = Orig->getOperand(IsFAdd ? 1 : 0);
  // The vector type is dereferenced unconditionally, so use cast<> (which
  // asserts on a mismatch) rather than an unchecked dyn_cast<>.
  auto *XVec = cast<FixedVectorType>(X->getType());
  unsigned XVecSize = XVec->getNumElements();
  Value *Sum = Builder.CreateExtractElement(X, static_cast<uint64_t>(0));

  // Handle the initial start value for floating-point addition. Only a
  // constant zero start may be skipped; a non-constant start value must
  // always be added, otherwise it would be silently dropped from the result.
  if (IsFAdd) {
    Value *StartValue = Orig->getOperand(0);
    auto *StartConst = dyn_cast<Constant>(StartValue);
    if (!StartConst || !StartConst->isZeroValue())
      Sum = Builder.CreateFAdd(Sum, StartValue);
  }

  // Accumulate the remaining vector elements.
  for (unsigned I = 1; I < XVecSize; I++) {
    Value *Elt = Builder.CreateExtractElement(X, I);
    if (IsFAdd)
      Sum = Builder.CreateFAdd(Sum, Elt);
    else
      Sum = Builder.CreateAdd(Sum, Elt);
  }

  return Sum;
}


// Expands integer abs(X) as smax(X, 0 - X), inserted before Orig.
static Value *expandAbs(CallInst *Orig) {
  IRBuilder<> Builder(Orig);
  Value *X = Orig->getOperand(0);
  Type *Ty = X->getType();

  // Zero of X's type: a scalar constant, splatted when X is a vector.
  Constant *Zero = ConstantInt::get(Ty->getScalarType(), 0);
  if (Ty->isVectorTy()) {
    unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
    Zero = ConstantVector::getSplat(ElementCount::getFixed(NumElts), Zero);
  }

  Value *Negated = Builder.CreateSub(Zero, X);
  return Builder.CreateIntrinsic(Ty, Intrinsic::smax, {X, Negated}, nullptr,
                                 "dx.max");
}


// Expands a 3-component cross product into extractelement, fmul, fsub and
// insertelement instructions placed before Orig. Any result vector whose
// length is not 3 is a fatal usage error.
static Value *expandCrossIntrinsic(CallInst *Orig) {
  auto *VT = cast<FixedVectorType>(Orig->getType());
  if (VT->getNumElements() != 3)
    reportFatalUsageError("return vector must have exactly 3 elements");

  IRBuilder<> Builder(Orig);
  Value *LHS = Orig->getOperand(0);
  Value *RHS = Orig->getOperand(1);

  // Pull out the three lanes of each operand (first operand first).
  const char *const LHSNames[3] = {"x0", "x1", "x2"};
  const char *const RHSNames[3] = {"y0", "y1", "y2"};
  Value *L[3];
  Value *R[3];
  for (uint64_t I = 0; I < 3; ++I)
    L[I] = Builder.CreateExtractElement(LHS, I, LHSNames[I]);
  for (uint64_t I = 0; I < 3; ++I)
    R[I] = Builder.CreateExtractElement(RHS, I, RHSNames[I]);

  // Lane K of the result is L[K+1] * R[K+2] - L[K+2] * R[K+1], with the
  // indices taken modulo 3.
  Value *Lanes[3];
  for (unsigned K = 0; K < 3; ++K) {
    unsigned Next = (K + 1) % 3;
    unsigned After = (K + 2) % 3;
    Value *Pos = Builder.CreateFMul(L[Next], R[After]);
    Value *Neg = Builder.CreateFMul(L[After], R[Next]);
    Lanes[K] = Builder.CreateFSub(Pos, Neg, Orig->getName());
  }

  Value *Cross = PoisonValue::get(VT);
  for (uint64_t K = 0; K < 3; ++K)
    Cross = Builder.CreateInsertElement(Cross, Lanes[K], K);
  return Cross;
}


// Create the appropriate DXIL float dot intrinsic for the given A and B
// operands. The intrinsic (dx_dot2, dx_dot3 or dx_dot4) is selected from the
// element count of A; any other length is a fatal usage error. The lanes of A
// followed by the lanes of B are passed as scalar arguments, and the new
// instructions are placed at the position indicated by Orig.
static Value *expandFloatDotIntrinsic(CallInst *Orig, Value *A, Value *B) {
  Type *ATy = A->getType();
  [[maybe_unused]] Type *BTy = B->getType();
  assert(ATy->isVectorTy() && BTy->isVectorTy());
  assert(ATy->getScalarType()->isFloatingPointTy());

  IRBuilder<> Builder(Orig);

  // The vector type is dereferenced unconditionally, so use cast<> (which
  // asserts on a mismatch) rather than an unchecked dyn_cast<>.
  auto *AVec = cast<FixedVectorType>(ATy);
  const unsigned NumElts = AVec->getNumElements();

  Intrinsic::ID DotIntrinsic = Intrinsic::dx_dot4;
  switch (NumElts) {
  case 2:
    DotIntrinsic = Intrinsic::dx_dot2;
    break;
  case 3:
    DotIntrinsic = Intrinsic::dx_dot3;
    break;
  case 4:
    DotIntrinsic = Intrinsic::dx_dot4;
    break;
  default:
    reportFatalUsageError(
        "Invalid dot product input vector: length is outside 2-4");
    return nullptr;
  }

  SmallVector<Value *> Args;
  for (unsigned I = 0; I < NumElts; ++I)
    Args.push_back(Builder.CreateExtractElement(A, Builder.getInt32(I)));
  for (unsigned I = 0; I < NumElts; ++I)
    Args.push_back(Builder.CreateExtractElement(B, Builder.getInt32(I)));
  return Builder.CreateIntrinsic(ATy->getScalarType(), DotIntrinsic, Args,
                                 nullptr, "dot");
}


// Convenience overload: builds the DXIL float dot intrinsic from the first two
// operands of Orig by forwarding to the three-argument overload, which picks
// the opcode from the operand size and inserts at the position of Orig.
static Value *expandFloatDotIntrinsic(CallInst *Orig) {
  Value *LHS = Orig->getOperand(0);
  Value *RHS = Orig->getOperand(1);
  return expandFloatDotIntrinsic(Orig, LHS, RHS);
}


// Expand integer dot product to multiply and add ops: lane 0 is a plain mul,
// and every further lane is folded in with dx_imad (for dx_sdot) or dx_umad
// (for dx_udot), using the running result as the addend.
static Value *expandIntegerDotIntrinsic(CallInst *Orig,
                                        Intrinsic::ID DotIntrinsic) {
  assert(DotIntrinsic == Intrinsic::dx_sdot ||
         DotIntrinsic == Intrinsic::dx_udot);
  Value *A = Orig->getOperand(0);
  Value *B = Orig->getOperand(1);
  Type *ATy = A->getType();
  [[maybe_unused]] Type *BTy = B->getType();
  assert(ATy->isVectorTy() && BTy->isVectorTy());
  assert(ATy->getScalarType()->isIntegerTy());

  IRBuilder<> Builder(Orig);

  // The vector type is dereferenced unconditionally, so use cast<> (which
  // asserts on a mismatch) rather than an unchecked dyn_cast<>.
  auto *AVec = cast<FixedVectorType>(ATy);
  const unsigned NumElts = AVec->getNumElements();

  Intrinsic::ID MadIntrinsic = DotIntrinsic == Intrinsic::dx_sdot
                                   ? Intrinsic::dx_imad
                                   : Intrinsic::dx_umad;

  Value *Elt0 = Builder.CreateExtractElement(A, (uint64_t)0);
  Value *Elt1 = Builder.CreateExtractElement(B, (uint64_t)0);
  Value *Result = Builder.CreateMul(Elt0, Elt1);
  for (unsigned I = 1; I < NumElts; I++) {
    Elt0 = Builder.CreateExtractElement(A, I);
    Elt1 = Builder.CreateExtractElement(B, I);
    Result = Builder.CreateIntrinsic(Result->getType(), MadIntrinsic,
                                     ArrayRef<Value *>{Elt0, Elt1, Result},
                                     nullptr, "dx.mad");
  }
  return Result;
}


// Expands exp(X) as exp2(log2(e) * X). The exp2 call inherits the tail-call
// marker and attributes of Orig.
static Value *expandExpIntrinsic(CallInst *Orig) {
  IRBuilder<> Builder(Orig);
  Value *X = Orig->getOperand(0);
  Type *Ty = X->getType();

  // log2(e) in X's type: a scalar constant, splatted when X is a vector.
  Constant *Log2e = ConstantFP::get(Ty->getScalarType(), numbers::log2ef);
  if (Ty->isVectorTy()) {
    unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
    Log2e = ConstantVector::getSplat(ElementCount::getFixed(NumElts), Log2e);
  }

  Value *Scaled = Builder.CreateFMul(Log2e, X);
  CallInst *Exp2 =
      Builder.CreateIntrinsic(Ty, Intrinsic::exp2, {Scaled}, nullptr,
                              "dx.exp2");
  Exp2->setTailCall(Orig->isTailCall());
  Exp2->setAttributes(Orig->getAttributes());
  return Exp2;
}


// Expands llvm.is.fpclass for the class masks handled in this file.
//
// fcInf, fcNan, fcNormal and fcFinite are delegated to the 16-bit helpers,
// which return nullptr when they do not apply. fcNegZero is expanded for any
// floating-point width as an integer compare against the sign-bit-only
// pattern. Every other mask is a fatal usage error.
static Value *expandIsFPClass(CallInst *Orig) {
  // The mask is dereferenced unconditionally below, so use cast<> (which
  // asserts that it is a ConstantInt) rather than an unchecked dyn_cast<>.
  auto *TCI = cast<ConstantInt>(Orig->getArgOperand(1));
  const uint64_t TestMask = TCI->getZExtValue();

  // These FPClassTest cases have DXIL opcodes, so they will be handled in
  // DXIL Op Lowering instead for all non f16 cases.
  switch (TestMask) {
  case FPClassTest::fcInf:
    return expand16BitIsInf(Orig);
  case FPClassTest::fcNan:
    return expand16BitIsNaN(Orig);
  case FPClassTest::fcNormal:
    return expand16BitIsNormal(Orig);
  case FPClassTest::fcFinite:
    return expand16BitIsFinite(Orig);
  }

  IRBuilder<> Builder(Orig);

  Value *F = Orig->getArgOperand(0);
  Type *FTy = F->getType();
  unsigned FNumElem = 0; // 0 => F is not a vector

  unsigned BitWidth; // Bit width of F or the ElemTy of F
  Type *BitCastTy;   // An IntNTy of the same bitwidth as F or ElemTy of F

  if (auto *FVecTy = dyn_cast<FixedVectorType>(FTy)) {
    Type *ElemTy = FVecTy->getElementType();
    FNumElem = FVecTy->getNumElements();
    BitWidth = ElemTy->getPrimitiveSizeInBits();
    BitCastTy = FixedVectorType::get(Builder.getIntNTy(BitWidth), FNumElem);
  } else {
    BitWidth = FTy->getPrimitiveSizeInBits();
    BitCastTy = Builder.getIntNTy(BitWidth);
  }

  Value *FBitCast = Builder.CreateBitCast(F, BitCastTy);
  switch (TestMask) {
  case FPClassTest::fcNegZero: {
    // -0.0 has only the sign bit set. Build the pattern with APInt so it is
    // correct for every width: `1 << (BitWidth - 1)` is evaluated in `int`,
    // which is undefined for 64-bit (double) operands.
    Value *NegZero = Builder.getInt(APInt::getSignMask(BitWidth));
    if (FNumElem)
      NegZero = Builder.CreateVectorSplat(FNumElem, NegZero);
    return Builder.CreateICmpEQ(FBitCast, NegZero, "is.fpclass.negzero");
  }
  default:
    reportFatalUsageError("Unsupported FPClassTest");
  }
}


// Expands dx_any / dx_all. The operand is compared against zero (fcmp une for
// floating point, icmp ne for integers). A scalar operand yields that compare
// directly; for a vector the per-lane results are extracted and combined with
// `or` (dx_any) or `and` (dx_all).
static Value *expandAnyOrAllIntrinsic(CallInst *Orig,
                                      Intrinsic::ID IntrinsicId) {
  Value *X = Orig->getOperand(0);
  IRBuilder<> Builder(Orig);
  Type *Ty = X->getType();
  Type *EltTy = Ty->getScalarType();
  const bool IsFP = EltTy->isFloatingPointTy();

  // Emits the "!= 0" compare appropriate for the element type.
  auto CompareNotZero = [&](Constant *Zero) -> Value * {
    return IsFP ? Builder.CreateFCmpUNE(X, Zero)
                : Builder.CreateICmpNE(X, Zero);
  };

  Constant *ScalarZero =
      IsFP ? ConstantFP::get(EltTy, 0) : ConstantInt::get(EltTy, 0);

  if (!Ty->isVectorTy())
    return CompareNotZero(ScalarZero);

  // The vector type is dereferenced unconditionally, so use cast<> (which
  // asserts on a mismatch) rather than an unchecked dyn_cast<>.
  auto *XVec = cast<FixedVectorType>(Ty);
  const unsigned NumElts = XVec->getNumElements();

  Value *Cond = CompareNotZero(
      ConstantVector::getSplat(ElementCount::getFixed(NumElts), ScalarZero));

  Value *Result = Builder.CreateExtractElement(Cond, (uint64_t)0);
  for (unsigned I = 1; I < NumElts; I++) {
    Value *Elt = Builder.CreateExtractElement(Cond, I);
    if (IntrinsicId == Intrinsic::dx_any) {
      Result = Builder.CreateOr(Result, Elt);
    } else {
      assert(IntrinsicId == Intrinsic::dx_all);
      Result = Builder.CreateAnd(Result, Elt);
    }
  }
  return Result;
}


// Expands lerp(X, Y, S) as X + S * (Y - X), inserted before Orig.
static Value *expandLerpIntrinsic(CallInst *Orig) {
  IRBuilder<> Builder(Orig);

  Value *From = Orig->getOperand(0);
  Value *To = Orig->getOperand(1);
  Value *Weight = Orig->getOperand(2);

  Value *Span = Builder.CreateFSub(To, From);
  Value *Step = Builder.CreateFMul(Weight, Span);
  return Builder.CreateFAdd(From, Step, "dx.lerp");
}


// Expands a logarithm as LogConstVal * log2(X). With the default constant
// (ln 2) this is the natural logarithm. The log2 call inherits the tail-call
// marker and attributes of Orig.
static Value *expandLogIntrinsic(CallInst *Orig,
                                 float LogConstVal = numbers::ln2f) {
  IRBuilder<> Builder(Orig);
  Value *X = Orig->getOperand(0);
  Type *Ty = X->getType();

  // Scale factor in X's type: a scalar constant, splatted when X is a vector.
  Constant *Scale = ConstantFP::get(Ty->getScalarType(), LogConstVal);
  if (Ty->isVectorTy()) {
    unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
    Scale = ConstantVector::getSplat(ElementCount::getFixed(NumElts), Scale);
  }

  CallInst *Log2 =
      Builder.CreateIntrinsic(Ty, Intrinsic::log2, {X}, nullptr, "elt.log2");
  Log2->setTailCall(Orig->isTailCall());
  Log2->setAttributes(Orig->getAttributes());
  return Builder.CreateFMul(Scale, Log2);
}


// Expands log10(X) as log2(X) * (ln 2 / ln 10) via expandLogIntrinsic.
static Value *expandLog10Intrinsic(CallInst *Orig) {
  const float Log10Of2 = numbers::ln2f / numbers::ln10f;
  return expandLogIntrinsic(Orig, Log10Of2);
}


// Normalizes the operand of Orig.
//
// Vector case: the dot product of the operand with itself is fed to dx_rsqrt,
// and the operand is multiplied by a splat of that reciprocal length.
// Scalar case: the result is X / X.
// A constant-zero length (ConstantFP) is rejected with a fatal usage error.
static Value *expandNormalizeIntrinsic(CallInst *Orig) {
  IRBuilder<> Builder(Orig);
  Value *X = Orig->getOperand(0);
  Type *Ty = Orig->getType();

  // Fatal error when V is a floating-point constant equal to zero.
  auto RejectConstantZero = [](Value *V, const char *Msg) {
    if (auto *CFP = dyn_cast<ConstantFP>(V))
      if (CFP->getValueAPF().isZero())
        reportFatalUsageError(Msg);
  };

  auto *VecTy = dyn_cast<FixedVectorType>(Ty);
  if (!VecTy) {
    RejectConstantZero(X, "Invalid input scalar: length is zero");
    // NOTE(review): X / X yields 1 for negative inputs as well, not -1 --
    // confirm this is the intended scalar behaviour.
    return Builder.CreateFDiv(X, X);
  }

  Value *LengthSquared = expandFloatDotIntrinsic(Orig, X, X);

  // If the dot product is non-zero, then the length is non-zero.
  RejectConstantZero(LengthSquared, "Invalid input vector: length is zero");

  Value *InvLength = Builder.CreateIntrinsic(
      Ty->getScalarType(), Intrinsic::dx_rsqrt,
      ArrayRef<Value *>{LengthSquared}, nullptr, "dx.rsqrt");
  Value *InvLengthSplat =
      Builder.CreateVectorSplat(VecTy->getNumElements(), InvLength);
  return Builder.CreateFMul(X, InvLengthSplat);
}


// Expands atan2(Y, X) as atan(Y / X) followed by a chain of selects that
// adjusts the result by quadrant (see https://en.wikipedia.org/wiki/Atan2).
// Later selects take priority over earlier ones. The builder uses the
// fast-math flags of Orig, and the atan call inherits its tail-call marker and
// attributes.
static Value *expandAtan2Intrinsic(CallInst *Orig) {
  Value *Y = Orig->getOperand(0);
  Value *X = Orig->getOperand(1);
  Type *Ty = X->getType();

  IRBuilder<> Builder(Orig);
  Builder.setFastMathFlags(Orig->getFastMathFlags());

  Value *Ratio = Builder.CreateFDiv(Y, X);
  CallInst *Atan = Builder.CreateIntrinsic(Ty, Intrinsic::atan, {Ratio},
                                           nullptr, "Elt.Atan");
  Atan->setTailCall(Orig->isTailCall());
  Atan->setAttributes(Orig->getAttributes());

  Constant *Pi = ConstantFP::get(Ty, llvm::numbers::pi);
  Constant *HalfPi = ConstantFP::get(Ty, llvm::numbers::pi / 2);
  Constant *NegHalfPi = ConstantFP::get(Ty, -llvm::numbers::pi / 2);
  Constant *Zero = ConstantFP::get(Ty, 0);

  Value *AtanPlusPi = Builder.CreateFAdd(Atan, Pi);
  Value *AtanMinusPi = Builder.CreateFSub(Atan, Pi);

  Value *XNeg = Builder.CreateFCmpOLT(X, Zero);
  Value *XZero = Builder.CreateFCmpOEQ(X, Zero);
  Value *YNonNeg = Builder.CreateFCmpOGE(Y, Zero);
  Value *YNeg = Builder.CreateFCmpOLT(Y, Zero);

  // Replaces Current with Override wherever both conditions hold.
  auto OverrideWhen = [&Builder](Value *CondX, Value *CondY, Value *Override,
                                 Value *Current) -> Value * {
    Value *Both = Builder.CreateAnd(CondX, CondY);
    return Builder.CreateSelect(Both, Override, Current);
  };

  // x > 0 -> atan.
  Value *Result = Atan;
  // x < 0, y >= 0 -> atan + pi.
  Result = OverrideWhen(XNeg, YNonNeg, AtanPlusPi, Result);
  // x < 0, y < 0 -> atan - pi.
  Result = OverrideWhen(XNeg, YNeg, AtanMinusPi, Result);
  // x == 0, y < 0 -> -pi/2.
  Result = OverrideWhen(XZero, YNeg, NegHalfPi, Result);
  // x == 0, y >= 0 -> pi/2 (the y == 0 case is included by the OGE compare).
  Result = OverrideWhen(XZero, YNonNeg, HalfPi, Result);

  return Result;
}


// Expands a funnel shift (fshl when LeftFunnel is true, fshr otherwise) of
// operands (A, B, Shift) into plain shifts, masks and an `or`.
//
// Note: if (Shift % BitWidth) == 0 then (BitWidth - Shift) == BitWidth, and
// shifting by the bitwidth for shl/lshr returns a poisoned result. As such,
// we implement the same formula as LegalizerHelper::lowerFunnelShiftAsShifts.
//
// The funnel shift is expanded like so:
// fshl
//  -> msb_extract((concat(A, B) << (Shift % BitWidth)), BitWidth)
//  -> A << (Shift % BitWidth) | B >> 1 >> (BitWidth - 1 - (Shift % BitWidth))
// fshr
//  -> lsb_extract((concat(A, B) >> (Shift % BitWidth), BitWidth))
//  -> A << 1 << (BitWidth - 1 - (Shift % BitWidth)) | B >> (Shift % BitWidth)
template <bool LeftFunnel>
static Value *expandFunnelShiftIntrinsic(CallInst *Orig) {
  IRBuilder<> Builder(Orig);

  Type *Ty = Orig->getType();
  Value *Hi = Orig->getOperand(0);
  Value *Lo = Orig->getOperand(1);
  Value *Amount = Orig->getOperand(2);

  const unsigned BitWidth = Ty->getScalarSizeInBits();
  assert(llvm::isPowerOf2_32(BitWidth) &&
         "Can't use Mask to compute modulo and inverse");

  Constant *Mask = ConstantInt::get(Ty, BitWidth - 1);
  Constant *One = ConstantInt::get(Ty, 1);

  // Shift % BitWidth  ->  Shift & (BitWidth - 1)
  Value *Modulo = Builder.CreateAnd(Amount, Mask);

  // (BitWidth - 1) - (Shift % BitWidth)  ->  ~Shift & (BitWidth - 1)
  Value *Inverted = Builder.CreateNot(Amount);
  Value *Complement = Builder.CreateAnd(Inverted, Mask);

  Value *HiPart;
  Value *LoPart;
  if constexpr (LeftFunnel) {
    HiPart = Builder.CreateShl(Hi, Modulo);
    Value *LoByOne = Builder.CreateLShr(Lo, One);
    LoPart = Builder.CreateLShr(LoByOne, Complement);
  } else {
    Value *HiByOne = Builder.CreateShl(Hi, One);
    HiPart = Builder.CreateShl(HiByOne, Complement);
    LoPart = Builder.CreateLShr(Lo, Modulo);
  }

  return Builder.CreateOr(HiPart, LoPart);
}


// Expands pow(X, Y) as exp2(log2(X) * Y). For powi the integer exponent is
// first converted with sitofp to X's type. The exp2 call inherits the
// tail-call marker and attributes of Orig.
static Value *expandPowIntrinsic(CallInst *Orig, Intrinsic::ID IntrinsicId) {
  IRBuilder<> Builder(Orig);

  Value *Base = Orig->getOperand(0);
  Value *Exponent = Orig->getOperand(1);
  Type *Ty = Base->getType();

  const bool IsIntegerExponent = IntrinsicId == Intrinsic::powi;
  if (IsIntegerExponent)
    Exponent = Builder.CreateSIToFP(Exponent, Ty);

  CallInst *Log2 = Builder.CreateIntrinsic(Ty, Intrinsic::log2, {Base},
                                           nullptr, "elt.log2");
  Value *Product = Builder.CreateFMul(Log2, Exponent);
  CallInst *Exp2 = Builder.CreateIntrinsic(Ty, Intrinsic::exp2, {Product},
                                           nullptr, "elt.exp2");
  Exp2->setTailCall(Orig->isTailCall());
  Exp2->setAttributes(Orig->getAttributes());
  return Exp2;
}


// Expands step(X, Y) as select (Y olt X), 0.0, 1.0, with the constants
// splatted when the operands are vectors.
static Value *expandStepIntrinsic(CallInst *Orig) {
  Value *X = Orig->getOperand(0);
  Value *Y = Orig->getOperand(1);
  Type *Ty = X->getType();
  IRBuilder<> Builder(Orig);

  Constant *One = ConstantFP::get(Ty->getScalarType(), 1.0);
  Constant *Zero = ConstantFP::get(Ty->getScalarType(), 0.0);
  Value *Cond = Builder.CreateFCmpOLT(Y, X);

  if (Ty->isVectorTy()) {
    // The vector type is dereferenced unconditionally, so use cast<> (which
    // asserts on a mismatch) rather than an unchecked dyn_cast<>.
    const ElementCount EC =
        ElementCount::getFixed(cast<FixedVectorType>(Ty)->getNumElements());
    One = ConstantVector::getSplat(EC, One);
    Zero = ConstantVector::getSplat(EC, Zero);
  }

  return Builder.CreateSelect(Cond, Zero, One);
}


// Expands radians(X) as X * (pi / 180), inserted before Orig.
static Value *expandRadiansIntrinsic(CallInst *Orig) {
  IRBuilder<> Builder(Orig);
  Value *Degrees = Orig->getOperand(0);
  Constant *RadiansPerDegree =
      ConstantFP::get(Degrees->getType(), llvm::numbers::pi / 180.0);
  return Builder.CreateFMul(Degrees, RadiansPerDegree);
}


// Expands a raw or typed buffer load whose value type is double/i64 (scalar
// or vector) into loads of i32 vectors that are recombined into 64-bit values.
//
// Each 64-bit element needs two i32 lanes. At most four i32 lanes are loaded
// per call, so wider vectors are split across several loads; for raw buffers
// each additional load adds 8 * (elements already loaded) to operand 2
// (presumably the byte offset within the element -- confirm against the
// intrinsic definition).
//
// Every user of Orig must be an extractvalue with a single index: index 0
// users receive the recombined value, index 1 users receive the `and` of the
// check bits of all generated loads. Orig and those users are erased.
// Always returns true.
static bool expandBufferLoadIntrinsic(CallInst *Orig, bool IsRaw) {
  IRBuilder<> Builder(Orig);

  Type *BufferTy = Orig->getType()->getStructElementType(0);
  Type *ScalarTy = BufferTy->getScalarType();
  bool IsDouble = ScalarTy->isDoubleTy();
  // Parenthesized so the message string binds to the whole condition.
  assert((IsDouble || ScalarTy->isIntegerTy(64)) &&
         "Only expand double or int64 scalars or vectors");
  bool IsVector = false;
  unsigned ExtractNum = 2;
  if (auto *VT = dyn_cast<FixedVectorType>(BufferTy)) {
    ExtractNum = 2 * VT->getNumElements();
    IsVector = true;
    assert((IsRaw || ExtractNum == 4) &&
           "TypedBufferLoad vector must be size 2");
  }

  SmallVector<Value *, 2> Loads;
  Value *Result = PoisonValue::get(BufferTy);
  // Number of 64-bit elements produced by the loads emitted so far.
  unsigned Base = 0;
  // If we need to extract more than 4 i32; we need to break it up into
  // more than one load. LoadNum tells us how many i32s we are loading in
  // each load
  while (ExtractNum > 0) {
    unsigned LoadNum = std::min(ExtractNum, 4u);
    Type *Ty = VectorType::get(Builder.getInt32Ty(), LoadNum, false);

    Type *LoadType = StructType::get(Ty, Builder.getInt1Ty());
    Intrinsic::ID LoadIntrinsic = Intrinsic::dx_resource_load_typedbuffer;
    SmallVector<Value *, 3> Args = {Orig->getOperand(0), Orig->getOperand(1)};
    if (IsRaw) {
      LoadIntrinsic = Intrinsic::dx_resource_load_rawbuffer;
      // 4 bytes per i32, two i32s per already-loaded 64-bit element.
      Value *Tmp = Builder.getInt32(4 * Base * 2);
      Args.push_back(Builder.CreateAdd(Orig->getOperand(2), Tmp));
    }

    CallInst *Load = Builder.CreateIntrinsic(LoadType, LoadIntrinsic, Args);
    Loads.push_back(Load);

    // extract the buffer load's result
    Value *Extract = Builder.CreateExtractValue(Load, {0});

    SmallVector<Value *> ExtractElements;
    for (unsigned I = 0; I < LoadNum; ++I)
      ExtractElements.push_back(
          Builder.CreateExtractElement(Extract, Builder.getInt32(I)));

    // combine into double(s) or int64(s)
    for (unsigned I = 0; I < LoadNum; I += 2) {
      Value *Combined = nullptr;
      if (IsDouble)
        // For doubles, use dx_asdouble intrinsic
        Combined = Builder.CreateIntrinsic(
            Builder.getDoubleTy(), Intrinsic::dx_asdouble,
            {ExtractElements[I], ExtractElements[I + 1]});
      else {
        // For int64, manually combine two int32s
        // First, zero-extend both values to i64
        Value *Lo =
            Builder.CreateZExt(ExtractElements[I], Builder.getInt64Ty());
        Value *Hi =
            Builder.CreateZExt(ExtractElements[I + 1], Builder.getInt64Ty());
        // Shift the high bits left by 32 bits
        Value *ShiftedHi = Builder.CreateShl(Hi, Builder.getInt64(32));
        // OR the high and low bits together
        Combined = Builder.CreateOr(Lo, ShiftedHi);
      }

      if (IsVector)
        Result = Builder.CreateInsertElement(Result, Combined,
                                             Builder.getInt32((I / 2) + Base));
      else
        Result = Combined;
    }

    ExtractNum -= LoadNum;
    Base += LoadNum / 2;
  }

  // Built lazily, the first time a user asks for the check bit.
  Value *CheckBit = nullptr;
  for (User *U : make_early_inc_range(Orig->users())) {
    // If it's not a ExtractValueInst, we don't know how to
    // handle it
    auto *EVI = dyn_cast<ExtractValueInst>(U);
    if (!EVI)
      llvm_unreachable("Unexpected user of typedbufferload");

    ArrayRef<unsigned> Indices = EVI->getIndices();
    assert(Indices.size() == 1);

    if (Indices[0] == 0) {
      // Use of the value(s)
      EVI->replaceAllUsesWith(Result);
    } else {
      // Use of the check bit
      assert(Indices[0] == 1 && "Unexpected type for typedbufferload");
      // Note: This does not always match the historical behaviour of DXC.
      // See https://github.com/microsoft/DirectXShaderCompiler/issues/7622
      if (!CheckBit) {
        SmallVector<Value *, 2> CheckBits;
        for (Value *L : Loads)
          CheckBits.push_back(Builder.CreateExtractValue(L, {1}));
        CheckBit = Builder.CreateAnd(CheckBits);
      }
      EVI->replaceAllUsesWith(CheckBit);
    }
    EVI->eraseFromParent();
  }
  Orig->eraseFromParent();
  return true;
}


/// Rewrite a buffer store of double/i64 data as one or more stores of i32
/// vectors.
///
/// Every 64-bit element of the stored value is split into a low and a high
/// 32-bit half; the halves are laid out interleaved (lo0, hi0, lo1, hi1, ...)
/// in a single i32 vector, which is then written with stores of at most four
/// i32s each.
///
/// \param Orig  The original store call. It is erased before returning.
/// \param IsRaw True for dx_resource_store_rawbuffer (value is operand 3 and
///              operand 2 is an offset that is advanced for each follow-up
///              store); false for dx_resource_store_typedbuffer (value is
///              operand 2).
/// \returns Always true.
static bool expandBufferStoreIntrinsic(CallInst *Orig, bool IsRaw) {
  IRBuilder<> Builder(Orig);

  const unsigned ValIndex = IsRaw ? 3 : 2;
  Value *StoredVal = Orig->getOperand(ValIndex);
  Type *StoredTy = Orig->getFunctionType()->getParamType(ValIndex);
  Type *EltTy = StoredTy->getScalarType();
  const bool IsDouble = EltTy->isDoubleTy();
  assert((IsDouble || EltTy->isIntegerTy(64)) &&
         "Only expand double or int64 scalars or vectors");

  // A scalar is treated as a single 64-bit element, i.e. two i32s.
  auto *StoredVecTy = dyn_cast<FixedVectorType>(StoredTy);
  const bool IsVector = StoredVecTy != nullptr;
  const unsigned VecLen = IsVector ? StoredVecTy->getNumElements() : 0;
  assert((!IsVector || IsRaw || VecLen == 2) &&
         "TypedBufferStore vector must be size 2");
  unsigned RemainingI32s = IsVector ? VecLen * 2 : 2;

  // Type of one "half" value: i32 for scalars, <VecLen x i32> for vectors.
  Type *Int32Ty = Builder.getInt32Ty();
  Type *HalfTy = Int32Ty;
  if (IsVector)
    HalfTy = FixedVectorType::get(Int32Ty, VecLen);

  // Split the 64-bit value(s) into low and high 32-bit halves.
  Value *LoHalf = nullptr;
  Value *HiHalf = nullptr;
  if (!IsDouble) {
    // int64: truncate for the low half, shift right by 32 and truncate for
    // the high half.
    Constant *ThirtyTwo = Builder.getInt64(32);
    if (IsVector)
      ThirtyTwo =
          ConstantVector::getSplat(ElementCount::getFixed(VecLen), ThirtyTwo);

    LoHalf = Builder.CreateTrunc(StoredVal, HalfTy);
    Value *UpperBits = Builder.CreateLShr(StoredVal, ThirtyTwo);
    HiHalf = Builder.CreateTrunc(UpperBits, HalfTy);
  } else {
    // double: dx_splitdouble yields a {low, high} struct.
    StructType *HalvesTy = StructType::get(HalfTy, HalfTy);
    Value *Halves = Builder.CreateIntrinsic(HalvesTy, Intrinsic::dx_splitdouble,
                                            {StoredVal});
    LoHalf = Builder.CreateExtractValue(Halves, 0);
    HiHalf = Builder.CreateExtractValue(Halves, 1);
  }

  // Pack the halves into one i32 vector, low half first for each element.
  Value *Packed = nullptr;
  if (IsVector) {
    SmallVector<int, 8> Interleave;
    for (unsigned Lane = 0; Lane != VecLen; ++Lane) {
      Interleave.push_back(Lane);
      Interleave.push_back(Lane + VecLen);
    }
    Packed = Builder.CreateShuffleVector(LoHalf, HiHalf, Interleave);
  } else {
    Packed = PoisonValue::get(FixedVectorType::get(Int32Ty, RemainingI32s));
    Packed = Builder.CreateInsertElement(Packed, LoHalf, Builder.getInt32(0));
    Packed = Builder.CreateInsertElement(Packed, HiHalf, Builder.getInt32(1));
  }

  const Intrinsic::ID StoreIntrinsic =
      IsRaw ? Intrinsic::dx_resource_store_rawbuffer
            : Intrinsic::dx_resource_store_typedbuffer;

  // A single store takes at most four i32s, so wider values are written in
  // several pieces. FirstI32 is the index of the first i32 of the current
  // piece within Packed.
  unsigned FirstI32 = 0;
  while (RemainingI32s != 0) {
    const unsigned PieceLen = std::min(RemainingI32s, 4u);

    SmallVector<Value *, 4> Args;
    Args.push_back(Orig->getOperand(0));
    Args.push_back(Orig->getOperand(1));
    if (IsRaw) {
      // Advance the original offset operand by 4 for every i32 that earlier
      // pieces already covered.
      Value *Advance = Builder.getInt32(4 * FirstI32);
      Args.push_back(Builder.CreateAdd(Orig->getOperand(2), Advance));
    }

    // Values of up to four i32s are stored whole; anything wider is sliced.
    Value *Piece = Packed;
    if (VecLen > 2) {
      SmallVector<int, 4> Window;
      for (unsigned Lane = 0; Lane != PieceLen; ++Lane)
        Window.push_back(FirstI32 + Lane);
      Piece = Builder.CreateShuffleVector(Packed, Window);
    }
    Args.push_back(Piece);

    Builder.CreateIntrinsic(Builder.getVoidTy(), StoreIntrinsic, Args);

    RemainingI32s -= PieceLen;
    FirstI32 += PieceLen;
  }

  Orig->eraseFromParent();
  return true;
}


/// Map a DirectX clamp intrinsic to the generic "max" intrinsic that applies
/// the lower bound: umax for dx_uclamp, smax for dx_sclamp, and maxnum for
/// dx_nclamp (asserted to be the only remaining possibility).
static Intrinsic::ID getMaxForClamp(Intrinsic::ID ClampIntrinsic) {
  switch (ClampIntrinsic) {
  case Intrinsic::dx_uclamp:
    return Intrinsic::umax;
  case Intrinsic::dx_sclamp:
    return Intrinsic::smax;
  default:
    assert(ClampIntrinsic == Intrinsic::dx_nclamp);
    return Intrinsic::maxnum;
  }
}


/// Map a DirectX clamp intrinsic to the generic "min" intrinsic that applies
/// the upper bound: umin for dx_uclamp, smin for dx_sclamp, and minnum for
/// dx_nclamp (asserted to be the only remaining possibility).
static Intrinsic::ID getMinForClamp(Intrinsic::ID ClampIntrinsic) {
  switch (ClampIntrinsic) {
  case Intrinsic::dx_uclamp:
    return Intrinsic::umin;
  case Intrinsic::dx_sclamp:
    return Intrinsic::smin;
  default:
    assert(ClampIntrinsic == Intrinsic::dx_nclamp);
    return Intrinsic::minnum;
  }
}


/// Expand clamp(x, min, max) into min(max(x, min), max), using the unsigned,
/// signed or floating-point min/max intrinsics selected by \p ClampIntrinsic.
///
/// \param Orig           The clamp call; operands 0, 1 and 2 are x, min, max.
/// \param ClampIntrinsic The clamp flavour being expanded.
/// \returns The final "dx.min" call. \p Orig itself is left in place.
static Value *expandClampIntrinsic(CallInst *Orig,
                                   Intrinsic::ID ClampIntrinsic) {
  IRBuilder<> Builder(Orig);
  Value *Input = Orig->getOperand(0);
  Value *LowerBound = Orig->getOperand(1);
  Value *UpperBound = Orig->getOperand(2);
  Type *ResultTy = Input->getType();

  // First raise the input to at least the lower bound ...
  Value *AtLeastMin =
      Builder.CreateIntrinsic(ResultTy, getMaxForClamp(ClampIntrinsic),
                              {Input, LowerBound}, nullptr, "dx.max");
  // ... then cap the result at the upper bound.
  Value *Clamped =
      Builder.CreateIntrinsic(ResultTy, getMinForClamp(ClampIntrinsic),
                              {AtLeastMin, UpperBound}, nullptr, "dx.min");
  return Clamped;
}


/// Expand dx_degrees(x) into a floating-point multiply of x by the constant
/// 180 / pi.
///
/// \returns The fmul result. \p Orig itself is left in place.
static Value *expandDegreesIntrinsic(CallInst *Orig) {
  IRBuilder<> Builder(Orig);
  Value *Input = Orig->getOperand(0);
  Constant *Scale =
      ConstantFP::get(Input->getType(), 180.0 * llvm::numbers::inv_pi);
  return Builder.CreateFMul(Input, Scale);
}


/// Expand dx_sign(x) into zext(0 < x) - zext(x < 0), computed in the call's
/// return type.
///
/// Floating-point inputs use ordered less-than compares; integer inputs use
/// signed less-than compares.
///
/// \returns The subtraction result. \p Orig itself is left in place.
static Value *expandSignIntrinsic(CallInst *Orig) {
  IRBuilder<> Builder(Orig);

  Value *Input = Orig->getOperand(0);
  Type *InputTy = Input->getType();
  Type *ResultTy = Orig->getType();
  Constant *ZeroVal = Constant::getNullValue(InputTy);

  Type *InputScalarTy = InputTy->getScalarType();
  const bool IsFloat = InputScalarTy->isFloatingPointTy();
  assert(IsFloat || InputScalarTy->isIntegerTy());

  // Emit "LHS < RHS" with the predicate that matches the input type.
  auto CreateLessThan = [&](Value *LHS, Value *RHS) -> Value * {
    if (IsFloat)
      return Builder.CreateFCmpOLT(LHS, RHS);
    return Builder.CreateICmpSLT(LHS, RHS);
  };

  Value *IsPositive = CreateLessThan(ZeroVal, Input);
  Value *IsNegative = CreateLessThan(Input, ZeroVal);

  // Widen both i1 results so the difference is +1, 0 or -1.
  Value *PositiveBit = Builder.CreateZExt(IsPositive, ResultTy);
  Value *NegativeBit = Builder.CreateZExt(IsNegative, ResultTy);
  return Builder.CreateSub(PositiveBit, NegativeBit);
}


/// Expand one call to the intrinsic \p F.
///
/// Most intrinsics are handled by a helper that builds the replacement value;
/// when that helper returns a non-null value, all uses of \p Orig are
/// redirected to it and \p Orig is erased. llvm.assume calls are simply
/// erased. The buffer load/store helpers do their own replacement and report
/// success themselves.
///
/// \param F    The intrinsic declaration being called.
/// \param Orig The call to expand.
/// \returns true if the call was expanded (and erased), false if the
///          intrinsic is not handled here or its helper produced no
///          replacement.
static bool expandIntrinsic(Function &F, CallInst *Orig) {
  // Shared tail for helpers that return a replacement value: a null
  // replacement means "nothing to do".
  auto ReplaceWith = [Orig](Value *Replacement) {
    if (!Replacement)
      return false;
    Orig->replaceAllUsesWith(Replacement);
    Orig->eraseFromParent();
    return true;
  };

  const Intrinsic::ID IntrinsicId = F.getIntrinsicID();
  switch (IntrinsicId) {
  case Intrinsic::abs:
    return ReplaceWith(expandAbs(Orig));
  case Intrinsic::assume:
    Orig->eraseFromParent();
    return true;
  case Intrinsic::atan2:
    return ReplaceWith(expandAtan2Intrinsic(Orig));
  case Intrinsic::fshl:
    return ReplaceWith(expandFunnelShiftIntrinsic<true>(Orig));
  case Intrinsic::fshr:
    return ReplaceWith(expandFunnelShiftIntrinsic<false>(Orig));
  case Intrinsic::exp:
    return ReplaceWith(expandExpIntrinsic(Orig));
  case Intrinsic::is_fpclass:
    return ReplaceWith(expandIsFPClass(Orig));
  case Intrinsic::log:
    return ReplaceWith(expandLogIntrinsic(Orig));
  case Intrinsic::log10:
    return ReplaceWith(expandLog10Intrinsic(Orig));
  case Intrinsic::pow:
  case Intrinsic::powi:
    return ReplaceWith(expandPowIntrinsic(Orig, IntrinsicId));
  case Intrinsic::dx_all:
  case Intrinsic::dx_any:
    return ReplaceWith(expandAnyOrAllIntrinsic(Orig, IntrinsicId));
  case Intrinsic::dx_cross:
    return ReplaceWith(expandCrossIntrinsic(Orig));
  case Intrinsic::dx_uclamp:
  case Intrinsic::dx_sclamp:
  case Intrinsic::dx_nclamp:
    return ReplaceWith(expandClampIntrinsic(Orig, IntrinsicId));
  case Intrinsic::dx_degrees:
    return ReplaceWith(expandDegreesIntrinsic(Orig));
  case Intrinsic::dx_isinf:
    return ReplaceWith(expand16BitIsInf(Orig));
  case Intrinsic::dx_isnan:
    return ReplaceWith(expand16BitIsNaN(Orig));
  case Intrinsic::dx_lerp:
    return ReplaceWith(expandLerpIntrinsic(Orig));
  case Intrinsic::dx_normalize:
    return ReplaceWith(expandNormalizeIntrinsic(Orig));
  case Intrinsic::dx_fdot:
    return ReplaceWith(expandFloatDotIntrinsic(Orig));
  case Intrinsic::dx_sdot:
  case Intrinsic::dx_udot:
    return ReplaceWith(expandIntegerDotIntrinsic(Orig, IntrinsicId));
  case Intrinsic::dx_sign:
    return ReplaceWith(expandSignIntrinsic(Orig));
  case Intrinsic::dx_step:
    return ReplaceWith(expandStepIntrinsic(Orig));
  case Intrinsic::dx_radians:
    return ReplaceWith(expandRadiansIntrinsic(Orig));
  // The buffer helpers replace and erase the call themselves.
  case Intrinsic::dx_resource_load_rawbuffer:
    return expandBufferLoadIntrinsic(Orig, /*IsRaw*/ true);
  case Intrinsic::dx_resource_store_rawbuffer:
    return expandBufferStoreIntrinsic(Orig, /*IsRaw*/ true);
  case Intrinsic::dx_resource_load_typedbuffer:
    return expandBufferLoadIntrinsic(Orig, /*IsRaw*/ false);
  case Intrinsic::dx_resource_store_typedbuffer:
    return expandBufferStoreIntrinsic(Orig, /*IsRaw*/ false);
  case Intrinsic::usub_sat:
    return ReplaceWith(expandUsubSat(Orig));
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_fadd:
    return ReplaceWith(expandVecReduceAdd(Orig, IntrinsicId));
  default:
    return false;
  }
}


/// Expand every call to each function in \p M that isIntrinsicExpansion()
/// selects, and erase declarations that are left without users.
///
/// \returns true if at least one call was expanded, i.e. the module was
///          modified; false if nothing changed. Callers use this to decide
///          whether analyses must be invalidated.
static bool expansionIntrinsics(Module &M) {
  bool Changed = false;

  // Early-increment ranges are required: expanding a call erases it from the
  // user list, and the declaration itself may be erased below.
  for (auto &F : make_early_inc_range(M.functions())) {
    if (!isIntrinsicExpansion(F))
      continue;

    // Accumulate across all calls rather than remembering only the last one.
    bool IntrinsicExpanded = false;
    for (User *U : make_early_inc_range(F.users())) {
      auto *IntrinsicCall = dyn_cast<CallInst>(U);
      if (!IntrinsicCall)
        continue;
      IntrinsicExpanded |= expandIntrinsic(F, IntrinsicCall);
    }

    // NOTE(review): this relies on expandIntrinsic() returning true whenever
    // it mutates the IR; confirm for helpers that can return nullptr.
    Changed |= IntrinsicExpanded;

    if (F.user_empty() && IntrinsicExpanded)
      F.eraseFromParent();
  }
  return Changed;
}


/// New pass manager entry point: run the expansion over \p M and report that
/// no analyses are preserved when expansionIntrinsics() returns true, or that
/// all of them are preserved otherwise.
PreservedAnalyses DXILIntrinsicExpansion::run(Module &M,
                                              ModuleAnalysisManager &) {
  const bool Changed = expansionIntrinsics(M);
  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}


/// Legacy pass manager entry point: run the expansion over \p M and forward
/// the result of expansionIntrinsics() unchanged.
bool DXILIntrinsicExpansionLegacy::runOnModule(Module &M) {
  const bool Changed = expansionIntrinsics(M);
  return Changed;
}


// Definition of the pass identification member declared in the class; it is
// handed to the ModulePass constructor by reference.
char DXILIntrinsicExpansionLegacy::ID = 0;


// Register the legacy pass under DEBUG_TYPE with the display name
// "DXIL Intrinsic Expansion". The two trailing `false` arguments are the
// macro's `cfg` and `analysis` parameters.
INITIALIZE_PASS_BEGIN(DXILIntrinsicExpansionLegacy, DEBUG_TYPE,

                      "DXIL Intrinsic Expansion", false, false)

INITIALIZE_PASS_END(DXILIntrinsicExpansionLegacy, DEBUG_TYPE,

                    "DXIL Intrinsic Expansion", false, false)


/// Factory for the legacy-pass-manager version of the DXIL intrinsic
/// expansion pass. Returns a newly heap-allocated pass object.
ModulePass *llvm::createDXILIntrinsicExpansionLegacyPass() {
  ModulePass *ExpansionPass = new DXILIntrinsicExpansionLegacy();
  return ExpansionPass;
}


assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

A
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

Casting.h

Passes.h

Constants.h
This file contains the declarations for the subclasses of Constant, which represent the different fla...

expand16BitIsNormal
static Value * expand16BitIsNormal(CallInst *Orig)
Definition DXILIntrinsicExpansion.cpp:163

expandNormalizeIntrinsic
static Value * expandNormalizeIntrinsic(CallInst *Orig)
Definition DXILIntrinsicExpansion.cpp:580

expandIntrinsic
static bool expandIntrinsic(Function &F, CallInst *Orig)
Definition DXILIntrinsicExpansion.cpp:1046

expandClampIntrinsic
static Value * expandClampIntrinsic(CallInst *Orig, Intrinsic::ID ClampIntrinsic)
Definition DXILIntrinsicExpansion.cpp:999

expand16BitIsInf
static Value * expand16BitIsInf(CallInst *Orig)
Definition DXILIntrinsicExpansion.cpp:55

expansionIntrinsics
static bool expansionIntrinsics(Module &M)
Definition DXILIntrinsicExpansion.cpp:1156

expand16BitIsFinite
static Value * expand16BitIsFinite(CallInst *Orig)
Definition DXILIntrinsicExpansion.cpp:136

expandLerpIntrinsic
static Value * expandLerpIntrinsic(CallInst *Orig)
Definition DXILIntrinsicExpansion.cpp:546

expandCrossIntrinsic
static Value * expandCrossIntrinsic(CallInst *Orig)
Definition DXILIntrinsicExpansion.cpp:312

expandUsubSat
static Value * expandUsubSat(CallInst *Orig)
Definition DXILIntrinsicExpansion.cpp:251

expandAnyOrAllIntrinsic
static Value * expandAnyOrAllIntrinsic(CallInst *Orig, Intrinsic::ID IntrinsicId)
Definition DXILIntrinsicExpansion.cpp:505

expandVecReduceAdd
static Value * expandVecReduceAdd(CallInst *Orig, Intrinsic::ID IntrinsicId)
Definition DXILIntrinsicExpansion.cpp:264

expandAtan2Intrinsic
static Value * expandAtan2Intrinsic(CallInst *Orig)
Definition DXILIntrinsicExpansion.cpp:615

expandLog10Intrinsic
static Value * expandLog10Intrinsic(CallInst *Orig)
Definition DXILIntrinsicExpansion.cpp:574

getMinForClamp
static Intrinsic::ID getMinForClamp(Intrinsic::ID ClampIntrinsic)
Definition DXILIntrinsicExpansion.cpp:990

expandStepIntrinsic
static Value * expandStepIntrinsic(CallInst *Orig)
Definition DXILIntrinsicExpansion.cpp:740

expandIntegerDotIntrinsic
static Value * expandIntegerDotIntrinsic(CallInst *Orig, Intrinsic::ID DotIntrinsic)
Definition DXILIntrinsicExpansion.cpp:397

expandBufferStoreIntrinsic
static bool expandBufferStoreIntrinsic(CallInst *Orig, bool IsRaw)
Definition DXILIntrinsicExpansion.cpp:881

expandLogIntrinsic
static Value * expandLogIntrinsic(CallInst *Orig, float LogConstVal=numbers::ln2f)
Definition DXILIntrinsicExpansion.cpp:556

expandDegreesIntrinsic
static Value * expandDegreesIntrinsic(CallInst *Orig)
Definition DXILIntrinsicExpansion.cpp:1012

expandPowIntrinsic
static Value * expandPowIntrinsic(CallInst *Orig, Intrinsic::ID IntrinsicId)
Definition DXILIntrinsicExpansion.cpp:720

resourceAccessNeeds64BitExpansion
static bool resourceAccessNeeds64BitExpansion(Module *M, Type *OverloadTy, bool IsRaw)
Definition DXILIntrinsicExpansion.cpp:46

expandExpIntrinsic
static Value * expandExpIntrinsic(CallInst *Orig)
Definition DXILIntrinsicExpansion.cpp:430

expand16BitIsNaN
static Value * expand16BitIsNaN(CallInst *Orig)
Definition DXILIntrinsicExpansion.cpp:90

expandSignIntrinsic
static Value * expandSignIntrinsic(CallInst *Orig)
Definition DXILIntrinsicExpansion.cpp:1020

getMaxForClamp
static Intrinsic::ID getMaxForClamp(Intrinsic::ID ClampIntrinsic)
Definition DXILIntrinsicExpansion.cpp:981

expandAbs
static Value * expandAbs(CallInst *Orig)
Definition DXILIntrinsicExpansion.cpp:296

expandFloatDotIntrinsic
static Value * expandFloatDotIntrinsic(CallInst *Orig, Value *A, Value *B)
Definition DXILIntrinsicExpansion.cpp:350

expandRadiansIntrinsic
static Value * expandRadiansIntrinsic(CallInst *Orig)
Definition DXILIntrinsicExpansion.cpp:762

isIntrinsicExpansion
static bool isIntrinsicExpansion(Function &F)
Definition DXILIntrinsicExpansion.cpp:199

expandBufferLoadIntrinsic
static bool expandBufferLoadIntrinsic(CallInst *Orig, bool IsRaw)
Definition DXILIntrinsicExpansion.cpp:770

expandIsFPClass
static Value * expandIsFPClass(CallInst *Orig)
Definition DXILIntrinsicExpansion.cpp:449

expandFunnelShiftIntrinsic
static Value * expandFunnelShiftIntrinsic(CallInst *Orig)
Definition DXILIntrinsicExpansion.cpp:664

DXILIntrinsicExpansion.h

DirectX.h

DEBUG_TYPE
#define DEBUG_TYPE
Definition GenericCycleImpl.h:31

IRBuilder.h

Instruction.h

Module.h
This file contains the declarations for the Module class.

PassManager.h
This header defines various interfaces for pass management in LLVM.

Type.h

InstrTypes.h

Instructions.h

Intrinsics.h

F
#define F(x, y, z)
Definition MD5.cpp:54

I
#define I(x, y, z)
Definition MD5.cpp:57

MathExtras.h

T
#define T
Definition Mips16ISelLowering.cpp:282

INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44

INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39

Pass.h

Cond
const SmallVectorImpl< MachineOperand > & Cond
Definition RISCVRedundantCopyElimination.cpp:73

getNumElements
static unsigned getNumElements(Type *Ty)
Definition SLPVectorizer.cpp:276

STLExtras.h
This file contains some templates that are useful if you are working with the STL at all.

SmallVector.h
This file defines the SmallVector class.

Y
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")

X
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")

Mul
BinaryOperator * Mul
Definition X86PartialReduction.cpp:75

DXILIntrinsicExpansionLegacy
Definition DXILIntrinsicExpansion.cpp:37

DXILIntrinsicExpansionLegacy::ID
static char ID
Definition DXILIntrinsicExpansion.cpp:43

DXILIntrinsicExpansionLegacy::runOnModule
bool runOnModule(Module &M) override
runOnModule - Virtual method overridden by subclasses to process the module being operated on.
Definition DXILIntrinsicExpansion.cpp:1180

DXILIntrinsicExpansionLegacy::DXILIntrinsicExpansionLegacy
DXILIntrinsicExpansionLegacy()
Definition DXILIntrinsicExpansion.cpp:41

VectorType
Definition ItaniumDemangle.h:1189

llvm::APFloat
Definition APFloat.h:1021

llvm::APFloat::isZero
bool isZero() const
Definition APFloat.h:1508

llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40

llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142

llvm::CallBase::setAttributes
void setAttributes(AttributeList A)
Set the attributes for this call.
Definition InstrTypes.h:1425

llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition InstrTypes.h:1290

llvm::CallBase::getFunctionType
FunctionType * getFunctionType() const
Definition InstrTypes.h:1203

llvm::CallBase::getAttributes
AttributeList getAttributes() const
Return the attributes for this call.
Definition InstrTypes.h:1422

llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition Instructions.h:1511

llvm::CallInst::isTailCall
bool isTailCall() const
Definition Instructions.h:1622

llvm::CallInst::setTailCall
void setTailCall(bool IsTc=true)
Definition Instructions.h:1635

llvm::ConstantVector::getSplat
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
Definition Constants.cpp:1495

llvm::Constant
This is an important base class in LLVM.
Definition Constant.h:43

llvm::Constant::getNullValue
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition Constants.cpp:390

llvm::Constant::isZeroValue
LLVM_ABI bool isZeroValue() const
Return true if the value is negative zero or null value.
Definition Constants.cpp:76

llvm::DXILIntrinsicExpansion::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &)
Definition DXILIntrinsicExpansion.cpp:1173

llvm::ElementCount::getFixed
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309

llvm::FixedVectorType::get
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:802

llvm::FunctionType::getParamType
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition DerivedTypes.h:137

llvm::Function
Definition Function.h:64

llvm::IRBuilder
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2775

llvm::Instruction::getModule
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
Definition Instruction.cpp:82

llvm::Instruction::eraseFromParent
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition Instruction.cpp:108

llvm::Instruction::getFastMathFlags
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
Definition Instruction.cpp:696

llvm::ModulePass
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition Pass.h:255

llvm::ModulePass::ModulePass
ModulePass(char &pid)
Definition Pass.h:257

llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67

llvm::PoisonValue::get
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition Constants.cpp:1905

llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112

llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115

llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118

llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition SmallVector.h:419

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition SmallVector.h:1205

llvm::StructType::get
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:413

llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45

llvm::Type::getStructElementType
LLVM_ABI Type * getStructElementType(unsigned N) const
Definition DerivedTypes.h:393

llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273

llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352

llvm::Type::getPrimitiveSizeInBits
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197

llvm::Type::getInt16Ty
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
Definition Type.cpp:295

llvm::Type::isDoubleTy
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition Type.h:156

llvm::Type::isFloatingPointTy
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:184

llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240

llvm::Type::getIntNTy
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:300

llvm::User
Definition User.h:44

llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition User.h:207

llvm::Value
LLVM Value Representation.
Definition Value.h:75

llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256

llvm::Value::replaceAllUsesWith
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553

llvm::Value::users
iterator_range< user_iterator > users()
Definition Value.h:426

llvm::Value::getName
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322

llvm::VectorType::get
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.

llvm::VersionTuple
Represents a version number in the form major[.minor[.subminor[.build]]].
Definition VersionTuple.h:30

uint64_t

ErrorHandling.h

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition ErrorHandling.h:164

false
Definition MachinePipeliner.cpp:245

llvm::Intrinsic::ID
unsigned ID
Definition GenericSSAContext.h:28

llvm::numbers::inv_pi
constexpr double inv_pi
Definition STLForwardCompat.h:70

llvm::numbers::ln10f
constexpr float ln10f
Definition MathExtras.h:50

llvm::numbers::log2ef
constexpr float log2ef
Definition MathExtras.h:51

llvm::numbers::pi
constexpr double pi
Definition STLForwardCompat.h:69

llvm::numbers::ln2f
constexpr float ln2f
Definition MathExtras.h:49

llvm::sampleprof::Base
@ Base
Definition Discriminator.h:58

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26

llvm::dyn_cast
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643

llvm::Int32Ty
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
Definition InstrProf.h:296

llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632

llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279

llvm::fcInf
@ fcInf
Definition FloatingPointMode.h:277

llvm::fcNegZero
@ fcNegZero
Definition FloatingPointMode.h:270

llvm::fcFinite
@ fcFinite
Definition FloatingPointMode.h:283

llvm::fcNormal
@ fcNormal
Definition FloatingPointMode.h:278

llvm::fcNan
@ fcNan
Definition FloatingPointMode.h:276

llvm::createDXILIntrinsicExpansionLegacyPass
ModulePass * createDXILIntrinsicExpansionLegacyPass()
Pass to expand intrinsic operations that lack DXIL opCodes.
Definition DXILIntrinsicExpansion.cpp:1191

llvm::PackElem::Hi
@ Hi
Definition VECustomDAG.h:132

llvm::PackElem::Lo
@ Lo
Definition VECustomDAG.h:131

llvm::RecurKind::Sub
@ Sub
Subtraction of integers.
Definition IVDescriptors.h:38

llvm::BitWidth
constexpr unsigned BitWidth
Definition BitmaskEnum.h:219

llvm::cast
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559

llvm::ModuleAnalysisManager
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39

llvm::reportFatalUsageError
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177