doxygen/R600ISelLowering_8cpp_source.html

//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

/// \file

/// Custom DAG lowering for R600

//

//===----------------------------------------------------------------------===//


#include "R600ISelLowering.h"

#include "AMDGPU.h"

#include "AMDGPUSelectionDAGInfo.h"

#include "MCTargetDesc/R600MCTargetDesc.h"

#include "R600Defines.h"

#include "R600MachineFunctionInfo.h"

#include "R600Subtarget.h"

#include "R600TargetMachine.h"

#include "llvm/CodeGen/MachineFunction.h"

#include "llvm/IR/IntrinsicsAMDGPU.h"

#include "llvm/IR/IntrinsicsR600.h"

#include "llvm/Passes/CodeGenPassBuilder.h"


using namespace llvm;


#include "R600GenCallingConv.inc"


R600TargetLowering::R600TargetLowering(const TargetMachine &TM,

                                       const R600Subtarget &STI)

    : AMDGPUTargetLowering(TM, STI, STI), Subtarget(&STI),

      Gen(STI.getGeneration()) {

  addRegisterClass(MVT::f32, &R600::R600_Reg32RegClass);

  addRegisterClass(MVT::i32, &R600::R600_Reg32RegClass);

  addRegisterClass(MVT::v2f32, &R600::R600_Reg64RegClass);

  addRegisterClass(MVT::v2i32, &R600::R600_Reg64RegClass);

  addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass);

  addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass);


  setBooleanContents(ZeroOrNegativeOneBooleanContent);

  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);


  computeRegisterProperties(Subtarget->getRegisterInfo());


  // Legalize loads and stores to the private address space.

  setOperationAction(ISD::LOAD, {MVT::i32, MVT::v2i32, MVT::v4i32}, Custom);


  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address

  // spaces, so it is custom lowered to handle those where it isn't.

  for (auto Op : {ISD::SEXTLOAD, ISD::ZEXTLOAD, ISD::EXTLOAD})

    for (MVT VT : MVT::integer_valuetypes()) {

      setLoadExtAction(Op, VT, MVT::i1, Promote);

      setLoadExtAction(Op, VT, MVT::i8, Custom);

      setLoadExtAction(Op, VT, MVT::i16, Custom);

    }


  // Workaround for LegalizeDAG asserting on expansion of i1 vector loads.

  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::v2i32,

                   MVT::v2i1, Expand);


  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::v4i32,

                   MVT::v4i1, Expand);


  setOperationAction(ISD::STORE, {MVT::i8, MVT::i32, MVT::v2i32, MVT::v4i32},

                     Custom);


  setTruncStoreAction(MVT::i32, MVT::i8, Custom);

  setTruncStoreAction(MVT::i32, MVT::i16, Custom);

  // We need to include these since trunc STORES to PRIVATE need

  // special handling to accommodate RMW

  setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);

  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Custom);

  setTruncStoreAction(MVT::v8i32, MVT::v8i16, Custom);

  setTruncStoreAction(MVT::v16i32, MVT::v16i16, Custom);

  setTruncStoreAction(MVT::v32i32, MVT::v32i16, Custom);

  setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);

  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);

  setTruncStoreAction(MVT::v8i32, MVT::v8i8, Custom);

  setTruncStoreAction(MVT::v16i32, MVT::v16i8, Custom);

  setTruncStoreAction(MVT::v32i32, MVT::v32i8, Custom);


  // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.

  setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand);

  setTruncStoreAction(MVT::v4i32, MVT::v4i1, Expand);


  // Set condition code actions

  setCondCodeAction({ISD::SETO, ISD::SETUO, ISD::SETLT, ISD::SETLE, ISD::SETOLT,

                     ISD::SETOLE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGE,

                     ISD::SETUGT, ISD::SETULT, ISD::SETULE},

                    MVT::f32, Expand);


  setCondCodeAction({ISD::SETLE, ISD::SETLT, ISD::SETULE, ISD::SETULT},

                    MVT::i32, Expand);


  setOperationAction({ISD::FCOS, ISD::FSIN}, MVT::f32, Custom);


  setOperationAction(ISD::SETCC, {MVT::v4i32, MVT::v2i32}, Expand);


  setOperationAction(ISD::BR_CC, {MVT::i32, MVT::f32}, Expand);

  setOperationAction(ISD::BRCOND, MVT::Other, Custom);


  setOperationAction(ISD::FSUB, MVT::f32, Expand);


  setOperationAction(ISD::IS_FPCLASS,

                     {MVT::f32, MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32,

                      MVT::v6f32, MVT::v7f32, MVT::v8f32, MVT::v16f32},

                     Expand);


  setOperationAction({ISD::FCEIL, ISD::FTRUNC, ISD::FROUNDEVEN, ISD::FFLOOR},

                     MVT::f64, Custom);


  setOperationAction(ISD::SELECT_CC, {MVT::f32, MVT::i32}, Custom);


  setOperationAction(ISD::SETCC, {MVT::i32, MVT::f32}, Expand);

  setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT}, {MVT::i1, MVT::i64},

                     Custom);


  setOperationAction(ISD::SELECT, {MVT::i32, MVT::f32, MVT::v2i32, MVT::v4i32},

                     Expand);


  // ADD, SUB overflow.

  // TODO: turn these into Legal?

  if (Subtarget->hasCARRY())

    setOperationAction(ISD::UADDO, MVT::i32, Custom);


  if (Subtarget->hasBORROW())

    setOperationAction(ISD::USUBO, MVT::i32, Custom);


  // Expand sign extension of vectors

  if (!Subtarget->hasBFE())

    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);


  setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i1, MVT::v4i1}, Expand);


  if (!Subtarget->hasBFE())

    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i8, MVT::v4i8}, Expand);


  if (!Subtarget->hasBFE())

    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i16, MVT::v4i16}, Expand);


  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);

  setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i32, MVT::v4i32}, Expand);


  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);


  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);


  setOperationAction(ISD::EXTRACT_VECTOR_ELT,

                     {MVT::v2i32, MVT::v2f32, MVT::v4i32, MVT::v4f32}, Custom);


  setOperationAction(ISD::INSERT_VECTOR_ELT,

                     {MVT::v2i32, MVT::v2f32, MVT::v4i32, MVT::v4f32}, Custom);


  // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32

  //  to be Legal/Custom in order to avoid library calls.

  setOperationAction({ISD::SHL_PARTS, ISD::SRL_PARTS, ISD::SRA_PARTS}, MVT::i32,

                     Custom);


  if (!Subtarget->hasFMA())

    setOperationAction(ISD::FMA, {MVT::f32, MVT::f64}, Expand);


  // FIXME: May need no denormals check

  setOperationAction(ISD::FMAD, MVT::f32, Legal);


  if (!Subtarget->hasBFI())

    // fcopysign can be done in a single instruction with BFI.

    setOperationAction(ISD::FCOPYSIGN, {MVT::f32, MVT::f64}, Expand);


  if (!Subtarget->hasBCNT(32))

    setOperationAction(ISD::CTPOP, MVT::i32, Expand);


  if (!Subtarget->hasBCNT(64))

    setOperationAction(ISD::CTPOP, MVT::i64, Expand);


  if (Subtarget->hasFFBH())

    setOperationAction(ISD::CTLZ_ZERO_POISON, MVT::i32, Custom);


  if (Subtarget->hasFFBL())

    setOperationAction(ISD::CTTZ_ZERO_POISON, MVT::i32, Custom);


  // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we

  // need it for R600.

  if (Subtarget->hasBFE())

    setHasExtractBitsInsn(true);


  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);

  setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);


  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };

  for (MVT VT : ScalarIntVTs)

    setOperationAction({ISD::ADDC, ISD::SUBC, ISD::ADDE, ISD::SUBE}, VT,

                       Expand);


  // LLVM will expand these to atomic_cmp_swap(0)

  // and atomic_swap, respectively.

  setOperationAction({ISD::ATOMIC_LOAD, ISD::ATOMIC_STORE}, MVT::i32, Expand);


  // We need to custom lower some of the intrinsics

  setOperationAction({ISD::INTRINSIC_VOID, ISD::INTRINSIC_WO_CHAIN}, MVT::Other,

                     Custom);


  setSchedulingPreference(Sched::Source);


  setTargetDAGCombine({ISD::FP_ROUND, ISD::FP_TO_SINT, ISD::EXTRACT_VECTOR_ELT,

                       ISD::SELECT_CC, ISD::INSERT_VECTOR_ELT, ISD::LOAD});

}


static inline bool isEOP(MachineBasicBlock::iterator I) {

  if (std::next(I) == I->getParent()->end())

    return false;

  return std::next(I)->getOpcode() == R600::RETURN;

}


MachineBasicBlock *


R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,

                                                MachineBasicBlock *BB) const {

  MachineFunction *MF = BB->getParent();

  MachineRegisterInfo &MRI = MF->getRegInfo();

  MachineBasicBlock::iterator I = MI;

  const R600InstrInfo *TII = Subtarget->getInstrInfo();


  switch (MI.getOpcode()) {

  default:

    // Replace LDS_*_RET instruction that don't have any uses with the

    // equivalent LDS_*_NORET instruction.

    if (TII->isLDSRetInstr(MI.getOpcode())) {

      int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);

      assert(DstIdx != -1);

      MachineInstrBuilder NewMI;

      // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add

      //        LDS_1A2D support and remove this special case.

      if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||

          MI.getOpcode() == R600::LDS_CMPST_RET)

        return BB;


      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),

                      TII->get(R600::getLDSNoRetOp(MI.getOpcode())));

      for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))

        NewMI.add(MO);

    } else {

      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);

    }

    break;


  case R600::FABS_R600: {

    MachineInstr *NewMI = TII->buildDefaultInstruction(

        *BB, I, R600::MOV, MI.getOperand(0).getReg(),

        MI.getOperand(1).getReg());

    TII->addFlag(*NewMI, 0, MO_FLAG_ABS);

    break;

  }


  case R600::FNEG_R600: {

    MachineInstr *NewMI = TII->buildDefaultInstruction(

        *BB, I, R600::MOV, MI.getOperand(0).getReg(),

        MI.getOperand(1).getReg());

    TII->addFlag(*NewMI, 0, MO_FLAG_NEG);

    break;

  }


  case R600::MASK_WRITE: {

    Register maskedRegister = MI.getOperand(0).getReg();

    assert(maskedRegister.isVirtual());

    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);

    TII->addFlag(*defInstr, 0, MO_FLAG_MASK);

    break;

  }


  case R600::MOV_IMM_F32:

    TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)

                                                            .getFPImm()

                                                            ->getValueAPF()

                                                            .bitcastToAPInt()

                                                            .getZExtValue());

    break;


  case R600::MOV_IMM_I32:

    TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),

                     MI.getOperand(1).getImm());

    break;


  case R600::MOV_IMM_GLOBAL_ADDR: {

    //TODO: Perhaps combine this instruction with the next if possible

    auto MIB = TII->buildDefaultInstruction(

        *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X);

    int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal);

    //TODO: Ugh this is rather ugly

    const MachineOperand &MO = MI.getOperand(1);

    MIB->getOperand(Idx).ChangeToGA(MO.getGlobal(), MO.getOffset(),

                                    MO.getTargetFlags());

    break;

  }


  case R600::CONST_COPY: {

    MachineInstr *NewMI = TII->buildDefaultInstruction(

        *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_CONST);

    TII->setImmOperand(*NewMI, R600::OpName::src0_sel,

                       MI.getOperand(1).getImm());

    break;

  }


  case R600::RAT_WRITE_CACHELESS_32_eg:

  case R600::RAT_WRITE_CACHELESS_64_eg:

  case R600::RAT_WRITE_CACHELESS_128_eg:

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))

        .add(MI.getOperand(0))

        .add(MI.getOperand(1))

        .addImm(isEOP(I)); // Set End of program bit

    break;


  case R600::RAT_STORE_TYPED_eg:

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))

        .add(MI.getOperand(0))

        .add(MI.getOperand(1))

        .add(MI.getOperand(2))

        .addImm(isEOP(I)); // Set End of program bit

    break;


  case R600::BRANCH:

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP))

        .add(MI.getOperand(0));

    break;


  case R600::BRANCH_COND_f32: {

    MachineInstr *NewMI =

        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),

                R600::PREDICATE_BIT)

            .add(MI.getOperand(1))

            .addImm(R600::PRED_SETNE)

            .addImm(0); // Flags

    TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))

        .add(MI.getOperand(0))

        .addReg(R600::PREDICATE_BIT, RegState::Kill);

    break;

  }


  case R600::BRANCH_COND_i32: {

    MachineInstr *NewMI =

        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),

                R600::PREDICATE_BIT)

            .add(MI.getOperand(1))

            .addImm(R600::PRED_SETNE_INT)

            .addImm(0); // Flags

    TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))

        .add(MI.getOperand(0))

        .addReg(R600::PREDICATE_BIT, RegState::Kill);

    break;

  }


  case R600::EG_ExportSwz:

  case R600::R600_ExportSwz: {

    // Instruction is left unmodified if its not the last one of its type

    bool isLastInstructionOfItsType = true;

    unsigned InstExportType = MI.getOperand(1).getImm();

    for (MachineBasicBlock::iterator NextExportInst = std::next(I),

         EndBlock = BB->end(); NextExportInst != EndBlock;

         NextExportInst = std::next(NextExportInst)) {

      if (NextExportInst->getOpcode() == R600::EG_ExportSwz ||

          NextExportInst->getOpcode() == R600::R600_ExportSwz) {

        unsigned CurrentInstExportType = NextExportInst->getOperand(1)

            .getImm();

        if (CurrentInstExportType == InstExportType) {

          isLastInstructionOfItsType = false;

          break;

        }

      }

    }

    bool EOP = isEOP(I);

    if (!EOP && !isLastInstructionOfItsType)

      return BB;

    unsigned CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? 84 : 40;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))

        .add(MI.getOperand(0))

        .add(MI.getOperand(1))

        .add(MI.getOperand(2))

        .add(MI.getOperand(3))

        .add(MI.getOperand(4))

        .add(MI.getOperand(5))

        .add(MI.getOperand(6))

        .addImm(CfInst)

        .addImm(EOP);

    break;

  }

  case R600::RETURN: {

    return BB;

  }

  }


  MI.eraseFromParent();

  return BB;

}


//===----------------------------------------------------------------------===//

// Custom DAG Lowering Operations

//===----------------------------------------------------------------------===//


SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {

  MachineFunction &MF = DAG.getMachineFunction();

  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();

  switch (Op.getOpcode()) {

  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);

  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);

  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);

  case ISD::SHL_PARTS:

  case ISD::SRA_PARTS:

  case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);

  case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);

  case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);

  case ISD::FCOS:

  case ISD::FSIN: return LowerTrig(Op, DAG);

  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);

  case ISD::STORE: return LowerSTORE(Op, DAG);

  case ISD::LOAD: {

    SDValue Result = LowerLOAD(Op, DAG);

    assert((!Result.getNode() ||

            Result.getNode()->getNumValues() == 2) &&

           "Load should return a value and a chain");

    return Result;

  }


  case ISD::BRCOND: return LowerBRCOND(Op, DAG);

  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);

  case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);

  case ISD::ADDRSPACECAST:

    return lowerADDRSPACECAST(Op, DAG);

  case ISD::INTRINSIC_VOID: {

    SDValue Chain = Op.getOperand(0);

    unsigned IntrinsicID = Op.getConstantOperandVal(1);

    switch (IntrinsicID) {

    case Intrinsic::r600_store_swizzle: {

      SDLoc DL(Op);

      const SDValue Args[8] = {

        Chain,

        Op.getOperand(2), // Export Value

        Op.getOperand(3), // ArrayBase

        Op.getOperand(4), // Type

        DAG.getConstant(0, DL, MVT::i32), // SWZ_X

        DAG.getConstant(1, DL, MVT::i32), // SWZ_Y

        DAG.getConstant(2, DL, MVT::i32), // SWZ_Z

        DAG.getConstant(3, DL, MVT::i32) // SWZ_W

      };

      return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args);

    }


    // default for switch(IntrinsicID)

    default: break;

    }

    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())

    break;

  }

  case ISD::INTRINSIC_WO_CHAIN: {

    unsigned IntrinsicID = Op.getConstantOperandVal(0);

    EVT VT = Op.getValueType();

    SDLoc DL(Op);

    switch (IntrinsicID) {

    case Intrinsic::r600_tex:

    case Intrinsic::r600_texc: {

      unsigned TextureOp;

      switch (IntrinsicID) {

      case Intrinsic::r600_tex:

        TextureOp = 0;

        break;

      case Intrinsic::r600_texc:

        TextureOp = 1;

        break;

      default:

        llvm_unreachable("unhandled texture operation");

      }


      SDValue TexArgs[19] = {

        DAG.getConstant(TextureOp, DL, MVT::i32),

        Op.getOperand(1),

        DAG.getConstant(0, DL, MVT::i32),

        DAG.getConstant(1, DL, MVT::i32),

        DAG.getConstant(2, DL, MVT::i32),

        DAG.getConstant(3, DL, MVT::i32),

        Op.getOperand(2),

        Op.getOperand(3),

        Op.getOperand(4),

        DAG.getConstant(0, DL, MVT::i32),

        DAG.getConstant(1, DL, MVT::i32),

        DAG.getConstant(2, DL, MVT::i32),

        DAG.getConstant(3, DL, MVT::i32),

        Op.getOperand(5),

        Op.getOperand(6),

        Op.getOperand(7),

        Op.getOperand(8),

        Op.getOperand(9),

        Op.getOperand(10)

      };

      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);

    }

    case Intrinsic::r600_dot4: {

      SDValue Args[8] = {

      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),

          DAG.getConstant(0, DL, MVT::i32)),

      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),

          DAG.getConstant(0, DL, MVT::i32)),

      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),

          DAG.getConstant(1, DL, MVT::i32)),

      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),

          DAG.getConstant(1, DL, MVT::i32)),

      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),

          DAG.getConstant(2, DL, MVT::i32)),

      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),

          DAG.getConstant(2, DL, MVT::i32)),

      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),

          DAG.getConstant(3, DL, MVT::i32)),

      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),

          DAG.getConstant(3, DL, MVT::i32))

      };

      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);

    }


    case Intrinsic::r600_implicitarg_ptr: {

      MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS);

      uint32_t ByteOffset = getImplicitParameterOffset(MF, FIRST_IMPLICIT);

      return DAG.getConstant(ByteOffset, DL, PtrVT);

    }

    case Intrinsic::r600_read_ngroups_x:

      return LowerImplicitParameter(DAG, VT, DL, 0);

    case Intrinsic::r600_read_ngroups_y:

      return LowerImplicitParameter(DAG, VT, DL, 1);

    case Intrinsic::r600_read_ngroups_z:

      return LowerImplicitParameter(DAG, VT, DL, 2);

    case Intrinsic::r600_read_global_size_x:

      return LowerImplicitParameter(DAG, VT, DL, 3);

    case Intrinsic::r600_read_global_size_y:

      return LowerImplicitParameter(DAG, VT, DL, 4);

    case Intrinsic::r600_read_global_size_z:

      return LowerImplicitParameter(DAG, VT, DL, 5);

    case Intrinsic::r600_read_local_size_x:

      return LowerImplicitParameter(DAG, VT, DL, 6);

    case Intrinsic::r600_read_local_size_y:

      return LowerImplicitParameter(DAG, VT, DL, 7);

    case Intrinsic::r600_read_local_size_z:

      return LowerImplicitParameter(DAG, VT, DL, 8);


    case Intrinsic::r600_read_tgid_x:

    case Intrinsic::amdgcn_workgroup_id_x:

      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,

                                     R600::T1_X, VT);

    case Intrinsic::r600_read_tgid_y:

    case Intrinsic::amdgcn_workgroup_id_y:

      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,

                                     R600::T1_Y, VT);

    case Intrinsic::r600_read_tgid_z:

    case Intrinsic::amdgcn_workgroup_id_z:

      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,

                                     R600::T1_Z, VT);

    case Intrinsic::r600_read_tidig_x:

    case Intrinsic::amdgcn_workitem_id_x:

      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,

                                     R600::T0_X, VT);

    case Intrinsic::r600_read_tidig_y:

    case Intrinsic::amdgcn_workitem_id_y:

      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,

                                     R600::T0_Y, VT);

    case Intrinsic::r600_read_tidig_z:

    case Intrinsic::amdgcn_workitem_id_z:

      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,

                                     R600::T0_Z, VT);


    case Intrinsic::r600_recipsqrt_ieee:

      return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));


    case Intrinsic::r600_recipsqrt_clamped:

      return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));

    default:

      return Op;

    }


    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())

    break;

  }

  } // end switch(Op.getOpcode())

  return SDValue();

}


void R600TargetLowering::ReplaceNodeResults(SDNode *N,

                                            SmallVectorImpl<SDValue> &Results,

                                            SelectionDAG &DAG) const {

  switch (N->getOpcode()) {

  default:

    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);

    return;

  case ISD::FP_TO_UINT:

    if (N->getValueType(0) == MVT::i1) {

      Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));

      return;

    }

    // Since we don't care about out of bounds values we can use FP_TO_SINT for

    // uints too. The DAGLegalizer code for uint considers some extra cases

    // which are not necessary here.

    [[fallthrough]];

  case ISD::FP_TO_SINT: {

    if (N->getValueType(0) == MVT::i1) {

      Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));

      return;

    }


    SDValue Result;

    if (expandFP_TO_SINT(N, Result, DAG))

      Results.push_back(Result);

    return;

  }

  case ISD::SDIVREM: {

    SDValue Op = SDValue(N, 1);

    SDValue RES = LowerSDIVREM(Op, DAG);

    Results.push_back(RES);

    Results.push_back(RES.getValue(1));

    break;

  }

  case ISD::UDIVREM: {

    SDValue Op = SDValue(N, 0);

    LowerUDIVREM64(Op, DAG, Results);

    break;

  }

  }

}


SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,

                                                   SDValue Vector) const {

  SDLoc DL(Vector);

  EVT VecVT = Vector.getValueType();

  EVT EltVT = VecVT.getVectorElementType();

  SmallVector<SDValue, 8> Args;


  for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) {

    Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,

                               DAG.getVectorIdxConstant(i, DL)));

  }


  return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);

}


SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,

                                                    SelectionDAG &DAG) const {

  SDLoc DL(Op);

  SDValue Vector = Op.getOperand(0);

  SDValue Index = Op.getOperand(1);


  if (isa<ConstantSDNode>(Index) ||

      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)

    return Op;


  Vector = vectorToVerticalVector(DAG, Vector);

  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),

                     Vector, Index);

}


SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,

                                                   SelectionDAG &DAG) const {

  SDLoc DL(Op);

  SDValue Vector = Op.getOperand(0);

  SDValue Value = Op.getOperand(1);

  SDValue Index = Op.getOperand(2);


  if (isa<ConstantSDNode>(Index) ||

      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)

    return Op;


  Vector = vectorToVerticalVector(DAG, Vector);

  SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),

                               Vector, Value, Index);

  return vectorToVerticalVector(DAG, Insert);

}


SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunctionInfo *MFI,

                                               SDValue Op,

                                               SelectionDAG &DAG) const {

  GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);

  if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)

    return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);


  const DataLayout &DL = DAG.getDataLayout();

  const GlobalValue *GV = GSD->getGlobal();

  MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);


  SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);

  return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);

}


SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {

  // On hw >= R700, COS/SIN input must be between -1. and 1.

  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)

  EVT VT = Op.getValueType();

  SDValue Arg = Op.getOperand(0);

  SDLoc DL(Op);


  // TODO: Should this propagate fast-math-flags?

  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,

      DAG.getNode(ISD::FADD, DL, VT,

        DAG.getNode(ISD::FMUL, DL, VT, Arg,

          DAG.getConstantFP(0.15915494309, DL, MVT::f32)),

        DAG.getConstantFP(0.5, DL, MVT::f32)));

  unsigned TrigNode;

  switch (Op.getOpcode()) {

  case ISD::FCOS:

    TrigNode = AMDGPUISD::COS_HW;

    break;

  case ISD::FSIN:

    TrigNode = AMDGPUISD::SIN_HW;

    break;

  default:

    llvm_unreachable("Wrong trig opcode");

  }

  SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,

      DAG.getNode(ISD::FADD, DL, VT, FractPart,

        DAG.getConstantFP(-0.5, DL, MVT::f32)));

  if (Gen >= AMDGPUSubtarget::R700)

    return TrigVal;

  // On R600 hw, COS/SIN input must be between -Pi and Pi.

  return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,

      DAG.getConstantFP(numbers::pif, DL, MVT::f32));

}


SDValue R600TargetLowering::LowerShiftParts(SDValue Op,

                                            SelectionDAG &DAG) const {

  SDValue Lo, Hi;

  expandShiftParts(Op.getNode(), Lo, Hi, DAG);

  return DAG.getMergeValues({Lo, Hi}, SDLoc(Op));

}


SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,

                                          unsigned mainop, unsigned ovf) const {

  SDLoc DL(Op);

  EVT VT = Op.getValueType();


  SDValue Lo = Op.getOperand(0);

  SDValue Hi = Op.getOperand(1);


  SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);

  // Extend sign.

  OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,

                    DAG.getValueType(MVT::i1));


  SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);


  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);

}


SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {

  SDLoc DL(Op);

  return DAG.getNode(

      ISD::SETCC,

      DL,

      MVT::i1,

      Op, DAG.getConstantFP(1.0f, DL, MVT::f32),

      DAG.getCondCode(ISD::SETEQ));

}


SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {

  SDLoc DL(Op);

  return DAG.getNode(

      ISD::SETCC,

      DL,

      MVT::i1,

      Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),

      DAG.getCondCode(ISD::SETEQ));

}


SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,

                                                   const SDLoc &DL,

                                                   unsigned DwordOffset) const {

  unsigned ByteOffset = DwordOffset * 4;

  PointerType *PtrType =

      PointerType::get(*DAG.getContext(), AMDGPUAS::PARAM_I_ADDRESS);


  // We shouldn't be using an offset wider than 16-bits for implicit parameters.

  assert(isInt<16>(ByteOffset));


  return DAG.getLoad(VT, DL, DAG.getEntryNode(),

                     DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR

                     MachinePointerInfo(ConstantPointerNull::get(PtrType)));

}


bool R600TargetLowering::isZero(SDValue Op) const {

  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op))

    return Cst->isZero();

  if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op))

    return CstFP->isZero();

  return false;

}


bool R600TargetLowering::isHWTrueValue(SDValue Op) const {

  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {

    return CFP->isExactlyValue(1.0);

  }

  return isAllOnesConstant(Op);

}


bool R600TargetLowering::isHWFalseValue(SDValue Op) const {

  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {

    return CFP->getValueAPF().isZero();

  }

  return isNullConstant(Op);

}


SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {

  SDLoc DL(Op);

  EVT VT = Op.getValueType();


  SDValue LHS = Op.getOperand(0);

  SDValue RHS = Op.getOperand(1);

  SDValue True = Op.getOperand(2);

  SDValue False = Op.getOperand(3);

  SDValue CC = Op.getOperand(4);

  SDValue Temp;


  if (VT == MVT::f32) {

    DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);

    SDValue MinMax = combineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);

    if (MinMax)

      return MinMax;

  }


  // LHS and RHS are guaranteed to be the same value type

  EVT CompareVT = LHS.getValueType();


  // Check if we can lower this to a native operation.


  // Try to lower to a SET* instruction:

  //

  // SET* can match the following patterns:

  //

  // select_cc f32, f32, -1,  0, cc_supported

  // select_cc f32, f32, 1.0f, 0.0f, cc_supported

  // select_cc i32, i32, -1,  0, cc_supported

  //


  // Move hardware True/False values to the correct operand.

  if (isHWTrueValue(False) && isHWFalseValue(True)) {

    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();

    ISD::CondCode InverseCC = ISD::getSetCCInverse(CCOpcode, CompareVT);

    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {

      std::swap(False, True);

      CC = DAG.getCondCode(InverseCC);

    } else {

      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);

      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {

        std::swap(False, True);

        std::swap(LHS, RHS);

        CC = DAG.getCondCode(SwapInvCC);

      }

    }

  }


  if (isHWTrueValue(True) && isHWFalseValue(False) &&

      (CompareVT == VT || VT == MVT::i32)) {

    // This can be matched by a SET* instruction.

    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);

  }


  // Try to lower to a CND* instruction:

  //

  // CND* can match the following patterns:

  //

  // select_cc f32, 0.0, f32, f32, cc_supported

  // select_cc f32, 0.0, i32, i32, cc_supported

  // select_cc i32, 0,   f32, f32, cc_supported

  // select_cc i32, 0,   i32, i32, cc_supported

  //


  // Try to move the zero value to the RHS

  if (isZero(LHS)) {

    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();

    // Try swapping the operands

    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);

    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {

      std::swap(LHS, RHS);

      CC = DAG.getCondCode(CCSwapped);

    } else {

      // Try inverting the condition and then swapping the operands

      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT);

      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);

      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {

        std::swap(True, False);

        std::swap(LHS, RHS);

        CC = DAG.getCondCode(CCSwapped);

      }

    }

  }

  if (isZero(RHS)) {

    SDValue Cond = LHS;

    SDValue Zero = RHS;

    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();

    if (CompareVT != VT) {

      // Bitcast True / False to the correct types.  This will end up being

      // a nop, but it allows us to define only a single pattern in the

      // .TD files for each CND* instruction rather than having to have

      // one pattern for integer True/False and one for fp True/False

      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);

      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);

    }


    switch (CCOpcode) {

    case ISD::SETONE:

    case ISD::SETUNE:

    case ISD::SETNE:

      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT);

      Temp = True;

      True = False;

      False = Temp;

      break;

    default:

      break;

    }

    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,

        Cond, Zero,

        True, False,

        DAG.getCondCode(CCOpcode));

    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);

  }


  // If we make it this for it means we have no native instructions to handle

  // this SELECT_CC, so we must lower it.

  SDValue HWTrue, HWFalse;


  if (CompareVT == MVT::f32) {

    HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);

    HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);

  } else if (CompareVT == MVT::i32) {

    HWTrue = DAG.getAllOnesConstant(DL, CompareVT);

    HWFalse = DAG.getConstant(0, DL, CompareVT);

  }

  else {

    llvm_unreachable("Unhandled value type in LowerSELECT_CC");

  }


  // Lower this unsupported SELECT_CC into a combination of two supported

  // SELECT_CC operations.

  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);


  return DAG.getNode(ISD::SELECT_CC, DL, VT,

      Cond, HWFalse,

      True, False,

      DAG.getCondCode(ISD::SETNE));

}


SDValue R600TargetLowering::lowerADDRSPACECAST(SDValue Op,

                                               SelectionDAG &DAG) const {

  SDLoc SL(Op);

  EVT VT = Op.getValueType();


  const AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(Op);

  unsigned SrcAS = ASC->getSrcAddressSpace();

  unsigned DestAS = ASC->getDestAddressSpace();


  if (isNullConstant(Op.getOperand(0)) && SrcAS == AMDGPUAS::FLAT_ADDRESS)

    return DAG.getSignedConstant(AMDGPU::getNullPointerValue(DestAS), SL, VT);


  return Op;

}


/// LLVM generates byte-addressed pointers.  For indirect addressing, we need to

/// convert these pointers to a register index.  Each register holds

/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the

/// \p StackWidth, which tells us how many of the 4 sub-registers will be used

/// for indirect addressing.

SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,

                                               unsigned StackWidth,

                                               SelectionDAG &DAG) const {

  unsigned SRLPad;

  switch(StackWidth) {

  case 1:

    SRLPad = 2;

    break;

  case 2:

    SRLPad = 3;

    break;

  case 4:

    SRLPad = 4;

    break;

  default: llvm_unreachable("Invalid stack width");

  }


  SDLoc DL(Ptr);

  return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,

                     DAG.getConstant(SRLPad, DL, MVT::i32));

}


void R600TargetLowering::getStackAddress(unsigned StackWidth,

                                         unsigned ElemIdx,

                                         unsigned &Channel,

                                         unsigned &PtrIncr) const {

  switch (StackWidth) {

  default:

  case 1:

    Channel = 0;

    if (ElemIdx > 0) {

      PtrIncr = 1;

    } else {

      PtrIncr = 0;

    }

    break;

  case 2:

    Channel = ElemIdx % 2;

    if (ElemIdx == 2) {

      PtrIncr = 1;

    } else {

      PtrIncr = 0;

    }

    break;

  case 4:

    Channel = ElemIdx;

    PtrIncr = 0;

    break;

  }

}


SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,

                                                   SelectionDAG &DAG) const {

  SDLoc DL(Store);

  //TODO: Who creates the i8 stores?

  assert(Store->isTruncatingStore()

         || Store->getValue().getValueType() == MVT::i8);

  assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);


  SDValue Mask;

  if (Store->getMemoryVT() == MVT::i8) {

    assert(Store->getAlign() >= 1);

    Mask = DAG.getConstant(0xff, DL, MVT::i32);

  } else if (Store->getMemoryVT() == MVT::i16) {

    assert(Store->getAlign() >= 2);

    Mask = DAG.getConstant(0xffff, DL, MVT::i32);

  } else {

    llvm_unreachable("Unsupported private trunc store");

  }


  SDValue OldChain = Store->getChain();

  bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);

  // Skip dummy

  SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;

  SDValue BasePtr = Store->getBasePtr();

  SDValue Offset = Store->getOffset();

  EVT MemVT = Store->getMemoryVT();


  SDValue LoadPtr = BasePtr;

  if (!Offset.isUndef()) {

    LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);

  }


  // Get dword location

  // TODO: this should be eliminated by the future SHR ptr, 2

  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,

                            DAG.getConstant(0xfffffffc, DL, MVT::i32));


  // Load dword

  // TODO: can we be smarter about machine pointer info?

  MachinePointerInfo PtrInfo(AMDGPUAS::PRIVATE_ADDRESS);

  SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);


  Chain = Dst.getValue(1);


  // Get offset in dword

  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,

                                DAG.getConstant(0x3, DL, MVT::i32));


  // Convert byte offset to bit shift

  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,

                                 DAG.getConstant(3, DL, MVT::i32));


  // TODO: Contrary to the name of the function,

  // it also handles sub i32 non-truncating stores (like i1)

  SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,

                                  Store->getValue());


  // Mask the value to the right type

  SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);


  // Shift the value in place

  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,

                                     MaskedValue, ShiftAmt);


  // Shift the mask in place

  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt);


  // Invert the mask. NOTE: if we had native ROL instructions we could

  // use inverted mask

  DstMask = DAG.getNOT(DL, DstMask, MVT::i32);


  // Cleanup the target bits

  Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);


  // Add the new bits

  SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);


  // Store dword

  // TODO: Can we be smarter about MachinePointerInfo?

  SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, PtrInfo);


  // If we are part of expanded vector, make our neighbors depend on this store

  if (VectorTrunc) {

    // Make all other vector elements depend on this store

    Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);

    DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);

  }

  return NewStore;

}


SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {

  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);

  unsigned AS = StoreNode->getAddressSpace();


  SDValue Chain = StoreNode->getChain();

  SDValue Ptr = StoreNode->getBasePtr();

  SDValue Value = StoreNode->getValue();


  EVT VT = Value.getValueType();

  EVT MemVT = StoreNode->getMemoryVT();

  EVT PtrVT = Ptr.getValueType();


  SDLoc DL(Op);


  const bool TruncatingStore = StoreNode->isTruncatingStore();


  // Neither LOCAL nor PRIVATE can do vectors at the moment

  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS ||

       TruncatingStore) &&

      VT.isVector()) {

    if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && TruncatingStore) {

      // Add an extra level of chain to isolate this vector

      SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);

      SmallVector<SDValue, 4> NewOps(StoreNode->ops());

      NewOps[0] = NewChain;

      StoreNode = cast<StoreSDNode>(DAG.UpdateNodeOperands(StoreNode, NewOps));

    }


    return scalarizeVectorStore(StoreNode, DAG);

  }


  Align Alignment = StoreNode->getAlign();

  if (Alignment < MemVT.getStoreSize() &&

      !allowsMisalignedMemoryAccesses(MemVT, AS, Alignment,

                                      StoreNode->getMemOperand()->getFlags(),

                                      nullptr)) {

    return expandUnalignedStore(StoreNode, DAG);

  }


  SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,

                                  DAG.getConstant(2, DL, PtrVT));


  if (AS == AMDGPUAS::GLOBAL_ADDRESS) {

    // It is beneficial to create MSKOR here instead of combiner to avoid

    // artificial dependencies introduced by RMW

    if (TruncatingStore) {

      assert(VT.bitsLE(MVT::i32));

      SDValue MaskConstant;

      if (MemVT == MVT::i8) {

        MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);

      } else {

        assert(MemVT == MVT::i16);

        assert(StoreNode->getAlign() >= 2);

        MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);

      }


      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr,

                                      DAG.getConstant(0x00000003, DL, PtrVT));

      SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,

                                     DAG.getConstant(3, DL, VT));


      // Put the mask in correct place

      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);


      // Put the value bits in correct place

      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);

      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);


      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32

      // vector instead.

      SDValue Src[4] = {

        ShiftedValue,

        DAG.getConstant(0, DL, MVT::i32),

        DAG.getConstant(0, DL, MVT::i32),

        Mask

      };

      SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);

      SDValue Args[3] = { Chain, Input, DWordAddr };

      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,

                                     Op->getVTList(), Args, MemVT,

                                     StoreNode->getMemOperand());

    }

    if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) {

      // Convert pointer from byte address to dword address.

      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);


      if (StoreNode->isIndexed()) {

        llvm_unreachable("Indexed stores not supported yet");

      } else {

        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());

      }

      return Chain;

    }

  }


  // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes

  if (AS != AMDGPUAS::PRIVATE_ADDRESS)

    return SDValue();


  if (MemVT.bitsLT(MVT::i32))

    return lowerPrivateTruncStore(StoreNode, DAG);


  // Standard i32+ store, tag it with DWORDADDR to note that the address

  // has been shifted

  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {

    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);

    return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());

  }


  // Tagged i32+ stores will be matched by patterns

  return SDValue();

}


// return (512 + (kc_bank << 12)

static int


ConstantAddressBlock(unsigned AddressSpace) {

  switch (AddressSpace) {

  case AMDGPUAS::CONSTANT_BUFFER_0:

    return 512;

  case AMDGPUAS::CONSTANT_BUFFER_1:

    return 512 + 4096;

  case AMDGPUAS::CONSTANT_BUFFER_2:

    return 512 + 4096 * 2;

  case AMDGPUAS::CONSTANT_BUFFER_3:

    return 512 + 4096 * 3;

  case AMDGPUAS::CONSTANT_BUFFER_4:

    return 512 + 4096 * 4;

  case AMDGPUAS::CONSTANT_BUFFER_5:

    return 512 + 4096 * 5;

  case AMDGPUAS::CONSTANT_BUFFER_6:

    return 512 + 4096 * 6;

  case AMDGPUAS::CONSTANT_BUFFER_7:

    return 512 + 4096 * 7;

  case AMDGPUAS::CONSTANT_BUFFER_8:

    return 512 + 4096 * 8;

  case AMDGPUAS::CONSTANT_BUFFER_9:

    return 512 + 4096 * 9;

  case AMDGPUAS::CONSTANT_BUFFER_10:

    return 512 + 4096 * 10;

  case AMDGPUAS::CONSTANT_BUFFER_11:

    return 512 + 4096 * 11;

  case AMDGPUAS::CONSTANT_BUFFER_12:

    return 512 + 4096 * 12;

  case AMDGPUAS::CONSTANT_BUFFER_13:

    return 512 + 4096 * 13;

  case AMDGPUAS::CONSTANT_BUFFER_14:

    return 512 + 4096 * 14;

  case AMDGPUAS::CONSTANT_BUFFER_15:

    return 512 + 4096 * 15;

  default:

    return -1;

  }

}


SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,

                                                SelectionDAG &DAG) const {

  SDLoc DL(Op);

  LoadSDNode *Load = cast<LoadSDNode>(Op);

  ISD::LoadExtType ExtType = Load->getExtensionType();

  EVT MemVT = Load->getMemoryVT();

  assert(Load->getAlign() >= MemVT.getStoreSize());


  SDValue BasePtr = Load->getBasePtr();

  SDValue Chain = Load->getChain();

  SDValue Offset = Load->getOffset();


  SDValue LoadPtr = BasePtr;

  if (!Offset.isUndef()) {

    LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);

  }


  // Get dword location

  // NOTE: this should be eliminated by the future SHR ptr, 2

  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,

                            DAG.getConstant(0xfffffffc, DL, MVT::i32));


  // Load dword

  // TODO: can we be smarter about machine pointer info?

  MachinePointerInfo PtrInfo(AMDGPUAS::PRIVATE_ADDRESS);

  SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);


  // Get offset within the register.

  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,

                                LoadPtr, DAG.getConstant(0x3, DL, MVT::i32));


  // Bit offset of target byte (byteIdx * 8).

  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,

                                 DAG.getConstant(3, DL, MVT::i32));


  // Shift to the right.

  SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt);


  // Eliminate the upper bits by setting them to ...

  EVT MemEltVT = MemVT.getScalarType();


  if (ExtType == ISD::SEXTLOAD) { // ... ones.

    SDValue MemEltVTNode = DAG.getValueType(MemEltVT);

    Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode);

  } else { // ... or zeros.

    Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT);

  }


  SDValue Ops[] = {

    Ret,

    Read.getValue(1) // This should be our output chain

  };


  return DAG.getMergeValues(Ops, DL);

}


SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {

  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);

  unsigned AS = LoadNode->getAddressSpace();

  EVT MemVT = LoadNode->getMemoryVT();

  ISD::LoadExtType ExtType = LoadNode->getExtensionType();


  if (AS == AMDGPUAS::PRIVATE_ADDRESS &&

      ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {

    return lowerPrivateExtLoad(Op, DAG);

  }


  SDLoc DL(Op);

  EVT VT = Op.getValueType();

  SDValue Chain = LoadNode->getChain();

  SDValue Ptr = LoadNode->getBasePtr();


  if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||

      LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&

      VT.isVector()) {

    SDValue Ops[2];

    std::tie(Ops[0], Ops[1]) = scalarizeVectorLoad(LoadNode, DAG);

    return DAG.getMergeValues(Ops, DL);

  }


  // This is still used for explicit load from addrspace(8)

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());

  if (ConstantBlock > -1 &&

      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||

       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {

    SDValue Result;

    if (isa<Constant>(LoadNode->getMemOperand()->getValue()) ||

        isa<ConstantSDNode>(Ptr)) {

      return constBufferLoad(LoadNode, LoadNode->getAddressSpace(), DAG);

    }

    // TODO: Does this even work?

    //  non-constant ptr can't be folded, keeps it as a v4f32 load

    Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,

                         DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,

                                     DAG.getConstant(4, DL, MVT::i32)),

                         DAG.getConstant(LoadNode->getAddressSpace() -

                                             AMDGPUAS::CONSTANT_BUFFER_0,

                                         DL, MVT::i32));


    if (!VT.isVector()) {

      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,

                           DAG.getConstant(0, DL, MVT::i32));

    }


    SDValue MergedValues[2] = {

      Result,

      Chain

    };

    return DAG.getMergeValues(MergedValues, DL);

  }


  // For most operations returning SDValue() will result in the node being

  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we

  // need to manually expand loads that may be legal in some address spaces and

  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for

  // compute shaders, since the data is sign extended when it is uploaded to the

  // buffer. However SEXT loads from other address spaces are not supported, so

  // we need to expand them here.

  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {

    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));

    SDValue NewLoad = DAG.getExtLoad(

        ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT,

        LoadNode->getAlign(), LoadNode->getMemOperand()->getFlags());

    SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,

                              DAG.getValueType(MemVT));


    SDValue MergedValues[2] = { Res, Chain };

    return DAG.getMergeValues(MergedValues, DL);

  }


  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {

    return SDValue();

  }


  // DWORDADDR ISD marks already shifted address

  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {

    assert(VT == MVT::i32);

    Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(2, DL, MVT::i32));

    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr);

    return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand());

  }

  return SDValue();

}


SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {

  SDValue Chain = Op.getOperand(0);

  SDValue Cond  = Op.getOperand(1);

  SDValue Jump  = Op.getOperand(2);


  return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),

                     Chain, Jump, Cond);

}


SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,

                                            SelectionDAG &DAG) const {

  MachineFunction &MF = DAG.getMachineFunction();

  const R600FrameLowering *TFL = Subtarget->getFrameLowering();


  FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);


  unsigned FrameIndex = FIN->getIndex();

  Register IgnoredFrameReg;

  StackOffset Offset =

      TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);

  return DAG.getConstant(Offset.getFixed() * 4 * TFL->getStackWidth(MF),

                         SDLoc(Op), Op.getValueType());

}


CCAssignFn *R600TargetLowering::CCAssignFnForCall(CallingConv::ID CC,

                                                  bool IsVarArg) const {

  switch (CC) {

  case CallingConv::AMDGPU_KERNEL:

  case CallingConv::SPIR_KERNEL:

  case CallingConv::C:

  case CallingConv::Fast:

  case CallingConv::Cold:

    llvm_unreachable("kernels should not be handled here");

  case CallingConv::AMDGPU_VS:

  case CallingConv::AMDGPU_GS:

  case CallingConv::AMDGPU_PS:

  case CallingConv::AMDGPU_CS:

  case CallingConv::AMDGPU_HS:

  case CallingConv::AMDGPU_ES:

  case CallingConv::AMDGPU_LS:

    return CC_R600;

  default:

    reportFatalUsageError("unsupported calling convention");

  }

}


/// XXX Only kernel functions are supported, so we can assume for now that

/// every function is a kernel function, but in the future we should use

/// separate calling conventions for kernel and non-kernel functions.


SDValue R600TargetLowering::LowerFormalArguments(

    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,

    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,

    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  SmallVector<CCValAssign, 16> ArgLocs;

  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,

                 *DAG.getContext());

  MachineFunction &MF = DAG.getMachineFunction();


  if (AMDGPU::isShader(CallConv)) {

    CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));

  } else {

    analyzeFormalArgumentsCompute(CCInfo, Ins);

  }


  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {

    CCValAssign &VA = ArgLocs[i];

    const ISD::InputArg &In = Ins[i];

    EVT VT = In.VT;

    EVT MemVT = VA.getLocVT();

    if (!VT.isVector() && MemVT.isVector()) {

      // Get load source type if scalarized.

      MemVT = MemVT.getVectorElementType();

    }


    if (VT.isInteger() && !MemVT.isInteger())

      MemVT = MemVT.changeTypeToInteger();


    if (AMDGPU::isShader(CallConv)) {

      Register Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);

      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);

      InVals.push_back(Register);

      continue;

    }


    // i64 isn't a legal type, so the register type used ends up as i32, which

    // isn't expected here. It attempts to create this sextload, but it ends up

    // being invalid. Somehow this seems to work with i64 arguments, but breaks

    // for <1 x i64>.


    // The first 36 bytes of the input buffer contains information about

    // thread group and global sizes.

    ISD::LoadExtType Ext = ISD::NON_EXTLOAD;

    if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {

      if (VT.isFloatingPoint()) {

        Ext = ISD::EXTLOAD;

      } else {

        // FIXME: This should really check the extload type, but the handling of

        // extload vector parameters seems to be broken.


        // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;

        Ext = ISD::SEXTLOAD;

      }

    }


    // Compute the offset from the value.

    // XXX - I think PartOffset should give you this, but it seems to give the

    // size of the register which isn't useful.


    unsigned PartOffset = VA.getLocMemOffset();

    Align Alignment = commonAlignment(Align(VT.getStoreSize()), PartOffset);


    MachinePointerInfo PtrInfo(AMDGPUAS::PARAM_I_ADDRESS);

    SDValue Arg = DAG.getLoad(

        ISD::UNINDEXED, Ext, VT, DL, Chain,

        DAG.getConstant(PartOffset, DL, MVT::i32), DAG.getUNDEF(MVT::i32),

        PtrInfo,

        MemVT, Alignment, MachineMemOperand::MONonTemporal |

                                        MachineMemOperand::MODereferenceable |

                                        MachineMemOperand::MOInvariant);


    InVals.push_back(Arg);

  }

  return Chain;

}


EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,

                                           EVT VT) const {

   if (!VT.isVector())

     return MVT::i32;

   return VT.changeVectorElementTypeToInteger();

}


bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,

                                          const MachineFunction &MF) const {

  // Local and Private addresses do not handle vectors. Limit to i32

  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS)) {

    return (MemVT.getSizeInBits() <= 32);

  }

  return true;

}


bool R600TargetLowering::allowsMisalignedMemoryAccesses(

    EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,

    unsigned *IsFast) const {

  if (IsFast)

    *IsFast = 0;


  if (!VT.isSimple() || VT == MVT::Other)

    return false;


  if (VT.bitsLT(MVT::i32))

    return false;


  // TODO: This is a rough estimate.

  if (IsFast)

    *IsFast = 1;


  return VT.bitsGT(MVT::i32) && Alignment >= Align(4);

}


static SDValue CompactSwizzlableVector(

  SelectionDAG &DAG, SDValue VectorEntry,

  DenseMap<unsigned, unsigned> &RemapSwizzle) {

  assert(RemapSwizzle.empty());


  SDLoc DL(VectorEntry);

  EVT EltTy = VectorEntry.getValueType().getVectorElementType();


  SDValue NewBldVec[4];

  for (unsigned i = 0; i < 4; i++)

    NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,

                               DAG.getIntPtrConstant(i, DL));


  for (unsigned i = 0; i < 4; i++) {

    if (NewBldVec[i].isUndef())

      // We mask write here to teach later passes that the ith element of this

      // vector is undef. Thus we can use it to reduce 128 bits reg usage,

      // break false dependencies and additionally make assembly easier to read.

      RemapSwizzle[i] = 7; // SEL_MASK_WRITE

    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {

      if (C->isZero()) {

        RemapSwizzle[i] = 4; // SEL_0

        NewBldVec[i] = DAG.getUNDEF(MVT::f32);

      } else if (C->isExactlyValue(1.0)) {

        RemapSwizzle[i] = 5; // SEL_1

        NewBldVec[i] = DAG.getUNDEF(MVT::f32);

      }

    }


    if (NewBldVec[i].isUndef())

      continue;


    for (unsigned j = 0; j < i; j++) {

      if (NewBldVec[i] == NewBldVec[j]) {

        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());

        RemapSwizzle[i] = j;

        break;

      }

    }

  }


  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),

                            NewBldVec);

}


static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,

                                DenseMap<unsigned, unsigned> &RemapSwizzle) {

  assert(RemapSwizzle.empty());


  SDLoc DL(VectorEntry);

  EVT EltTy = VectorEntry.getValueType().getVectorElementType();


  SDValue NewBldVec[4];

  bool isUnmovable[4] = {false, false, false, false};

  for (unsigned i = 0; i < 4; i++)

    NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,

                               DAG.getIntPtrConstant(i, DL));


  for (unsigned i = 0; i < 4; i++) {

    RemapSwizzle[i] = i;

    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {

      unsigned Idx = NewBldVec[i].getConstantOperandVal(1);

      if (i == Idx)

        isUnmovable[Idx] = true;

    }

  }


  for (unsigned i = 0; i < 4; i++) {

    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {

      unsigned Idx = NewBldVec[i].getConstantOperandVal(1);

      if (isUnmovable[Idx])

        continue;

      // Swap i and Idx

      std::swap(NewBldVec[Idx], NewBldVec[i]);

      std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);

      break;

    }

  }


  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),

                            NewBldVec);

}


SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[],

                                            SelectionDAG &DAG,

                                            const SDLoc &DL) const {

  // Old -> New swizzle values

  DenseMap<unsigned, unsigned> SwizzleRemap;


  BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);

  for (unsigned i = 0; i < 4; i++) {

    unsigned Idx = Swz[i]->getAsZExtVal();

    auto It = SwizzleRemap.find(Idx);

    if (It != SwizzleRemap.end())

      Swz[i] = DAG.getConstant(It->second, DL, MVT::i32);

  }


  SwizzleRemap.clear();

  BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);

  for (unsigned i = 0; i < 4; i++) {

    unsigned Idx = Swz[i]->getAsZExtVal();

    auto It = SwizzleRemap.find(Idx);

    if (It != SwizzleRemap.end())

      Swz[i] = DAG.getConstant(It->second, DL, MVT::i32);

  }


  return BuildVector;

}


SDValue R600TargetLowering::constBufferLoad(LoadSDNode *LoadNode, int Block,

                                            SelectionDAG &DAG) const {

  SDLoc DL(LoadNode);

  EVT VT = LoadNode->getValueType(0);

  SDValue Chain = LoadNode->getChain();

  SDValue Ptr = LoadNode->getBasePtr();

  assert (isa<ConstantSDNode>(Ptr));


  //TODO: Support smaller loads

  if (LoadNode->getMemoryVT().getScalarType() != MVT::i32 || !ISD::isNON_EXTLoad(LoadNode))

    return SDValue();


  if (LoadNode->getAlign() < Align(4))

    return SDValue();


  int ConstantBlock = ConstantAddressBlock(Block);


  SDValue Slots[4];

  for (unsigned i = 0; i < 4; i++) {

    // We want Const position encoded with the following formula :

    // (((512 + (kc_bank << 12) + const_index) << 2) + chan)

    // const_index is Ptr computed by llvm using an alignment of 16.

    // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and

    // then div by 4 at the ISel step

    SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,

        DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));

    Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);

  }

  EVT NewVT = MVT::v4i32;

  unsigned NumElements = 4;

  if (VT.isVector()) {

    NewVT = VT;

    NumElements = VT.getVectorNumElements();

  }

  SDValue Result = DAG.getBuildVector(NewVT, DL, ArrayRef(Slots, NumElements));

  if (!VT.isVector()) {

    Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,

                         DAG.getConstant(0, DL, MVT::i32));

  }

  SDValue MergedValues[2] = {

    Result,

    Chain

  };

  return DAG.getMergeValues(MergedValues, DL);

}


//===----------------------------------------------------------------------===//

// Custom DAG Optimizations

//===----------------------------------------------------------------------===//


SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,

                                              DAGCombinerInfo &DCI) const {

  SelectionDAG &DAG = DCI.DAG;

  SDLoc DL(N);


  switch (N->getOpcode()) {

  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)

  case ISD::FP_ROUND: {

      SDValue Arg = N->getOperand(0);

      if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {

        return DAG.getNode(ISD::UINT_TO_FP, DL, N->getValueType(0),

                           Arg.getOperand(0));

      }

      break;

    }


  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->

  // (i32 select_cc f32, f32, -1, 0 cc)

  //

  // Mesa's GLSL frontend generates the above pattern a lot and we can lower

  // this to one of the SET*_DX10 instructions.

  case ISD::FP_TO_SINT: {

    SDValue FNeg = N->getOperand(0);

    if (FNeg.getOpcode() != ISD::FNEG) {

      return SDValue();

    }

    SDValue SelectCC = FNeg.getOperand(0);

    if (SelectCC.getOpcode() != ISD::SELECT_CC ||

        SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS

        SelectCC.getOperand(2).getValueType() != MVT::f32 || // True

        !isHWTrueValue(SelectCC.getOperand(2)) ||

        !isHWFalseValue(SelectCC.getOperand(3))) {

      return SDValue();

    }


    return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0),

                       SelectCC.getOperand(0),               // LHS

                       SelectCC.getOperand(1),               // RHS

                       DAG.getAllOnesConstant(DL, MVT::i32), // True

                       DAG.getConstant(0, DL, MVT::i32),     // False

                       SelectCC.getOperand(4));              // CC

  }


  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx

  // => build_vector elt0, ... , NewEltIdx, ... , eltN

  case ISD::INSERT_VECTOR_ELT: {

    SDValue InVec = N->getOperand(0);

    SDValue InVal = N->getOperand(1);

    SDValue EltNo = N->getOperand(2);


    // If the inserted element is an UNDEF, just use the input vector.

    if (InVal.isUndef())

      return InVec;


    EVT VT = InVec.getValueType();


    // If we can't generate a legal BUILD_VECTOR, exit

    if (!isOperationLegal(ISD::BUILD_VECTOR, VT))

      return SDValue();


    // Check that we know which element is being inserted

    if (!isa<ConstantSDNode>(EltNo))

      return SDValue();

    unsigned Elt = EltNo->getAsZExtVal();


    // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially

    // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the

    // vector elements.

    SmallVector<SDValue, 8> Ops;

    if (InVec.getOpcode() == ISD::BUILD_VECTOR) {

      Ops.append(InVec.getNode()->op_begin(),

                 InVec.getNode()->op_end());

    } else if (InVec.isUndef()) {

      unsigned NElts = VT.getVectorNumElements();

      Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));

    } else {

      return SDValue();

    }


    // Insert the element

    if (Elt < Ops.size()) {

      // All the operands of BUILD_VECTOR must have the same type;

      // we enforce that here.

      EVT OpVT = Ops[0].getValueType();

      if (InVal.getValueType() != OpVT)

        InVal = OpVT.bitsGT(InVal.getValueType()) ?

          DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :

          DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);

      Ops[Elt] = InVal;

    }


    // Return the new vector

    return DAG.getBuildVector(VT, DL, Ops);

  }


  // Extract_vec (Build_vector) generated by custom lowering

  // also needs to be customly combined

  case ISD::EXTRACT_VECTOR_ELT: {

    SDValue Arg = N->getOperand(0);

    if (Arg.getOpcode() == ISD::BUILD_VECTOR) {

      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {

        unsigned Element = Const->getZExtValue();

        return Arg->getOperand(Element);

      }

    }

    if (Arg.getOpcode() == ISD::BITCAST &&

        Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&

        (Arg.getOperand(0).getValueType().getVectorNumElements() ==

         Arg.getValueType().getVectorNumElements())) {

      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {

        unsigned Element = Const->getZExtValue();

        return DAG.getNode(ISD::BITCAST, DL, N->getVTList(),

                           Arg->getOperand(0).getOperand(Element));

      }

    }

    break;

  }


  case ISD::SELECT_CC: {

    // Try common optimizations

    if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI))

      return Ret;


    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->

    //      selectcc x, y, a, b, inv(cc)

    //

    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->

    //      selectcc x, y, a, b, cc

    SDValue LHS = N->getOperand(0);

    if (LHS.getOpcode() != ISD::SELECT_CC) {

      return SDValue();

    }


    SDValue RHS = N->getOperand(1);

    SDValue True = N->getOperand(2);

    SDValue False = N->getOperand(3);

    ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();


    if (LHS.getOperand(2).getNode() != True.getNode() ||

        LHS.getOperand(3).getNode() != False.getNode() ||

        RHS.getNode() != False.getNode()) {

      return SDValue();

    }


    switch (NCC) {

    default: return SDValue();

    case ISD::SETNE: return LHS;

    case ISD::SETEQ: {

      ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();

      LHSCC = ISD::getSetCCInverse(LHSCC, LHS.getOperand(0).getValueType());

      if (DCI.isBeforeLegalizeOps() ||

          isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))

        return DAG.getSelectCC(DL,

                               LHS.getOperand(0),

                               LHS.getOperand(1),

                               LHS.getOperand(2),

                               LHS.getOperand(3),

                               LHSCC);

      break;

    }

    }

    return SDValue();

  }


  case AMDGPUISD::R600_EXPORT: {

    SDValue Arg = N->getOperand(1);

    if (Arg.getOpcode() != ISD::BUILD_VECTOR)

      break;


    SDValue NewArgs[8] = {

      N->getOperand(0), // Chain

      SDValue(),

      N->getOperand(2), // ArrayBase

      N->getOperand(3), // Type

      N->getOperand(4), // SWZ_X

      N->getOperand(5), // SWZ_Y

      N->getOperand(6), // SWZ_Z

      N->getOperand(7) // SWZ_W

    };

    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);

    return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs);

  }

  case AMDGPUISD::TEXTURE_FETCH: {

    SDValue Arg = N->getOperand(1);

    if (Arg.getOpcode() != ISD::BUILD_VECTOR)

      break;


    SDValue NewArgs[19] = {

      N->getOperand(0),

      N->getOperand(1),

      N->getOperand(2),

      N->getOperand(3),

      N->getOperand(4),

      N->getOperand(5),

      N->getOperand(6),

      N->getOperand(7),

      N->getOperand(8),

      N->getOperand(9),

      N->getOperand(10),

      N->getOperand(11),

      N->getOperand(12),

      N->getOperand(13),

      N->getOperand(14),

      N->getOperand(15),

      N->getOperand(16),

      N->getOperand(17),

      N->getOperand(18),

    };

    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);

    return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);

  }


  case ISD::LOAD: {

    LoadSDNode *LoadNode = cast<LoadSDNode>(N);

    SDValue Ptr = LoadNode->getBasePtr();

    if (LoadNode->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS &&

         isa<ConstantSDNode>(Ptr))

      return constBufferLoad(LoadNode, AMDGPUAS::CONSTANT_BUFFER_0, DAG);

    break;

  }


  default: break;

  }


  return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);

}


bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,

                                     SDValue &Src, SDValue &Neg, SDValue &Abs,

                                     SDValue &Sel, SDValue &Imm,

                                     SelectionDAG &DAG) const {

  const R600InstrInfo *TII = Subtarget->getInstrInfo();

  if (!Src.isMachineOpcode())

    return false;


  switch (Src.getMachineOpcode()) {

  case R600::FNEG_R600:

    if (!Neg.getNode())

      return false;

    Src = Src.getOperand(0);

    Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);

    return true;

  case R600::FABS_R600:

    if (!Abs.getNode())

      return false;

    Src = Src.getOperand(0);

    Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);

    return true;

  case R600::CONST_COPY: {

    unsigned Opcode = ParentNode->getMachineOpcode();

    bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;


    if (!Sel.getNode())

      return false;


    SDValue CstOffset = Src.getOperand(0);

    if (ParentNode->getValueType(0).isVector())

      return false;


    // Gather constants values

    int SrcIndices[] = {

      TII->getOperandIdx(Opcode, R600::OpName::src0),

      TII->getOperandIdx(Opcode, R600::OpName::src1),

      TII->getOperandIdx(Opcode, R600::OpName::src2),

      TII->getOperandIdx(Opcode, R600::OpName::src0_X),

      TII->getOperandIdx(Opcode, R600::OpName::src0_Y),

      TII->getOperandIdx(Opcode, R600::OpName::src0_Z),

      TII->getOperandIdx(Opcode, R600::OpName::src0_W),

      TII->getOperandIdx(Opcode, R600::OpName::src1_X),

      TII->getOperandIdx(Opcode, R600::OpName::src1_Y),

      TII->getOperandIdx(Opcode, R600::OpName::src1_Z),

      TII->getOperandIdx(Opcode, R600::OpName::src1_W)

    };

    std::vector<unsigned> Consts;

    for (int OtherSrcIdx : SrcIndices) {

      int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);

      if (OtherSrcIdx < 0 || OtherSelIdx < 0)

        continue;

      if (HasDst) {

        OtherSrcIdx--;

        OtherSelIdx--;

      }

      if (RegisterSDNode *Reg =

          dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {

        if (Reg->getReg() == R600::ALU_CONST) {

          Consts.push_back(ParentNode->getConstantOperandVal(OtherSelIdx));

        }

      }

    }


    ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);

    Consts.push_back(Cst->getZExtValue());

    if (!TII->fitsConstReadLimitations(Consts)) {

      return false;

    }


    Sel = CstOffset;

    Src = DAG.getRegister(R600::ALU_CONST, MVT::f32);

    return true;

  }

  case R600::MOV_IMM_GLOBAL_ADDR:

    // Check if the Imm slot is used. Taken from below.

    if (Imm->getAsZExtVal())

      return false;

    Imm = Src.getOperand(0);

    Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32);

    return true;

  case R600::MOV_IMM_I32:

  case R600::MOV_IMM_F32: {

    unsigned ImmReg = R600::ALU_LITERAL_X;

    uint64_t ImmValue = 0;


    if (Src.getMachineOpcode() == R600::MOV_IMM_F32) {

      ConstantFPSDNode *FPC = cast<ConstantFPSDNode>(Src.getOperand(0));

      float FloatValue = FPC->getValueAPF().convertToFloat();

      if (FloatValue == 0.0) {

        ImmReg = R600::ZERO;

      } else if (FloatValue == 0.5) {

        ImmReg = R600::HALF;

      } else if (FloatValue == 1.0) {

        ImmReg = R600::ONE;

      } else {

        ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();

      }

    } else {

      uint64_t Value = Src.getConstantOperandVal(0);

      if (Value == 0) {

        ImmReg = R600::ZERO;

      } else if (Value == 1) {

        ImmReg = R600::ONE_INT;

      } else {

        ImmValue = Value;

      }

    }


    // Check that we aren't already using an immediate.

    // XXX: It's possible for an instruction to have more than one

    // immediate operand, but this is not supported yet.

    if (ImmReg == R600::ALU_LITERAL_X) {

      if (!Imm.getNode())

        return false;

      ConstantSDNode *C = cast<ConstantSDNode>(Imm);

      if (C->getZExtValue())

        return false;

      Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);

    }

    Src = DAG.getRegister(ImmReg, MVT::i32);

    return true;

  }

  default:

    return false;

  }

}


/// Fold the instructions after selecting them

SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,

                                            SelectionDAG &DAG) const {

  const R600InstrInfo *TII = Subtarget->getInstrInfo();

  if (!Node->isMachineOpcode())

    return Node;


  unsigned Opcode = Node->getMachineOpcode();

  SDValue FakeOp;


  std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());


  if (Opcode == R600::DOT_4) {

    int OperandIdx[] = {

      TII->getOperandIdx(Opcode, R600::OpName::src0_X),

      TII->getOperandIdx(Opcode, R600::OpName::src0_Y),

      TII->getOperandIdx(Opcode, R600::OpName::src0_Z),

      TII->getOperandIdx(Opcode, R600::OpName::src0_W),

      TII->getOperandIdx(Opcode, R600::OpName::src1_X),

      TII->getOperandIdx(Opcode, R600::OpName::src1_Y),

      TII->getOperandIdx(Opcode, R600::OpName::src1_Z),

      TII->getOperandIdx(Opcode, R600::OpName::src1_W)

        };

    int NegIdx[] = {

      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X),

      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y),

      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z),

      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W),

      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X),

      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y),

      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z),

      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W)

    };

    int AbsIdx[] = {

      TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X),

      TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y),

      TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Z),

      TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W),

      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X),

      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y),

      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z),

      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_W)

    };

    for (unsigned i = 0; i < 8; i++) {

      if (OperandIdx[i] < 0)

        return Node;

      SDValue &Src = Ops[OperandIdx[i] - 1];

      SDValue &Neg = Ops[NegIdx[i] - 1];

      SDValue &Abs = Ops[AbsIdx[i] - 1];

      bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;

      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);

      if (HasDst)

        SelIdx--;

      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;

      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))

        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);

    }

  } else if (Opcode == R600::REG_SEQUENCE) {

    for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {

      SDValue &Src = Ops[i];

      if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))

        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);

    }

  } else {

    if (!TII->hasInstrModifiers(Opcode))

      return Node;

    int OperandIdx[] = {

      TII->getOperandIdx(Opcode, R600::OpName::src0),

      TII->getOperandIdx(Opcode, R600::OpName::src1),

      TII->getOperandIdx(Opcode, R600::OpName::src2)

    };

    int NegIdx[] = {

      TII->getOperandIdx(Opcode, R600::OpName::src0_neg),

      TII->getOperandIdx(Opcode, R600::OpName::src1_neg),

      TII->getOperandIdx(Opcode, R600::OpName::src2_neg)

    };

    int AbsIdx[] = {

      TII->getOperandIdx(Opcode, R600::OpName::src0_abs),

      TII->getOperandIdx(Opcode, R600::OpName::src1_abs),

      -1

    };

    for (unsigned i = 0; i < 3; i++) {

      if (OperandIdx[i] < 0)

        return Node;

      SDValue &Src = Ops[OperandIdx[i] - 1];

      SDValue &Neg = Ops[NegIdx[i] - 1];

      SDValue FakeAbs;

      SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;

      bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;

      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);

      int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal);

      if (HasDst) {

        SelIdx--;

        ImmIdx--;

      }

      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;

      SDValue &Imm = Ops[ImmIdx];

      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))

        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);

    }

  }


  return Node;

}


TargetLowering::AtomicExpansionKind

R600TargetLowering::shouldExpandAtomicRMWInIR(const AtomicRMWInst *RMW) const {

  switch (RMW->getOperation()) {

  case AtomicRMWInst::Nand:

  case AtomicRMWInst::FAdd:

  case AtomicRMWInst::FSub:

  case AtomicRMWInst::FMax:

  case AtomicRMWInst::FMin:

  case AtomicRMWInst::USubCond:

  case AtomicRMWInst::USubSat:

    return AtomicExpansionKind::CmpXChg;

  case AtomicRMWInst::UIncWrap:

  case AtomicRMWInst::UDecWrap:

    // FIXME: Cayman at least appears to have instructions for this, but the

    // instruction definitions appear to be missing.

    return AtomicExpansionKind::CmpXChg;

  case AtomicRMWInst::Xchg: {

    const DataLayout &DL = RMW->getFunction()->getDataLayout();

    unsigned ValSize = DL.getTypeSizeInBits(RMW->getType());

    if (ValSize == 32 || ValSize == 64)

      return AtomicExpansionKind::None;

    return AtomicExpansionKind::CmpXChg;

  }

  default:

    if (auto *IntTy = dyn_cast<IntegerType>(RMW->getType())) {

      unsigned Size = IntTy->getBitWidth();

      if (Size == 32 || Size == 64)

        return AtomicExpansionKind::None;

    }


    return AtomicExpansionKind::CmpXChg;

  }


  llvm_unreachable("covered atomicrmw op switch");

}

SDValue
return SDValue()

assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

AMDGPUSelectionDAGInfo.h

AMDGPU.h

DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition ARMSLSHardening.cpp:73

Results
Function Alias Analysis Results
Definition AliasAnalysis.cpp:808

CodeGenPassBuilder.h
Interfaces for producing common pass manager configurations.

TII
const HexagonInstrInfo * TII
Definition HexagonCopyToCombine.cpp:118

MI
IRTranslator LLVM IR MI
Definition IRTranslator.cpp:110

InlinePriorityMode::Size
@ Size
Definition InlineOrder.cpp:25

Ops
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Definition ItaniumDemangle.h:3391

I
#define I(x, y, z)
Definition MD5.cpp:57

MachineFunction.h

isUndef
static bool isUndef(const MachineInstr &MI)
Definition MachineSSAContext.cpp:57

Reg
Register Reg
Definition MachineSink.cpp:2126

Register
Promote Memory to Register
Definition Mem2Reg.cpp:110

R600Defines.h

MO_FLAG_NEG
#define MO_FLAG_NEG
Definition R600Defines.h:15

MO_FLAG_ABS
#define MO_FLAG_ABS
Definition R600Defines.h:16

MO_FLAG_MASK
#define MO_FLAG_MASK
Definition R600Defines.h:17

MO_FLAG_PUSH
#define MO_FLAG_PUSH
Definition R600Defines.h:18

isEOP
static bool isEOP(MachineBasicBlock::iterator I)
Definition R600ISelLowering.cpp:212

ReorganizeVector
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry, DenseMap< unsigned, unsigned > &RemapSwizzle)
Definition R600ISelLowering.cpp:1611

ConstantAddressBlock
static int ConstantAddressBlock(unsigned AddressSpace)
Definition R600ISelLowering.cpp:1223

CompactSwizzlableVector
static SDValue CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry, DenseMap< unsigned, unsigned > &RemapSwizzle)
Definition R600ISelLowering.cpp:1566

R600ISelLowering.h
R600 DAG Lowering interface definition.

R600MCTargetDesc.h
Provides R600 specific target descriptions.

R600MachineFunctionInfo.h

R600Subtarget.h
AMDGPU R600 specific subclass of TargetSubtarget.

R600TargetMachine.h
The AMDGPU TargetMachine interface definition for hw codegen targets.

Cond
const SmallVectorImpl< MachineOperand > & Cond
Definition RISCVRedundantCopyElimination.cpp:73

getValueType
static Type * getValueType(Value *V, bool LookThroughCmp=false)
Returns the "element type" of the given value/instruction V.
Definition SLPVectorizer.cpp:317

RHS
Value * RHS
Definition X86PartialReduction.cpp:81

LHS
Value * LHS
Definition X86PartialReduction.cpp:80

llvm::AMDGPUFrameLowering::getStackWidth
unsigned getStackWidth(const MachineFunction &MF) const
Definition AMDGPUFrameLowering.cpp:22

llvm::AMDGPUMachineFunctionInfo
Definition AMDGPUMachineFunctionInfo.h:24

llvm::AMDGPUSubtarget::R700
@ R700
Definition AMDGPUSubtarget.h:35

llvm::AMDGPUTargetLowering::combineFMinMaxLegacy
SDValue combineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, SDValue CC, DAGCombinerInfo &DCI) const
Generate Min/Max node.
Definition AMDGPUISelLowering.cpp:1744

llvm::AMDGPUTargetLowering::analyzeFormalArgumentsCompute
void analyzeFormalArgumentsCompute(CCState &State, const SmallVectorImpl< ISD::InputArg > &Ins) const
The SelectionDAGBuilder will automatically promote function arguments with illegal types.
Definition AMDGPUISelLowering.cpp:1215

llvm::AMDGPUTargetLowering::LowerGlobalAddress
virtual SDValue LowerGlobalAddress(AMDGPUMachineFunctionInfo *MFI, SDValue Op, SelectionDAG &DAG) const
Definition AMDGPUISelLowering.cpp:1529

llvm::AMDGPUTargetLowering::LowerSDIVREM
SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const
Definition AMDGPUISelLowering.cpp:2389

llvm::AMDGPUTargetLowering::LowerOperation
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
Definition AMDGPUISelLowering.cpp:1436

llvm::AMDGPUTargetLowering::getImplicitParameterOffset
uint32_t getImplicitParameterOffset(const MachineFunction &MF, const ImplicitParameter Param) const
Helper function that returns the byte offset of the given type of implicit parameter.
Definition AMDGPUISelLowering.cpp:5964

llvm::AMDGPUTargetLowering::CreateLiveInRegisterRaw
SDValue CreateLiveInRegisterRaw(SelectionDAG &DAG, const TargetRegisterClass *RC, Register Reg, EVT VT) const
Definition AMDGPUISelLowering.h:373

llvm::AMDGPUTargetLowering::AMDGPUTargetLowering
AMDGPUTargetLowering(const TargetMachine &TM, const TargetSubtargetInfo &STI, const AMDGPUSubtarget &AMDGPUSTI)
Definition AMDGPUISelLowering.cpp:62

llvm::AMDGPUTargetLowering::FIRST_IMPLICIT
@ FIRST_IMPLICIT
Definition AMDGPUISelLowering.h:398

llvm::AMDGPUTargetLowering::ReplaceNodeResults
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
Definition AMDGPUISelLowering.cpp:1489

llvm::AMDGPUTargetLowering::PerformDAGCombine
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
Definition AMDGPUISelLowering.cpp:5575

llvm::AMDGPUTargetLowering::LowerUDIVREM64
void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results) const
Definition AMDGPUISelLowering.cpp:2120

llvm::APFloat::convertToFloat
LLVM_ABI float convertToFloat() const
Converts this APFloat to host float value.
Definition APFloat.cpp:5986

llvm::APFloat::bitcastToAPInt
APInt bitcastToAPInt() const
Definition APFloat.h:1430

llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563

llvm::AddrSpaceCastSDNode::getSrcAddressSpace
unsigned getSrcAddressSpace() const
Definition SelectionDAGNodes.h:1416

llvm::AddrSpaceCastSDNode::getDestAddressSpace
unsigned getDestAddressSpace() const
Definition SelectionDAGNodes.h:1417

llvm::AtomicRMWInst
an instruction that atomically reads a memory location, combines it with another value,...
Definition Instructions.h:710

llvm::AtomicRMWInst::FAdd
@ FAdd
*p = old + v
Definition Instructions.h:747

llvm::AtomicRMWInst::USubCond
@ USubCond
Subtract only if no unsigned overflow.
Definition Instructions.h:786

llvm::AtomicRMWInst::USubSat
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
Definition Instructions.h:790

llvm::AtomicRMWInst::FSub
@ FSub
*p = old - v
Definition Instructions.h:750

llvm::AtomicRMWInst::UIncWrap
@ UIncWrap
Increment one up to a maximum value.
Definition Instructions.h:778

llvm::AtomicRMWInst::FMin
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
Definition Instructions.h:758

llvm::AtomicRMWInst::FMax
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
Definition Instructions.h:754

llvm::AtomicRMWInst::UDecWrap
@ UDecWrap
Decrement one until a minimum value or zero.
Definition Instructions.h:782

llvm::AtomicRMWInst::Xchg
@ Xchg
*p = v
Definition Instructions.h:724

llvm::AtomicRMWInst::Nand
@ Nand
*p = ~(old & v)
Definition Instructions.h:732

llvm::AtomicRMWInst::getOperation
BinOp getOperation() const
Definition Instructions.h:830

llvm::CCState
CCState - This class holds information needed while lowering arguments and return values.
Definition CallingConvLower.h:171

llvm::CCState::AnalyzeFormalArguments
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
Definition CallingConvLower.cpp:85

llvm::CCValAssign
CCValAssign - Represent assignment of one arg/retval to a location.
Definition CallingConvLower.h:34

llvm::CCValAssign::getLocReg
Register getLocReg() const
Definition CallingConvLower.h:129

llvm::CCValAssign::getLocMemOffset
int64_t getLocMemOffset() const
Definition CallingConvLower.h:130

llvm::CCValAssign::getLocVT
MVT getLocVT() const
Definition CallingConvLower.h:133

llvm::ConstantFPSDNode
Definition SelectionDAGNodes.h:1877

llvm::ConstantFPSDNode::getValueAPF
const APFloat & getValueAPF() const
Definition SelectionDAGNodes.h:1890

llvm::ConstantPointerNull::get
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
Definition Constants.cpp:2030

llvm::ConstantSDNode
Definition SelectionDAGNodes.h:1822

llvm::ConstantSDNode::getZExtValue
uint64_t getZExtValue() const
Definition SelectionDAGNodes.h:1839

llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64

llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178

llvm::DenseMapBase::empty
bool empty() const
Definition DenseMap.h:109

llvm::DenseMapBase::end
iterator end()
Definition DenseMap.h:81

llvm::DenseMapBase::clear
void clear()
Definition DenseMap.h:121

llvm::DenseMap
Definition DenseMap.h:743

llvm::FrameIndexSDNode::getIndex
int getIndex() const
Definition SelectionDAGNodes.h:2116

llvm::Function::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Definition Function.cpp:362

llvm::GlobalAddressSDNode::getAddressSpace
LLVM_ABI unsigned getAddressSpace() const
Definition SelectionDAG.cpp:14446

llvm::GlobalAddressSDNode::getGlobal
const GlobalValue * getGlobal() const
Definition SelectionDAGNodes.h:2075

llvm::Instruction::getFunction
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
Definition Instruction.cpp:90

llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68

llvm::LSBaseSDNode::isIndexed
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
Definition SelectionDAGNodes.h:2645

llvm::LoadSDNode
This class is used to represent ISD::LOAD nodes.
Definition SelectionDAGNodes.h:2657

llvm::LoadSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition SelectionDAGNodes.h:2676

llvm::LoadSDNode::getExtensionType
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Definition SelectionDAGNodes.h:2672

llvm::MVT
Machine Value Type.
Definition MachineValueType.h:36

llvm::MVT::integer_valuetypes
static auto integer_valuetypes()
Definition MachineValueType.h:537

llvm::MachineBasicBlock
Definition MachineBasicBlock.h:122

llvm::MachineBasicBlock::findDebugLoc
LLVM_ABI DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
Definition MachineBasicBlock.cpp:1561

llvm::MachineBasicBlock::end
iterator end()
Definition MachineBasicBlock.h:386

llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition MachineBasicBlock.h:330

llvm::MachineBasicBlock::iterator
MachineInstrBundleIterator< MachineInstr > iterator
Definition MachineBasicBlock.h:348

llvm::MachineFunction
Definition MachineFunction.h:294

llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition MachineFunction.h:798

llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition MachineFunction.h:884

llvm::MachineFunction::addLiveIn
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
Definition MachineFunction.cpp:802

llvm::MachineInstrBuilder
Definition MachineInstrBuilder.h:171

llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
Definition MachineInstrBuilder.h:199

llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition MachineInstrBuilder.h:233

llvm::MachineInstrBuilder::add
const MachineInstrBuilder & add(const MachineOperand &MO) const
Definition MachineInstrBuilder.h:326

llvm::MachineInstr
Representation of each machine instruction.
Definition MachineInstr.h:73

llvm::MachineMemOperand::Flags
Flags
Flags values. These may be or'd together.
Definition MachineMemOperand.h:133

llvm::MachineMemOperand::MODereferenceable
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
Definition MachineMemOperand.h:145

llvm::MachineMemOperand::MONonTemporal
@ MONonTemporal
The memory access is non-temporal.
Definition MachineMemOperand.h:143

llvm::MachineMemOperand::MOInvariant
@ MOInvariant
The memory access always returns the same value (or traps).
Definition MachineMemOperand.h:147

llvm::MachineMemOperand::getFlags
Flags getFlags() const
Return the raw flags of the source value,.
Definition MachineMemOperand.h:227

llvm::MachineMemOperand::getValue
const Value * getValue() const
Return the base address of the memory access.
Definition MachineMemOperand.h:216

llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition MachineOperand.h:49

llvm::MachineOperand::getGlobal
const GlobalValue * getGlobal() const
Definition MachineOperand.h:586

llvm::MachineOperand::getTargetFlags
unsigned getTargetFlags() const
Definition MachineOperand.h:229

llvm::MachineOperand::getOffset
int64_t getOffset() const
Return the offset from the symbol in this operand.
Definition MachineOperand.h:638

llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition MachineRegisterInfo.h:53

llvm::MachineRegisterInfo::getVRegDef
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Definition MachineRegisterInfo.cpp:404

llvm::MachineRegisterInfo::use_empty
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
Definition MachineRegisterInfo.h:509

llvm::MachineSDNode
An SDNode that represents everything that will be needed to construct a MachineInstr.
Definition SelectionDAGNodes.h:3267

llvm::MemSDNode::getAddressSpace
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Definition SelectionDAGNodes.h:1552

llvm::MemSDNode::getAlign
Align getAlign() const
Definition SelectionDAGNodes.h:1450

llvm::MemSDNode::getMemOperand
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
Definition SelectionDAGNodes.h:1521

llvm::MemSDNode::getPointerInfo
const MachinePointerInfo & getPointerInfo() const
Definition SelectionDAGNodes.h:1547

llvm::MemSDNode::getChain
const SDValue & getChain() const
Definition SelectionDAGNodes.h:1588

llvm::MemSDNode::getMemoryVT
EVT getMemoryVT() const
Return the type of the in-memory value.
Definition SelectionDAGNodes.h:1516

llvm::PointerType::get
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.

llvm::R600FrameLowering::getFrameIndexReference
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
Definition R600FrameLowering.cpp:19

llvm::R600InstrInfo
Definition R600InstrInfo.h:39

llvm::R600MachineFunctionInfo
Definition R600MachineFunctionInfo.h:21

llvm::R600Subtarget
Definition R600Subtarget.h:29

llvm::R600Subtarget::getInstrInfo
const R600InstrInfo * getInstrInfo() const override
Definition R600Subtarget.h:50

llvm::R600TargetLowering::PerformDAGCombine
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
Definition R600ISelLowering.cpp:1725

llvm::R600TargetLowering::EmitInstrWithCustomInserter
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Definition R600ISelLowering.cpp:219

llvm::R600TargetLowering::canMergeStoresTo
bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const override
Returns if it's reasonable to merge stores to MemVT size.
Definition R600ISelLowering.cpp:1538

llvm::R600TargetLowering::R600TargetLowering
R600TargetLowering(const TargetMachine &TM, const R600Subtarget &STI)
Definition R600ISelLowering.cpp:31

llvm::R600TargetLowering::getSetCCResultType
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT VT) const override
Return the ValueType of the result of SETCC operations.
Definition R600ISelLowering.cpp:1531

llvm::R600TargetLowering::ReplaceNodeResults
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
Definition R600ISelLowering.cpp:586

llvm::R600TargetLowering::allowsMisalignedMemoryAccesses
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *IsFast=nullptr) const override
Determine if the target supports unaligned memory accesses.
Definition R600ISelLowering.cpp:1547

llvm::R600TargetLowering::CCAssignFnForCall
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const
Definition R600ISelLowering.cpp:1430

llvm::R600TargetLowering::LowerFormalArguments
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
XXX Only kernel functions are supported, so we can assume for now that every function is a kernel fun...
Definition R600ISelLowering.cpp:1455

llvm::R600TargetLowering::LowerOperation
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
Definition R600ISelLowering.cpp:403

llvm::Register
Wrapper class representing virtual and physical registers.
Definition Register.h:20

llvm::Register::isVirtual
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79

llvm::SDLoc
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Definition SelectionDAGNodes.h:1253

llvm::SDNode
Represents one node in the SelectionDAG.
Definition SelectionDAGNodes.h:517

llvm::SDNode::ops
ArrayRef< SDUse > ops() const
Definition SelectionDAGNodes.h:1071

llvm::SDNode::getOpcode
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
Definition SelectionDAGNodes.h:712

llvm::SDNode::getAsZExtVal
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
Definition SelectionDAGNodes.h:1865

llvm::SDNode::getMachineOpcode
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
Definition SelectionDAGNodes.h:770

llvm::SDNode::getOperand
const SDValue & getOperand(unsigned Num) const
Definition SelectionDAGNodes.h:1062

llvm::SDNode::getConstantOperandVal
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Definition SelectionDAGNodes.h:1861

llvm::SDNode::getValueType
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Definition SelectionDAGNodes.h:1132

llvm::SDNode::op_end
op_iterator op_end() const
Definition SelectionDAGNodes.h:1070

llvm::SDNode::op_begin
op_iterator op_begin() const
Definition SelectionDAGNodes.h:1069

llvm::SDValue
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
Definition SelectionDAGNodes.h:147

llvm::SDValue::isUndef
bool isUndef() const
Definition SelectionDAGNodes.h:1320

llvm::SDValue::getNode
SDNode * getNode() const
get the SDNode which holds the desired result
Definition SelectionDAGNodes.h:161

llvm::SDValue::getValue
SDValue getValue(unsigned R) const
Definition SelectionDAGNodes.h:181

llvm::SDValue::getValueType
EVT getValueType() const
Return the ValueType of the referenced return value.
Definition SelectionDAGNodes.h:1288

llvm::SDValue::getOperand
const SDValue & getOperand(unsigned i) const
Definition SelectionDAGNodes.h:1296

llvm::SDValue::getConstantOperandVal
uint64_t getConstantOperandVal(unsigned i) const
Definition SelectionDAGNodes.h:1300

llvm::SDValue::getOpcode
unsigned getOpcode() const
Definition SelectionDAGNodes.h:1284

llvm::SelectionDAG
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition SelectionDAG.h:231

llvm::SelectionDAG::getExtLoad
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Definition SelectionDAG.cpp:10594

llvm::SelectionDAG::getTargetGlobalAddress
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition SelectionDAG.h:781

llvm::SelectionDAG::getMergeValues
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
Definition SelectionDAG.cpp:10312

llvm::SelectionDAG::getVTList
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
Definition SelectionDAG.cpp:12004

llvm::SelectionDAG::getAllOnesConstant
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition SelectionDAG.cpp:1861

llvm::SelectionDAG::getMachineNode
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
Definition SelectionDAG.cpp:12445

llvm::SelectionDAG::getConstantFP
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
Definition SelectionDAG.cpp:1934

llvm::SelectionDAG::getRegister
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
Definition SelectionDAG.cpp:2434

llvm::SelectionDAG::getLoad
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
Definition SelectionDAG.cpp:10577

llvm::SelectionDAG::getMemIntrinsicNode
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
Definition SelectionDAG.cpp:10323

llvm::SelectionDAG::getNOT
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
Definition SelectionDAG.cpp:1681

llvm::SelectionDAG::getUNDEF
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition SelectionDAG.h:1207

llvm::SelectionDAG::getBuildVector
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition SelectionDAG.h:896

llvm::SelectionDAG::getCopyFromReg
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
Definition SelectionDAG.h:867

llvm::SelectionDAG::getZeroExtendInReg
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
Definition SelectionDAG.cpp:1621

llvm::SelectionDAG::getDataLayout
const DataLayout & getDataLayout() const
Definition SelectionDAG.h:514

llvm::SelectionDAG::getConstant
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
Definition SelectionDAG.cpp:1725

llvm::SelectionDAG::getStore
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
Definition SelectionDAG.cpp:10627

llvm::SelectionDAG::getSignedConstant
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Definition SelectionDAG.cpp:1855

llvm::SelectionDAG::getSelectCC
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
Definition SelectionDAG.h:1422

llvm::SelectionDAG::getIntPtrConstant
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
Definition SelectionDAG.cpp:1867

llvm::SelectionDAG::getValueType
LLVM_ABI SDValue getValueType(EVT)
Definition SelectionDAG.cpp:2114

llvm::SelectionDAG::getNode
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
Definition SelectionDAG.cpp:11635

llvm::SelectionDAG::getTargetConstant
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition SelectionDAG.h:730

llvm::SelectionDAG::getVectorIdxConstant
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
Definition SelectionDAG.cpp:1885

llvm::SelectionDAG::ReplaceAllUsesOfValueWith
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
Definition SelectionDAG.cpp:13124

llvm::SelectionDAG::getMachineFunction
MachineFunction & getMachineFunction() const
Definition SelectionDAG.h:509

llvm::SelectionDAG::getCondCode
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
Definition SelectionDAG.cpp:2166

llvm::SelectionDAG::getContext
LLVMContext * getContext() const
Definition SelectionDAG.h:534

llvm::SelectionDAG::UpdateNodeOperands
LLVM_ABI SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
Definition SelectionDAG.cpp:12097

llvm::SelectionDAG::getEntryNode
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition SelectionDAG.h:604

llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition SmallVector.h:581

llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition SmallVector.h:423

llvm::SmallVectorTemplateCommon::size
size_t size() const
Definition SmallVector.h:83

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition SmallVector.h:1225

llvm::StoreSDNode
This class is used to represent ISD::STORE nodes.
Definition SelectionDAGNodes.h:2685

llvm::StoreSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition SelectionDAGNodes.h:2704

llvm::StoreSDNode::getValue
const SDValue & getValue() const
Definition SelectionDAGNodes.h:2703

llvm::StoreSDNode::isTruncatingStore
bool isTruncatingStore() const
Return true if the op does a truncation before store.
Definition SelectionDAGNodes.h:2701

llvm::TargetLoweringBase::setBooleanVectorContents
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
Definition TargetLowering.h:2642

llvm::TargetLoweringBase::setOperationAction
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
Definition TargetLowering.h:2705

llvm::TargetLoweringBase::Expand
@ Expand
Definition TargetLowering.h:206

llvm::TargetLoweringBase::Promote
@ Promote
Definition TargetLowering.h:205

llvm::TargetLoweringBase::setHasExtractBitsInsn
void setHasExtractBitsInsn(bool hasExtractInsn=true)
Tells the code generator that the target has BitExtract instructions.
Definition TargetLowering.h:2671

llvm::TargetLoweringBase::setBooleanContents
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
Definition TargetLowering.h:2628

llvm::TargetLoweringBase::computeRegisterProperties
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
Definition TargetLoweringBase.cpp:1717

llvm::TargetLoweringBase::addRegisterClass
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
Definition TargetLowering.h:2688

llvm::TargetLoweringBase::isCondCodeLegal
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
Definition TargetLowering.h:1708

llvm::TargetLoweringBase::getPointerTy
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
Definition TargetLowering.h:381

llvm::TargetLoweringBase::isOperationLegal
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
Definition TargetLowering.h:1483

llvm::TargetLoweringBase::setTruncStoreAction
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
Definition TargetLowering.h:2768

llvm::TargetLoweringBase::ZeroOrNegativeOneBooleanContent
@ ZeroOrNegativeOneBooleanContent
Definition TargetLowering.h:240

llvm::TargetLoweringBase::AtomicExpansionKind
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
Definition TargetLowering.h:256

llvm::TargetLoweringBase::AtomicExpansionKind::CmpXChg
@ CmpXChg
Definition TargetLowering.h:264

llvm::TargetLoweringBase::AtomicExpansionKind::None
@ None
Definition TargetLowering.h:257

llvm::TargetLoweringBase::setCondCodeAction
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
Definition TargetLowering.h:2829

llvm::TargetLoweringBase::setTargetDAGCombine
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
Definition TargetLowering.h:2893

llvm::TargetLoweringBase::setLoadExtAction
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
Definition TargetLowering.h:2722

llvm::TargetLoweringBase::setSchedulingPreference
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
Definition TargetLowering.h:2647

llvm::TargetLowering::scalarizeVectorStore
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
Definition TargetLowering.cpp:11217

llvm::TargetLowering::expandUnalignedStore
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
Definition TargetLowering.cpp:11440

llvm::TargetLowering::expandFP_TO_SINT
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.
Definition TargetLowering.cpp:9220

llvm::TargetLowering::scalarizeVectorLoad
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
Definition TargetLowering.cpp:11129

llvm::TargetLowering::EmitInstrWithCustomInserter
virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Definition SelectionDAGISel.cpp:333

llvm::TargetLowering::expandShiftParts
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
Definition TargetLowering.cpp:8958

llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition TargetMachine.h:83

llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255

uint32_t

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition ErrorHandling.h:164

llvm::AMDGPUAS::CONSTANT_BUFFER_5
@ CONSTANT_BUFFER_5
Definition AMDGPUAddrSpace.h:72

llvm::AMDGPUAS::CONSTANT_BUFFER_15
@ CONSTANT_BUFFER_15
Definition AMDGPUAddrSpace.h:82

llvm::AMDGPUAS::CONSTANT_BUFFER_11
@ CONSTANT_BUFFER_11
Definition AMDGPUAddrSpace.h:78

llvm::AMDGPUAS::CONSTANT_BUFFER_6
@ CONSTANT_BUFFER_6
Definition AMDGPUAddrSpace.h:73

llvm::AMDGPUAS::CONSTANT_BUFFER_12
@ CONSTANT_BUFFER_12
Definition AMDGPUAddrSpace.h:79

llvm::AMDGPUAS::CONSTANT_BUFFER_2
@ CONSTANT_BUFFER_2
Definition AMDGPUAddrSpace.h:69

llvm::AMDGPUAS::CONSTANT_BUFFER_1
@ CONSTANT_BUFFER_1
Definition AMDGPUAddrSpace.h:68

llvm::AMDGPUAS::CONSTANT_BUFFER_0
@ CONSTANT_BUFFER_0
Definition AMDGPUAddrSpace.h:67

llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition AMDGPUAddrSpace.h:36

llvm::AMDGPUAS::CONSTANT_BUFFER_8
@ CONSTANT_BUFFER_8
Definition AMDGPUAddrSpace.h:75

llvm::AMDGPUAS::CONSTANT_BUFFER_4
@ CONSTANT_BUFFER_4
Definition AMDGPUAddrSpace.h:71

llvm::AMDGPUAS::CONSTANT_BUFFER_3
@ CONSTANT_BUFFER_3
Definition AMDGPUAddrSpace.h:70

llvm::AMDGPUAS::CONSTANT_BUFFER_10
@ CONSTANT_BUFFER_10
Definition AMDGPUAddrSpace.h:77

llvm::AMDGPUAS::PARAM_I_ADDRESS
@ PARAM_I_ADDRESS
Address space for indirect addressable parameter memory (VTX1).
Definition AMDGPUAddrSpace.h:59

llvm::AMDGPUAS::CONSTANT_ADDRESS
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition AMDGPUAddrSpace.h:37

llvm::AMDGPUAS::FLAT_ADDRESS
@ FLAT_ADDRESS
Address space for flat memory.
Definition AMDGPUAddrSpace.h:32

llvm::AMDGPUAS::GLOBAL_ADDRESS
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition AMDGPUAddrSpace.h:33

llvm::AMDGPUAS::CONSTANT_BUFFER_14
@ CONSTANT_BUFFER_14
Definition AMDGPUAddrSpace.h:81

llvm::AMDGPUAS::CONSTANT_BUFFER_9
@ CONSTANT_BUFFER_9
Definition AMDGPUAddrSpace.h:76

llvm::AMDGPUAS::CONSTANT_BUFFER_7
@ CONSTANT_BUFFER_7
Definition AMDGPUAddrSpace.h:74

llvm::AMDGPUAS::CONSTANT_BUFFER_13
@ CONSTANT_BUFFER_13
Definition AMDGPUAddrSpace.h:80

llvm::AMDGPUAS::PRIVATE_ADDRESS
@ PRIVATE_ADDRESS
Address space for private memory.
Definition AMDGPUAddrSpace.h:38

llvm::AMDGPUISD::TEXTURE_FETCH
@ TEXTURE_FETCH
Definition AMDGPUSelectionDAGInfo.h:28

llvm::AMDGPUISD::BUILD_VERTICAL_VECTOR
@ BUILD_VERTICAL_VECTOR
This node is for VLIW targets and it is used to represent a vector that is stored in consecutive regi...
Definition AMDGPUSelectionDAGInfo.h:40

llvm::AMDGPUISD::CONST_ADDRESS
@ CONST_ADDRESS
Definition AMDGPUSelectionDAGInfo.h:30

llvm::AMDGPUISD::DOT4
@ DOT4
Definition AMDGPUSelectionDAGInfo.h:25

llvm::AMDGPUISD::DUMMY_CHAIN
@ DUMMY_CHAIN
Definition AMDGPUSelectionDAGInfo.h:42

llvm::AMDGPUISD::R600_EXPORT
@ R600_EXPORT
Definition AMDGPUSelectionDAGInfo.h:29

llvm::AMDGPU::HSAMD::Kernel::Arg::Key::Align
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
Definition AMDGPUMetadata.h:183

llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition AMDGPUMetadata.h:396

llvm::AMDGPU::isShader
LLVM_READNONE constexpr bool isShader(CallingConv::ID CC)
Definition AMDGPUBaseInfo.h:1450

llvm::AMDGPU::getNullPointerValue
constexpr int64_t getNullPointerValue(unsigned AS)
Get the null pointer value for the given address space.
Definition AMDGPUAddrSpace.h:174

llvm::AMDGPU::Imm
@ Imm
Definition AMDGPURegBankLegalizeRules.h:152

llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition BitmaskEnum.h:126

llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24

llvm::CallingConv::AMDGPU_CS
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
Definition CallingConv.h:197

llvm::CallingConv::AMDGPU_VS
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
Definition CallingConv.h:188

llvm::CallingConv::AMDGPU_KERNEL
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
Definition CallingConv.h:200

llvm::CallingConv::AMDGPU_HS
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition CallingConv.h:206

llvm::CallingConv::AMDGPU_GS
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
Definition CallingConv.h:191

llvm::CallingConv::AMDGPU_PS
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
Definition CallingConv.h:194

llvm::CallingConv::Cold
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition CallingConv.h:47

llvm::CallingConv::SPIR_KERNEL
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition CallingConv.h:144

llvm::CallingConv::Fast
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41

llvm::CallingConv::AMDGPU_ES
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
Definition CallingConv.h:218

llvm::CallingConv::AMDGPU_LS
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
Definition CallingConv.h:213

llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34

llvm::IRSimilarity::Legal
@ Legal
Definition IRSimilarityIdentifier.h:77

llvm::ISD::isNON_EXTLoad
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
Definition SelectionDAGNodes.h:3422

llvm::ISD::SETCC
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:823

llvm::ISD::MERGE_VALUES
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:261

llvm::ISD::STORE
@ STORE
Definition ISDOpcodes.h:1170

llvm::ISD::UINT_TO_FP
@ UINT_TO_FP
Definition ISDOpcodes.h:885

llvm::ISD::ATOMIC_STORE
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
Definition ISDOpcodes.h:1379

llvm::ISD::UADDO
@ UADDO
Definition ISDOpcodes.h:349

llvm::ISD::FTRUNC
@ FTRUNC
Definition ISDOpcodes.h:1062

llvm::ISD::ADDC
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:294

llvm::ISD::FMAD
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition ISDOpcodes.h:522

llvm::ISD::ADD
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264

llvm::ISD::LOAD
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition ISDOpcodes.h:1169

llvm::ISD::ANY_EXTEND
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:857

llvm::ISD::FSUB
@ FSUB
Definition ISDOpcodes.h:418

llvm::ISD::FMA
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518

llvm::ISD::SUBC
@ SUBC
Definition ISDOpcodes.h:295

llvm::ISD::INTRINSIC_VOID
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220

llvm::ISD::GlobalAddress
@ GlobalAddress
Definition ISDOpcodes.h:88

llvm::ISD::FADD
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417

llvm::ISD::UDIVREM
@ UDIVREM
Definition ISDOpcodes.h:281

llvm::ISD::SDIVREM
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280

llvm::ISD::SRL
@ SRL
Definition ISDOpcodes.h:771

llvm::ISD::BITCAST
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:997

llvm::ISD::FFLOOR
@ FFLOOR
Definition ISDOpcodes.h:1067

llvm::ISD::FrameIndex
@ FrameIndex
Definition ISDOpcodes.h:90

llvm::ISD::CTLZ_ZERO_POISON
@ CTLZ_ZERO_POISON
Definition ISDOpcodes.h:792

llvm::ISD::USUBO
@ USUBO
Definition ISDOpcodes.h:353

llvm::ISD::SIGN_EXTEND
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:848

llvm::ISD::FNEG
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition ISDOpcodes.h:1030

llvm::ISD::BR_CC
@ BR_CC
BR_CC - Conditional branch.
Definition ISDOpcodes.h:1215

llvm::ISD::FP_TO_UINT
@ FP_TO_UINT
Definition ISDOpcodes.h:931

llvm::ISD::OR
@ OR
Definition ISDOpcodes.h:740

llvm::ISD::IS_FPCLASS
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548

llvm::ISD::SRA_PARTS
@ SRA_PARTS
Definition ISDOpcodes.h:838

llvm::ISD::SELECT
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:800

llvm::ISD::ATOMIC_LOAD
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition ISDOpcodes.h:1375

llvm::ISD::SHL
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:769

llvm::ISD::FCOS
@ FCOS
Definition ISDOpcodes.h:1035

llvm::ISD::EXTRACT_VECTOR_ELT
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576

llvm::ISD::CTPOP
@ CTPOP
Definition ISDOpcodes.h:786

llvm::ISD::SELECT_CC
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:815

llvm::ISD::FMUL
@ FMUL
Definition ISDOpcodes.h:419

llvm::ISD::SRL_PARTS
@ SRL_PARTS
Definition ISDOpcodes.h:839

llvm::ISD::SUB
@ SUB
Definition ISDOpcodes.h:265

llvm::ISD::SIGN_EXTEND_INREG
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:892

llvm::ISD::FROUNDEVEN
@ FROUNDEVEN
Definition ISDOpcodes.h:1066

llvm::ISD::FP_TO_SINT
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:930

llvm::ISD::AND
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739

llvm::ISD::INTRINSIC_WO_CHAIN
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205

llvm::ISD::SUBE
@ SUBE
Definition ISDOpcodes.h:305

llvm::ISD::ADDE
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:304

llvm::ISD::INSERT_VECTOR_ELT
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565

llvm::ISD::FSIN
@ FSIN
Definition ISDOpcodes.h:1034

llvm::ISD::FCEIL
@ FCEIL
Definition ISDOpcodes.h:1061

llvm::ISD::CTTZ_ZERO_POISON
@ CTTZ_ZERO_POISON
Bit counting operators with a poisoned result for zero inputs.
Definition ISDOpcodes.h:791

llvm::ISD::FP_ROUND
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:963

llvm::ISD::ADDRSPACECAST
@ ADDRSPACECAST
ADDRSPACECAST - This operator converts between pointers of different address spaces.
Definition ISDOpcodes.h:1001

llvm::ISD::TRUNCATE
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:860

llvm::ISD::BRCOND
@ BRCOND
BRCOND - Conditional branch.
Definition ISDOpcodes.h:1208

llvm::ISD::SHL_PARTS
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:837

llvm::ISD::FCOPYSIGN
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534

llvm::ISD::BUILD_VECTOR
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556

llvm::ISD::getSetCCInverse
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
Definition SelectionDAG.cpp:658

llvm::ISD::getSetCCSwappedOperands
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
Definition SelectionDAG.cpp:635

llvm::ISD::UNINDEXED
@ UNINDEXED
Definition ISDOpcodes.h:1725

llvm::ISD::CondCode
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition ISDOpcodes.h:1776

llvm::ISD::SETUNE
@ SETUNE
Definition ISDOpcodes.h:1792

llvm::ISD::SETUEQ
@ SETUEQ
Definition ISDOpcodes.h:1787

llvm::ISD::SETOLE
@ SETOLE
Definition ISDOpcodes.h:1783

llvm::ISD::SETOLT
@ SETOLT
Definition ISDOpcodes.h:1782

llvm::ISD::SETNE
@ SETNE
Definition ISDOpcodes.h:1801

llvm::ISD::SETUGT
@ SETUGT
Definition ISDOpcodes.h:1788

llvm::ISD::SETULT
@ SETULT
Definition ISDOpcodes.h:1790

llvm::ISD::SETUO
@ SETUO
Definition ISDOpcodes.h:1786

llvm::ISD::SETONE
@ SETONE
Definition ISDOpcodes.h:1784

llvm::ISD::SETLT
@ SETLT
Definition ISDOpcodes.h:1799

llvm::ISD::SETO
@ SETO
Definition ISDOpcodes.h:1785

llvm::ISD::SETUGE
@ SETUGE
Definition ISDOpcodes.h:1789

llvm::ISD::SETLE
@ SETLE
Definition ISDOpcodes.h:1800

llvm::ISD::SETULE
@ SETULE
Definition ISDOpcodes.h:1791

llvm::ISD::SETEQ
@ SETEQ
Definition ISDOpcodes.h:1796

llvm::ISD::LoadExtType
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition ISDOpcodes.h:1756

llvm::ISD::NON_EXTLOAD
@ NON_EXTLOAD
Definition ISDOpcodes.h:1756

llvm::ISD::SEXTLOAD
@ SEXTLOAD
Definition ISDOpcodes.h:1756

llvm::ISD::ZEXTLOAD
@ ZEXTLOAD
Definition ISDOpcodes.h:1756

llvm::ISD::EXTLOAD
@ EXTLOAD
Definition ISDOpcodes.h:1756

llvm::NVPTXAS::AddressSpace
AddressSpace
Definition NVPTXAddrSpace.h:21

llvm::R600::getLDSNoRetOp
int32_t getLDSNoRetOp(uint32_t Opcode)

llvm::SPII::Store
@ Store
Definition SparcInstrInfo.h:33

llvm::SPII::Load
@ Load
Definition SparcInstrInfo.h:32

llvm::Sched::Source
@ Source
Definition TargetLowering.h:106

llvm::cfg::UpdateKind::Insert
@ Insert
Definition CFGUpdate.h:26

llvm::codeview::EncodedFramePtrReg::BasePtr
@ BasePtr
Definition CodeView.h:527

llvm::dwarf::Index
Index
Definition Dwarf.h:903

llvm::logicalview::LVAttributeKind::Zero
@ Zero
Definition LVOptions.h:130

llvm::ms_demangle::QualifierMangleMode::Result
@ Result
Definition MicrosoftDemangle.h:132

llvm::numbers::pif
constexpr float pif
Definition MathExtras.h:53

llvm::pdb::PDB_SymType::PointerType
@ PointerType
Definition PDBTypes.h:258

llvm::rdf::Node
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition FunctionInfo.h:25

llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315

llvm::Offset
@ Offset
Definition DWP.cpp:557

llvm::Value
FunctionAddr VTableAddr Value
Definition InstrProf.h:137

llvm::PseudoProbeType::Block
@ Block
Definition PseudoProbe.h:30

llvm::Read
@ Read
Definition CodeGenData.h:108

llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition MachineInstrBuilder.h:449

llvm::isInt
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165

llvm::isNullConstant
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
Definition SelectionDAG.cpp:13556

llvm::RegState::Kill
@ Kill
The last use of a register.
Definition MachineInstrBuilder.h:61

llvm::dyn_cast
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643

llvm::CCAssignFn
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
Definition CallingConvLower.h:157

llvm::SmallVector
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
Definition SmallVector.h:1151

llvm::isa
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547

llvm::PackElem::Hi
@ Hi
Definition VECustomDAG.h:132

llvm::PackElem::Lo
@ Lo
Definition VECustomDAG.h:131

llvm::AfterLegalizeVectorOps
@ AfterLegalizeVectorOps
Definition DAGCombine.h:18

llvm::Op
DWARFExpression::Operation Op
Definition DWARFExpressionPrinter.cpp:23

llvm::ArrayRef
ArrayRef(const T &OneElt) -> ArrayRef< T >

llvm::cast
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559

llvm::commonAlignment
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201

llvm::VFParamKind::Vector
@ Vector
Definition VFABIDemangler.h:27

llvm::ValueUniformity::Custom
@ Custom
The result value requires a custom uniformity check.
Definition Uniformity.h:31

llvm::isAllOnesConstant
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
Definition SelectionDAG.cpp:13570

llvm::reportFatalUsageError
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177

std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876

N
#define N

llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39

llvm::EVT
Extended Value Type.
Definition ValueTypes.h:35

llvm::EVT::changeVectorElementTypeToInteger
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:90

llvm::EVT::getStoreSize
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:403

llvm::EVT::isSimple
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145

llvm::EVT::changeTypeToInteger
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition ValueTypes.h:129

llvm::EVT::bitsGT
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:292

llvm::EVT::bitsLT
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:308

llvm::EVT::isFloatingPoint
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155

llvm::EVT::getSizeInBits
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381

llvm::EVT::getScalarSizeInBits
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:393

llvm::EVT::getSimpleVT
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324

llvm::EVT::isVector
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176

llvm::EVT::getScalarType
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:331

llvm::EVT::bitsGE
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition ValueTypes.h:300

llvm::EVT::getVectorElementType
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:336

llvm::EVT::getVectorNumElements
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:344

llvm::EVT::bitsLE
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:316

llvm::EVT::isInteger
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160

llvm::ISD::InputArg
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
Definition TargetCallingConv.h:204

llvm::MachinePointerInfo
This class contains a discriminated union of information about pointers in memory operands,...
Definition MachineMemOperand.h:42

llvm::TargetLowering::DAGCombinerInfo
Definition TargetLowering.h:4535

llvm::TargetLowering::DAGCombinerInfo::isBeforeLegalizeOps
bool isBeforeLegalizeOps() const
Definition TargetLowering.h:4547

llvm::TargetLowering::DAGCombinerInfo::DAG
SelectionDAG & DAG
Definition TargetLowering.h:4541