29#include "llvm/IR/IntrinsicsAMDGPU.h"
33#ifdef EXPENSIVE_CHECKS
38#define DEBUG_TYPE "amdgpu-isel"
53 In = stripBitcast(In);
59 Out = In.getOperand(0);
70 if (ShiftAmt->getZExtValue() == 16) {
90 if (
Lo->isDivergent()) {
92 SL,
Lo.getValueType()),
100 Src.getValueType(),
Ops),
118 SDValue Idx = In.getOperand(1);
120 return In.getOperand(0);
124 SDValue Src = In.getOperand(0);
125 if (Src.getValueType().getSizeInBits() == 32)
126 return stripBitcast(Src);
136 assert(Elts.
size() == SubRegClass.
size() &&
"array size mismatch");
137 unsigned NumElts = Elts.
size();
140 for (
unsigned i = 0; i < NumElts; ++i) {
141 Ops[2 * i + 1] = Elts[i];
151 "AMDGPU DAG->DAG Pattern Instruction Selection",
false,
155#ifdef EXPENSIVE_CHECKS
160 "AMDGPU DAG->DAG Pattern Instruction Selection",
false,
181bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(
unsigned Opc)
const {
215 case AMDGPUISD::FRACT:
216 case AMDGPUISD::CLAMP:
217 case AMDGPUISD::COS_HW:
218 case AMDGPUISD::SIN_HW:
219 case AMDGPUISD::FMIN3:
220 case AMDGPUISD::FMAX3:
221 case AMDGPUISD::FMED3:
222 case AMDGPUISD::FMAD_FTZ:
225 case AMDGPUISD::RCP_IFLAG:
235 case AMDGPUISD::DIV_FIXUP:
245#ifdef EXPENSIVE_CHECKS
249 assert(L->isLCSSAForm(DT));
257#ifdef EXPENSIVE_CHECKS
265 assert(Subtarget->d16PreservesUnusedBits());
266 MVT VT =
N->getValueType(0).getSimpleVT();
267 if (VT != MVT::v2i16 && VT != MVT::v2f16)
289 unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
292 AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;
298 CurDAG->getMemIntrinsicNode(LoadOp,
SDLoc(LdHi), VTList,
311 if (LdLo &&
Lo.hasOneUse()) {
317 unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
320 AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;
332 CurDAG->getMemIntrinsicNode(LoadOp,
SDLoc(LdLo), VTList,
345 if (!Subtarget->d16PreservesUnusedBits())
350 bool MadeChange =
false;
351 while (Position !=
CurDAG->allnodes_begin()) {
356 switch (
N->getOpcode()) {
367 CurDAG->RemoveDeadNodes();
373bool AMDGPUDAGToDAGISel::isInlineImmediate(
const SDNode *
N)
const {
379 return TII->isInlineConstant(
C->getAPIntValue());
382 return TII->isInlineConstant(
C->getValueAPF());
392 unsigned OpNo)
const {
393 if (!
N->isMachineOpcode()) {
396 if (
Reg.isVirtual()) {
401 const SIRegisterInfo *
TRI = Subtarget->getRegisterInfo();
402 return TRI->getPhysRegBaseClass(
Reg);
408 switch (
N->getMachineOpcode()) {
410 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
411 const MCInstrDesc &
Desc =
TII->get(
N->getMachineOpcode());
412 unsigned OpIdx =
Desc.getNumDefs() + OpNo;
416 int16_t RegClass =
TII->getOpRegClassID(
Desc.operands()[
OpIdx]);
420 return Subtarget->getRegisterInfo()->getRegClass(RegClass);
422 case AMDGPU::REG_SEQUENCE: {
423 unsigned RCID =
N->getConstantOperandVal(0);
424 const TargetRegisterClass *SuperRC =
425 Subtarget->getRegisterInfo()->getRegClass(RCID);
427 SDValue SubRegOp =
N->getOperand(OpNo + 1);
429 return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
438 Ops.push_back(NewChain);
439 for (
unsigned i = 1, e =
N->getNumOperands(); i != e; ++i)
440 Ops.push_back(
N->getOperand(i));
443 return CurDAG->MorphNodeTo(
N,
N->getOpcode(),
N->getVTList(),
Ops);
450 assert(
N->getOperand(0).getValueType() == MVT::Other &&
"Expected chain");
453 return glueCopyToOp(
N,
M0,
M0.getValue(1));
456SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(
SDNode *
N)
const {
459 if (Subtarget->ldsRequiresM0Init())
461 N,
CurDAG->getSignedTargetConstant(-1, SDLoc(
N), MVT::i32));
463 MachineFunction &
MF =
CurDAG->getMachineFunction();
464 unsigned Value =
MF.getInfo<SIMachineFunctionInfo>()->getGDSSize();
466 glueCopyToM0(
N,
CurDAG->getTargetConstant(
Value, SDLoc(
N), MVT::i32));
473 SDNode *
Lo =
CurDAG->getMachineNode(
474 AMDGPU::S_MOV_B32,
DL, MVT::i32,
476 SDNode *
Hi =
CurDAG->getMachineNode(
477 AMDGPU::S_MOV_B32,
DL, MVT::i32,
480 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID,
DL, MVT::i32),
484 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
DL, VT,
Ops);
487SDNode *AMDGPUDAGToDAGISel::packConstantV2I16(
const SDNode *
N,
492 uint32_t LHSVal, RHSVal;
496 uint32_t
K = (LHSVal & 0xffff) | (RHSVal << 16);
498 isVGPRImm(
N) ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32, SL,
506 EVT VT =
N->getValueType(0);
510 SDValue RegClass =
CurDAG->getTargetConstant(RegClassID,
DL, MVT::i32);
512 if (NumVectorElts == 1) {
513 CurDAG->SelectNodeTo(
N, AMDGPU::COPY_TO_REGCLASS, EltVT,
N->getOperand(0),
518 bool IsGCN =
CurDAG->getSubtarget().getTargetTriple().isAMDGCN();
519 if (IsGCN && Subtarget->has64BitLiterals() && VT.
getSizeInBits() == 64 &&
522 bool AllConst =
true;
524 for (
unsigned I = 0;
I < NumVectorElts; ++
I) {
532 Val = CF->getValueAPF().bitcastToAPInt().getZExtValue();
535 C |= Val << (EltSize *
I);
540 CurDAG->getMachineNode(AMDGPU::S_MOV_B64_IMM_PSEUDO,
DL, VT, CV);
541 CurDAG->SelectNodeTo(
N, AMDGPU::COPY_TO_REGCLASS, VT,
SDValue(Copy, 0),
547 assert(NumVectorElts <= 32 &&
"Vectors with more than 32 elements not "
554 RegSeqArgs[0] =
CurDAG->getTargetConstant(RegClassID,
DL, MVT::i32);
555 bool IsRegSeq =
true;
556 unsigned NOps =
N->getNumOperands();
557 for (
unsigned i = 0; i < NOps; i++) {
565 RegSeqArgs[1 + (2 * i)] =
N->getOperand(i);
566 RegSeqArgs[1 + (2 * i) + 1] =
CurDAG->getTargetConstant(
Sub,
DL, MVT::i32);
568 if (NOps != NumVectorElts) {
573 for (
unsigned i = NOps; i < NumVectorElts; ++i) {
576 RegSeqArgs[1 + (2 * i)] =
SDValue(ImpDef, 0);
577 RegSeqArgs[1 + (2 * i) + 1] =
584 CurDAG->SelectNodeTo(
N, AMDGPU::REG_SEQUENCE,
N->getVTList(), RegSeqArgs);
588 EVT VT =
N->getValueType(0);
592 if (!Subtarget->hasPkMovB32() || !EltVT.
bitsEq(MVT::i32) ||
606 Mask[0] < 4 && Mask[1] < 4);
608 SDValue VSrc0 = Mask[0] < 2 ? Src0 : Src1;
609 SDValue VSrc1 = Mask[1] < 2 ? Src0 : Src1;
610 unsigned Src0SubReg = Mask[0] & 1 ? AMDGPU::sub1 : AMDGPU::sub0;
611 unsigned Src1SubReg = Mask[1] & 1 ? AMDGPU::sub1 : AMDGPU::sub0;
614 Src0SubReg = Src1SubReg;
616 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
DL, VT);
621 Src1SubReg = Src0SubReg;
623 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
DL, VT);
633 if (
N->isDivergent() && Src0SubReg == AMDGPU::sub1 &&
634 Src1SubReg == AMDGPU::sub0) {
650 SDValue Src0OpSelVal =
CurDAG->getTargetConstant(Src0OpSel,
DL, MVT::i32);
651 SDValue Src1OpSelVal =
CurDAG->getTargetConstant(Src1OpSel,
DL, MVT::i32);
654 CurDAG->SelectNodeTo(
N, AMDGPU::V_PK_MOV_B32,
N->getVTList(),
655 {Src0OpSelVal, VSrc0, Src1OpSelVal, VSrc1,
665 CurDAG->getTargetExtractSubreg(Src0SubReg,
DL, EltVT, VSrc0);
667 CurDAG->getTargetExtractSubreg(Src1SubReg,
DL, EltVT, VSrc1);
670 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID,
DL, MVT::i32),
671 ResultElt0,
CurDAG->getTargetConstant(AMDGPU::sub0,
DL, MVT::i32),
672 ResultElt1,
CurDAG->getTargetConstant(AMDGPU::sub1,
DL, MVT::i32)};
673 CurDAG->SelectNodeTo(
N, TargetOpcode::REG_SEQUENCE, VT,
Ops);
677 unsigned int Opc =
N->getOpcode();
678 if (
N->isMachineOpcode()) {
686 N = glueCopyToM0LDSInit(
N);
701 if (
N->getValueType(0) != MVT::i64)
704 SelectADD_SUB_I64(
N);
709 if (
N->getValueType(0) != MVT::i32)
716 SelectUADDO_USUBO(
N);
719 case AMDGPUISD::FMUL_W_CHAIN: {
720 SelectFMUL_W_CHAIN(
N);
723 case AMDGPUISD::FMA_W_CHAIN: {
724 SelectFMA_W_CHAIN(
N);
730 EVT VT =
N->getValueType(0);
747 ?
TRI->getDefaultVectorSuperClassForBitWidth(NumVectorElts * 32)
759 if (
N->getValueType(0) == MVT::i128) {
760 RC =
CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID,
DL, MVT::i32);
761 SubReg0 =
CurDAG->getTargetConstant(AMDGPU::sub0_sub1,
DL, MVT::i32);
762 SubReg1 =
CurDAG->getTargetConstant(AMDGPU::sub2_sub3,
DL, MVT::i32);
763 }
else if (
N->getValueType(0) == MVT::i64) {
764 RC =
CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID,
DL, MVT::i32);
765 SubReg0 =
CurDAG->getTargetConstant(AMDGPU::sub0,
DL, MVT::i32);
766 SubReg1 =
CurDAG->getTargetConstant(AMDGPU::sub1,
DL, MVT::i32);
770 const SDValue Ops[] = { RC,
N->getOperand(0), SubReg0,
771 N->getOperand(1), SubReg1 };
773 N->getValueType(0),
Ops));
779 if (
N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(
N) ||
780 Subtarget->has64BitLiterals())
785 Imm =
FP->getValueAPF().bitcastToAPInt().getZExtValue();
790 Imm =
C->getZExtValue();
799 case AMDGPUISD::BFE_I32:
800 case AMDGPUISD::BFE_U32: {
826 case AMDGPUISD::DIV_SCALE: {
837 return SelectMUL_LOHI(
N);
848 if (
N->getValueType(0) != MVT::i32)
859 case AMDGPUISD::CVT_PKRTZ_F16_F32:
860 case AMDGPUISD::CVT_PKNORM_I16_F32:
861 case AMDGPUISD::CVT_PKNORM_U16_F32:
862 case AMDGPUISD::CVT_PK_U16_U32:
863 case AMDGPUISD::CVT_PK_I16_I32: {
865 if (
N->getValueType(0) == MVT::i32) {
866 MVT NewVT =
Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
868 { N->getOperand(0), N->getOperand(1) });
876 SelectINTRINSIC_W_CHAIN(
N);
880 SelectINTRINSIC_WO_CHAIN(
N);
884 SelectINTRINSIC_VOID(
N);
888 SelectWAVE_ADDRESS(
N);
892 SelectSTACKRESTORE(
N);
900bool AMDGPUDAGToDAGISel::isUniformBr(
const SDNode *
N)
const {
903 return Term->getMetadata(
"amdgpu.uniform") ||
904 Term->getMetadata(
"structurizecfg.uniform");
907bool AMDGPUDAGToDAGISel::isUnneededShiftMask(
const SDNode *
N,
908 unsigned ShAmtBits)
const {
911 const APInt &
RHS =
N->getConstantOperandAPInt(1);
912 if (
RHS.countr_one() >= ShAmtBits)
942 N1 =
Lo.getOperand(1);
952 if (
CurDAG->isBaseWithConstantOffset(Addr)) {
967 return "AMDGPU DAG->DAG Pattern Instruction Selection";
977#ifdef EXPENSIVE_CHECKS
983 for (
auto &L : LI.getLoopsInPreorder())
984 assert(L->isLCSSAForm(DT) &&
"Loop is not in LCSSA form!");
1006 }
else if ((Addr.
getOpcode() == AMDGPUISD::DWORDADDR) &&
1008 Base =
CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
1022SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
1024 SDNode *Mov =
CurDAG->getMachineNode(
1025 AMDGPU::S_MOV_B32,
DL, MVT::i32,
1026 CurDAG->getTargetConstant(Val,
DL, MVT::i32));
1031void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(
SDNode *
N) {
1036 unsigned Opcode =
N->getOpcode();
1045 SDNode *Lo0 =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1046 DL, MVT::i32,
LHS, Sub0);
1047 SDNode *Hi0 =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1048 DL, MVT::i32,
LHS, Sub1);
1050 SDNode *Lo1 =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1051 DL, MVT::i32,
RHS, Sub0);
1052 SDNode *Hi1 =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1053 DL, MVT::i32,
RHS, Sub1);
1055 SDVTList VTList =
CurDAG->getVTList(MVT::i32, MVT::Glue);
1057 static const unsigned OpcMap[2][2][2] = {
1058 {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
1059 {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
1060 {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
1061 {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};
1063 unsigned Opc = OpcMap[0][
N->isDivergent()][IsAdd];
1064 unsigned CarryOpc = OpcMap[1][
N->isDivergent()][IsAdd];
1067 if (!ConsumeCarry) {
1069 AddLo =
CurDAG->getMachineNode(
Opc,
DL, VTList, Args);
1072 AddLo =
CurDAG->getMachineNode(CarryOpc,
DL, VTList, Args);
1079 SDNode *AddHi =
CurDAG->getMachineNode(CarryOpc,
DL, VTList, AddHiArgs);
1082 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID,
DL, MVT::i32),
1089 MVT::i64, RegSequenceArgs);
1100void AMDGPUDAGToDAGISel::SelectAddcSubb(
SDNode *
N) {
1105 if (
N->isDivergent()) {
1107 : AMDGPU::V_SUBB_U32_e64;
1109 N,
Opc,
N->getVTList(),
1111 CurDAG->getTargetConstant(0, {}, MVT::i1) });
1114 : AMDGPU::S_SUB_CO_PSEUDO;
1115 CurDAG->SelectNodeTo(
N,
Opc,
N->getVTList(), {LHS, RHS, CI});
1119void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(
SDNode *
N) {
1124 bool IsVALU =
N->isDivergent();
1126 for (SDNode::user_iterator UI =
N->user_begin(),
E =
N->user_end(); UI !=
E;
1128 if (UI.getUse().getResNo() == 1) {
1129 if (UI->isMachineOpcode()) {
1130 if (UI->getMachineOpcode() !=
1131 (IsAdd ? AMDGPU::S_ADD_CO_PSEUDO : AMDGPU::S_SUB_CO_PSEUDO)) {
1144 unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
1147 N,
Opc,
N->getVTList(),
1148 {N->getOperand(0), N->getOperand(1),
1149 CurDAG->getTargetConstant(0, {}, MVT::i1) });
1151 unsigned Opc = IsAdd ? AMDGPU::S_UADDO_PSEUDO : AMDGPU::S_USUBO_PSEUDO;
1153 CurDAG->SelectNodeTo(
N,
Opc,
N->getVTList(),
1154 {N->getOperand(0), N->getOperand(1)});
1158void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(
SDNode *
N) {
1162 SelectVOP3Mods0(
N->getOperand(1),
Ops[1],
Ops[0],
Ops[6],
Ops[7]);
1163 SelectVOP3Mods(
N->getOperand(2),
Ops[3],
Ops[2]);
1164 SelectVOP3Mods(
N->getOperand(3),
Ops[5],
Ops[4]);
1165 Ops[8] =
N->getOperand(0);
1166 Ops[9] =
N->getOperand(4);
1170 bool UseFMAC = Subtarget->hasDLInsts() &&
1174 unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;
1175 CurDAG->SelectNodeTo(
N, Opcode,
N->getVTList(),
Ops);
1178void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(
SDNode *
N) {
1182 SelectVOP3Mods0(
N->getOperand(1),
Ops[1],
Ops[0],
Ops[4],
Ops[5]);
1183 SelectVOP3Mods(
N->getOperand(2),
Ops[3],
Ops[2]);
1184 Ops[6] =
N->getOperand(0);
1185 Ops[7] =
N->getOperand(3);
1187 CurDAG->SelectNodeTo(
N, AMDGPU::V_MUL_F32_e64,
N->getVTList(),
Ops);
1192void AMDGPUDAGToDAGISel::SelectDIV_SCALE(
SDNode *
N) {
1193 EVT VT =
N->getValueType(0);
1195 assert(VT == MVT::f32 || VT == MVT::f64);
1198 = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64;
1203 SelectVOP3BMods0(
N->getOperand(0),
Ops[1],
Ops[0],
Ops[6],
Ops[7]);
1204 SelectVOP3BMods(
N->getOperand(1),
Ops[3],
Ops[2]);
1205 SelectVOP3BMods(
N->getOperand(2),
Ops[5],
Ops[4]);
1211void AMDGPUDAGToDAGISel::SelectMAD_64_32(
SDNode *
N) {
1215 bool UseNoCarry = Subtarget->hasMadU64U32NoCarry() && !
N->hasAnyUseOfValue(1);
1216 if (Subtarget->hasMADIntraFwdBug())
1217 Opc =
Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
1218 : AMDGPU::V_MAD_U64_U32_gfx11_e64;
1219 else if (UseNoCarry)
1220 Opc =
Signed ? AMDGPU::V_MAD_NC_I64_I32_e64 : AMDGPU::V_MAD_NC_U64_U32_e64;
1222 Opc =
Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
1225 SDValue Ops[] = {
N->getOperand(0),
N->getOperand(1),
N->getOperand(2),
1229 MachineSDNode *Mad =
CurDAG->getMachineNode(
Opc, SL, MVT::i64,
Ops);
1240void AMDGPUDAGToDAGISel::SelectMUL_LOHI(
SDNode *
N) {
1245 if (Subtarget->hasMadU64U32NoCarry()) {
1246 VTList =
CurDAG->getVTList(MVT::i64);
1247 Opc =
Signed ? AMDGPU::V_MAD_NC_I64_I32_e64 : AMDGPU::V_MAD_NC_U64_U32_e64;
1249 VTList =
CurDAG->getVTList(MVT::i64, MVT::i1);
1250 if (Subtarget->hasMADIntraFwdBug()) {
1251 Opc =
Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
1252 : AMDGPU::V_MAD_U64_U32_gfx11_e64;
1254 Opc =
Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
1261 SDNode *Mad =
CurDAG->getMachineNode(
Opc, SL, VTList,
Ops);
1263 SDValue Sub0 =
CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32);
1264 SDNode *
Lo =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
1265 MVT::i32,
SDValue(Mad, 0), Sub0);
1269 SDValue Sub1 =
CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32);
1270 SDNode *
Hi =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
1271 MVT::i32,
SDValue(Mad, 0), Sub1);
1281 if (!
Base || Subtarget->hasUsableDSOffset() ||
1282 Subtarget->unsafeDSOffsetFoldingEnabled())
1293 if (
CurDAG->isBaseWithConstantOffset(Addr)) {
1306 int64_t ByteOffset =
C->getSExtValue();
1307 if (isDSOffsetLegal(
SDValue(), ByteOffset)) {
1316 if (isDSOffsetLegal(
Sub, ByteOffset)) {
1322 unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
1323 if (Subtarget->hasAddNoCarryInsts()) {
1324 SubOp = AMDGPU::V_SUB_U32_e64;
1326 CurDAG->getTargetConstant(0, {}, MVT::i1));
1329 MachineSDNode *MachineSub =
1330 CurDAG->getMachineNode(SubOp,
DL, MVT::i32, Opnds);
1346 if (isDSOffsetLegal(
SDValue(), CAddr->getZExtValue())) {
1348 MachineSDNode *MovZero =
CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
1349 DL, MVT::i32, Zero);
1351 Offset =
CurDAG->getTargetConstant(CAddr->getZExtValue(),
DL, MVT::i16);
1358 Offset =
CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
1362bool AMDGPUDAGToDAGISel::isDSOffset2Legal(
SDValue Base,
unsigned Offset0,
1364 unsigned Size)
const {
1365 if (Offset0 %
Size != 0 || Offset1 %
Size != 0)
1370 if (!
Base || Subtarget->hasUsableDSOffset() ||
1371 Subtarget->unsafeDSOffsetFoldingEnabled())
1389bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegal(
SDValue Addr)
const {
1395 if (Subtarget->hasSignedScratchOffsets())
1405 ConstantSDNode *ImmOp =
nullptr;
1416bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSV(
SDValue Addr)
const {
1422 if (Subtarget->hasSignedScratchOffsets())
1432bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSVImm(
SDValue Addr)
const {
1446 (RHSImm->getSExtValue() < 0 && RHSImm->getSExtValue() > -0x40000000)))
1449 auto LHS =
Base.getOperand(0);
1450 auto RHS =
Base.getOperand(1);
1458 return SelectDSReadWrite2(Addr,
Base, Offset0, Offset1, 4);
1464 return SelectDSReadWrite2(Addr,
Base, Offset0, Offset1, 8);
1469 unsigned Size)
const {
1472 if (
CurDAG->isBaseWithConstantOffset(Addr)) {
1477 unsigned OffsetValue1 = OffsetValue0 +
Size;
1480 if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1,
Size)) {
1482 Offset0 =
CurDAG->getTargetConstant(OffsetValue0 /
Size,
DL, MVT::i32);
1483 Offset1 =
CurDAG->getTargetConstant(OffsetValue1 /
Size,
DL, MVT::i32);
1488 if (
const ConstantSDNode *
C =
1490 unsigned OffsetValue0 =
C->getZExtValue();
1491 unsigned OffsetValue1 = OffsetValue0 +
Size;
1493 if (isDSOffset2Legal(
SDValue(), OffsetValue0, OffsetValue1,
Size)) {
1503 if (isDSOffset2Legal(
Sub, OffsetValue0, OffsetValue1,
Size)) {
1507 unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
1508 if (Subtarget->hasAddNoCarryInsts()) {
1509 SubOp = AMDGPU::V_SUB_U32_e64;
1511 CurDAG->getTargetConstant(0, {}, MVT::i1));
1514 MachineSDNode *MachineSub =
CurDAG->getMachineNode(
1519 CurDAG->getTargetConstant(OffsetValue0 /
Size,
DL, MVT::i32);
1521 CurDAG->getTargetConstant(OffsetValue1 /
Size,
DL, MVT::i32);
1527 unsigned OffsetValue0 = CAddr->getZExtValue();
1528 unsigned OffsetValue1 = OffsetValue0 +
Size;
1530 if (isDSOffset2Legal(
SDValue(), OffsetValue0, OffsetValue1,
Size)) {
1532 MachineSDNode *MovZero =
1533 CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
DL, MVT::i32, Zero);
1535 Offset0 =
CurDAG->getTargetConstant(OffsetValue0 /
Size,
DL, MVT::i32);
1536 Offset1 =
CurDAG->getTargetConstant(OffsetValue1 /
Size,
DL, MVT::i32);
1544 Offset0 =
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1545 Offset1 =
CurDAG->getTargetConstant(1,
DL, MVT::i32);
1555 if (Subtarget->useFlatForGlobal())
1560 Idxen =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
1561 Offen =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
1562 Addr64 =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
1563 SOffset = Subtarget->hasRestrictedSOffset()
1564 ?
CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32)
1565 :
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1567 ConstantSDNode *C1 =
nullptr;
1569 if (
CurDAG->isBaseWithConstantOffset(Addr)) {
1582 Addr64 =
CurDAG->getTargetConstant(1,
DL, MVT::i1);
1588 Ptr =
SDValue(buildSMovImm64(
DL, 0, MVT::v2i32), 0);
1604 Ptr =
SDValue(buildSMovImm64(
DL, 0, MVT::v2i32), 0);
1606 Addr64 =
CurDAG->getTargetConstant(1,
DL, MVT::i1);
1610 VAddr =
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1620 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
1631 AMDGPU::S_MOV_B32,
DL, MVT::i32,
1637bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(
SDValue Addr,
SDValue &SRsrc,
1640 SDValue Ptr, Offen, Idxen, Addr64;
1644 if (!Subtarget->hasAddr64())
1647 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset,
Offset, Offen, Idxen, Addr64))
1651 if (
C->getSExtValue()) {
1664std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(
SDValue N)
const {
1669 FI ?
CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)) :
N;
1675 return std::pair(TFI,
CurDAG->getTargetConstant(0,
DL, MVT::i32));
1678bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(
SDNode *Parent,
1684 MachineFunction &
MF =
CurDAG->getMachineFunction();
1685 const SIMachineFunctionInfo *
Info =
MF.getInfo<SIMachineFunctionInfo>();
1687 Rsrc =
CurDAG->getRegister(
Info->getScratchRSrcReg(), MVT::v4i32);
1690 int64_t
Imm = CAddr->getSExtValue();
1691 const int64_t NullPtr =
1694 if (Imm != NullPtr) {
1697 CurDAG->getTargetConstant(Imm & ~MaxOffset,
DL, MVT::i32);
1698 MachineSDNode *MovHighBits =
CurDAG->getMachineNode(
1699 AMDGPU::V_MOV_B32_e32,
DL, MVT::i32, HighBits);
1700 VAddr =
SDValue(MovHighBits, 0);
1702 SOffset =
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1703 ImmOffset =
CurDAG->getTargetConstant(Imm & MaxOffset,
DL, MVT::i32);
1708 if (
CurDAG->isBaseWithConstantOffset(Addr)) {
1729 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
1730 if (
TII->isLegalMUBUFImmOffset(C1) &&
1731 (!Subtarget->privateMemoryResourceIsRangeChecked() ||
1732 CurDAG->SignBitIsZero(N0))) {
1733 std::tie(VAddr, SOffset) = foldFrameIndex(N0);
1734 ImmOffset =
CurDAG->getTargetConstant(C1,
DL, MVT::i32);
1740 std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
1741 ImmOffset =
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1749 if (!
Reg.isPhysical())
1751 const auto *RC =
TRI.getPhysRegBaseClass(
Reg);
1752 return RC &&
TRI.isSGPRClass(RC);
1755bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(
SDNode *Parent,
1760 const SIRegisterInfo *
TRI = Subtarget->getRegisterInfo();
1761 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
1762 MachineFunction &
MF =
CurDAG->getMachineFunction();
1763 const SIMachineFunctionInfo *
Info =
MF.getInfo<SIMachineFunctionInfo>();
1768 SRsrc =
CurDAG->getRegister(
Info->getScratchRSrcReg(), MVT::v4i32);
1774 ConstantSDNode *CAddr;
1787 SOffset =
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1792 SRsrc =
CurDAG->getRegister(
Info->getScratchRSrcReg(), MVT::v4i32);
1798bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(
SDValue Addr,
SDValue &SRsrc,
1801 SDValue Ptr, VAddr, Offen, Idxen, Addr64;
1802 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
1804 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset,
Offset, Offen, Idxen, Addr64))
1810 uint64_t Rsrc =
TII->getDefaultRsrcDataFormat() |
1823bool AMDGPUDAGToDAGISel::SelectBUFSOffset(
SDValue ByteOffsetNode,
1825 if (Subtarget->hasRestrictedSOffset() &&
isNullConstant(ByteOffsetNode)) {
1826 SOffset =
CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32);
1830 SOffset = ByteOffsetNode;
1848bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(
SDNode *
N,
SDValue Addr,
1850 uint64_t FlatVariant)
const {
1851 int64_t OffsetVal = 0;
1855 bool CanHaveFlatSegmentOffsetBug =
1856 Subtarget->hasFlatSegmentOffsetBug() &&
1860 if (Subtarget->hasFlatInstOffsets() && !CanHaveFlatSegmentOffsetBug) {
1862 if (isBaseWithConstantOffset64(Addr, N0, N1) &&
1864 isFlatScratchBaseLegal(Addr))) {
1873 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
1874 if (
TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
1876 OffsetVal = COffsetVal;
1889 uint64_t RemainderOffset;
1891 std::tie(OffsetVal, RemainderOffset) =
1892 TII->splitFlatOffset(COffsetVal, AS, FlatVariant);
1895 getMaterializedScalarImm32(
Lo_32(RemainderOffset),
DL);
1902 unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
1903 if (Subtarget->hasAddNoCarryInsts()) {
1904 AddOp = AMDGPU::V_ADD_U32_e64;
1913 CurDAG->getTargetConstant(AMDGPU::sub0,
DL, MVT::i32);
1915 CurDAG->getTargetConstant(AMDGPU::sub1,
DL, MVT::i32);
1917 SDNode *N0Lo =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1918 DL, MVT::i32, N0, Sub0);
1919 SDNode *N0Hi =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1920 DL, MVT::i32, N0, Sub1);
1923 getMaterializedScalarImm32(
Hi_32(RemainderOffset),
DL);
1925 SDVTList VTs =
CurDAG->getVTList(MVT::i32, MVT::i1);
1928 CurDAG->getMachineNode(AMDGPU::V_ADD_CO_U32_e64,
DL, VTs,
1929 {AddOffsetLo,
SDValue(N0Lo, 0), Clamp});
1931 SDNode *Addc =
CurDAG->getMachineNode(
1932 AMDGPU::V_ADDC_U32_e64,
DL, VTs,
1936 CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID,
DL,
1941 MVT::i64, RegSequenceArgs),
1950 Offset =
CurDAG->getSignedTargetConstant(OffsetVal, SDLoc(), MVT::i32);
1954bool AMDGPUDAGToDAGISel::SelectFlatOffset(
SDNode *
N,
SDValue Addr,
1960bool AMDGPUDAGToDAGISel::SelectGlobalOffset(
SDNode *
N,
SDValue Addr,
1966bool AMDGPUDAGToDAGISel::SelectScratchOffset(
SDNode *
N,
SDValue Addr,
1969 return SelectFlatOffsetImpl(
N, Addr, VAddr,
Offset,
1977 if (
Op.getValueType() == MVT::i32)
1992bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(
SDNode *
N,
SDValue Addr,
1995 bool NeedIOffset)
const {
1996 int64_t ImmOffset = 0;
1997 ScaleOffset =
false;
2003 if (isBaseWithConstantOffset64(Addr,
LHS,
RHS)) {
2005 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
2011 ImmOffset = COffsetVal;
2012 }
else if (!
LHS->isDivergent()) {
2013 if (COffsetVal > 0) {
2018 int64_t SplitImmOffset = 0, RemainderOffset = COffsetVal;
2020 std::tie(SplitImmOffset, RemainderOffset) =
TII->splitFlatOffset(
2024 if (Subtarget->hasSignedGVSOffset() ?
isInt<32>(RemainderOffset)
2026 SDNode *VMov =
CurDAG->getMachineNode(
2027 AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
2028 CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
2031 Offset =
CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
2041 unsigned NumLiterals =
2042 !
TII->isInlineConstant(APInt(32,
Lo_32(COffsetVal))) +
2043 !
TII->isInlineConstant(APInt(32,
Hi_32(COffsetVal)));
2044 if (Subtarget->getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
2053 if (!
LHS->isDivergent()) {
2056 ScaleOffset = SelectScaleOffset(
N,
RHS, Subtarget->hasSignedGVSOffset());
2058 RHS, Subtarget->hasSignedGVSOffset(),
CurDAG)) {
2065 if (!SAddr && !
RHS->isDivergent()) {
2067 ScaleOffset = SelectScaleOffset(
N,
LHS, Subtarget->hasSignedGVSOffset());
2069 LHS, Subtarget->hasSignedGVSOffset(),
CurDAG)) {
2076 Offset =
CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
2081 if (Subtarget->hasScaleOffset() &&
2082 (Addr.
getOpcode() == (Subtarget->hasSignedGVSOffset()
2097 Offset =
CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
2110 CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32,
2111 CurDAG->getTargetConstant(0, SDLoc(), MVT::i32));
2113 Offset =
CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
2117bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(
SDNode *
N,
SDValue Addr,
2122 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset,
Offset, ScaleOffset))
2130bool AMDGPUDAGToDAGISel::SelectGlobalSAddrCPol(
SDNode *
N,
SDValue Addr,
2135 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset,
Offset, ScaleOffset))
2140 N->getConstantOperandVal(
N->getNumOperands() - 1) & ~AMDGPU::CPol::SCAL;
2146bool AMDGPUDAGToDAGISel::SelectGlobalSAddrCPolM0(
SDNode *
N,
SDValue Addr,
2152 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset,
Offset, ScaleOffset))
2157 N->getConstantOperandVal(
N->getNumOperands() - 2) & ~AMDGPU::CPol::SCAL;
2163bool AMDGPUDAGToDAGISel::SelectGlobalSAddrGLC(
SDNode *
N,
SDValue Addr,
2168 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset,
Offset, ScaleOffset))
2172 CPol =
CurDAG->getTargetConstant(CPolVal, SDLoc(), MVT::i32);
2176bool AMDGPUDAGToDAGISel::SelectGlobalSAddrNoIOffset(
SDNode *
N,
SDValue Addr,
2182 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset, DummyOffset, ScaleOffset,
2188 N->getConstantOperandVal(
N->getNumOperands() - 1) & ~AMDGPU::CPol::SCAL;
2194bool AMDGPUDAGToDAGISel::SelectGlobalSAddrNoIOffsetM0(
SDNode *
N,
SDValue Addr,
2200 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset, DummyOffset, ScaleOffset,
2221 FI->getValueType(0));
2231bool AMDGPUDAGToDAGISel::SelectScratchSAddr(
SDNode *Parent,
SDValue Addr,
2239 int64_t COffsetVal = 0;
2241 if (
CurDAG->isBaseWithConstantOffset(Addr) && isFlatScratchBaseLegal(Addr)) {
2250 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
2254 int64_t SplitImmOffset, RemainderOffset;
2255 std::tie(SplitImmOffset, RemainderOffset) =
TII->splitFlatOffset(
2258 COffsetVal = SplitImmOffset;
2262 ? getMaterializedScalarImm32(
Lo_32(RemainderOffset),
DL)
2263 :
CurDAG->getSignedTargetConstant(RemainderOffset,
DL, MVT::i32);
2264 SAddr =
SDValue(
CurDAG->getMachineNode(AMDGPU::S_ADD_I32,
DL, MVT::i32,
2269 Offset =
CurDAG->getSignedTargetConstant(COffsetVal,
DL, MVT::i32);
2275bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(
2277 if (!Subtarget->hasFlatScratchSVSSwizzleBug())
2283 KnownBits VKnown =
CurDAG->computeKnownBits(VAddr);
2290 return (VMax & 3) + (
SMax & 3) >= 4;
2293bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(
SDNode *
N,
SDValue Addr,
2297 int64_t ImmOffset = 0;
2301 if (isBaseWithConstantOffset64(Addr,
LHS,
RHS)) {
2303 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
2308 ImmOffset = COffsetVal;
2309 }
else if (!
LHS->isDivergent() && COffsetVal > 0) {
2313 int64_t SplitImmOffset, RemainderOffset;
2314 std::tie(SplitImmOffset, RemainderOffset) =
TII->splitFlatOffset(
2318 SDNode *VMov =
CurDAG->getMachineNode(
2319 AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
2320 CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
2323 if (!isFlatScratchBaseLegal(Addr))
2325 if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
2327 Offset =
CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
2328 CPol =
CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2340 if (!
LHS->isDivergent() &&
RHS->isDivergent()) {
2343 }
else if (!
RHS->isDivergent() &&
LHS->isDivergent()) {
2350 if (OrigAddr != Addr) {
2351 if (!isFlatScratchBaseLegalSVImm(OrigAddr))
2354 if (!isFlatScratchBaseLegalSV(OrigAddr))
2358 if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
2361 Offset =
CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
2363 bool ScaleOffset = SelectScaleOffset(
N, VAddr,
true );
2372bool AMDGPUDAGToDAGISel::isSOffsetLegalWithImmOffset(
SDValue *SOffset,
2375 int64_t ImmOffset)
const {
2376 if (!IsBuffer && !Imm32Only && ImmOffset < 0 &&
2378 KnownBits SKnown =
CurDAG->computeKnownBits(*SOffset);
2390 bool IsSigned)
const {
2391 bool ScaleOffset =
false;
2392 if (!Subtarget->hasScaleOffset() || !
Offset)
2406 (IsSigned &&
Offset.getOpcode() == AMDGPUISD::MUL_I24) ||
2407 Offset.getOpcode() == AMDGPUISD::MUL_U24 ||
2408 (
Offset.isMachineOpcode() &&
2409 Offset.getMachineOpcode() ==
2410 (IsSigned ? AMDGPU::S_MUL_I64_I32_PSEUDO
2411 : AMDGPU::S_MUL_U64_U32_PSEUDO))) {
2413 ScaleOffset =
C->getZExtValue() ==
Size;
2425bool AMDGPUDAGToDAGISel::SelectSMRDOffset(
SDNode *
N,
SDValue ByteOffsetNode,
2427 bool Imm32Only,
bool IsBuffer,
2428 bool HasSOffset, int64_t ImmOffset,
2429 bool *ScaleOffset)
const {
2431 "Cannot match both soffset and offset at the same time!");
2436 *ScaleOffset = SelectScaleOffset(
N, ByteOffsetNode,
false );
2446 *SOffset = ByteOffsetNode;
2447 return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
2453 return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
2460 SDLoc SL(ByteOffsetNode);
2464 int64_t ByteOffset = IsBuffer ?
C->getZExtValue() :
C->getSExtValue();
2466 *Subtarget, ByteOffset, IsBuffer, HasSOffset);
2467 if (EncodedOffset &&
Offset && !Imm32Only) {
2468 *
Offset =
CurDAG->getSignedTargetConstant(*EncodedOffset, SL, MVT::i32);
2477 if (EncodedOffset &&
Offset && Imm32Only) {
2478 *
Offset =
CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
2486 SDValue C32Bit =
CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
2488 CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0);
2495SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(
SDValue Addr)
const {
2502 const MachineFunction &
MF =
CurDAG->getMachineFunction();
2503 const SIMachineFunctionInfo *
Info =
MF.getInfo<SIMachineFunctionInfo>();
2504 unsigned AddrHiVal =
Info->get32BitAddressHighBits();
2505 SDValue AddrHi =
CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
2508 CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
2510 CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
2511 SDValue(
CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
2513 CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
2516 return SDValue(
CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
2523bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(
SDNode *
N,
SDValue Addr,
2526 bool IsBuffer,
bool HasSOffset,
2528 bool *ScaleOffset)
const {
2530 assert(!Imm32Only && !IsBuffer);
2533 if (!SelectSMRDBaseOffset(
N, Addr,
B,
nullptr,
Offset,
false,
false,
true))
2538 ImmOff =
C->getSExtValue();
2540 return SelectSMRDBaseOffset(
N,
B, SBase, SOffset,
nullptr,
false,
false,
2541 true, ImmOff, ScaleOffset);
2561 if (SelectSMRDOffset(
N, N1, SOffset,
Offset, Imm32Only, IsBuffer, HasSOffset,
2562 ImmOffset, ScaleOffset)) {
2566 if (SelectSMRDOffset(
N, N0, SOffset,
Offset, Imm32Only, IsBuffer, HasSOffset,
2567 ImmOffset, ScaleOffset)) {
2576 bool Imm32Only,
bool *ScaleOffset)
const {
2577 if (SelectSMRDBaseOffset(
N, Addr, SBase, SOffset,
Offset, Imm32Only,
2580 SBase = Expand32BitAddress(SBase);
2585 SBase = Expand32BitAddress(Addr);
2586 *
Offset =
CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
2593bool AMDGPUDAGToDAGISel::SelectSMRDImm(
SDValue Addr,
SDValue &SBase,
2595 return SelectSMRD(
nullptr, Addr, SBase,
nullptr,
2599bool AMDGPUDAGToDAGISel::SelectSMRDImm32(
SDValue Addr,
SDValue &SBase,
2602 return SelectSMRD(
nullptr, Addr, SBase,
nullptr,
2609 if (!SelectSMRD(
N, Addr, SBase, &SOffset,
nullptr,
2610 false, &ScaleOffset))
2614 SDLoc(
N), MVT::i32);
2618bool AMDGPUDAGToDAGISel::SelectSMRDSgprImm(
SDNode *
N,
SDValue Addr,
2623 if (!SelectSMRD(
N, Addr, SBase, &SOffset, &
Offset,
false, &ScaleOffset))
2627 SDLoc(
N), MVT::i32);
2632 return SelectSMRDOffset(
nullptr,
N,
nullptr, &
Offset,
2636bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(
SDValue N,
2639 return SelectSMRDOffset(
nullptr,
N,
nullptr, &
Offset,
2643bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgprImm(
SDValue N,
SDValue &SOffset,
2647 return N.getValueType() == MVT::i32 &&
2648 SelectSMRDBaseOffset(
nullptr,
N, SOffset,
2653bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(
SDValue Index,
2658 if (
CurDAG->isBaseWithConstantOffset(Index)) {
2683SDNode *AMDGPUDAGToDAGISel::getBFE32(
bool IsSigned,
const SDLoc &
DL,
2687 unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
2691 return CurDAG->getMachineNode(Opcode,
DL, MVT::i32, Val, Off, W);
2693 unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
2697 uint32_t PackedVal =
Offset | (Width << 16);
2698 SDValue PackedConst =
CurDAG->getTargetConstant(PackedVal,
DL, MVT::i32);
2700 return CurDAG->getMachineNode(Opcode,
DL, MVT::i32, Val, PackedConst);
2703void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(
SDNode *
N) {
2708 const SDValue &Shl =
N->getOperand(0);
2713 uint32_t BVal =
B->getZExtValue();
2714 uint32_t CVal =
C->getZExtValue();
2716 if (0 < BVal && BVal <= CVal && CVal < 32) {
2726void AMDGPUDAGToDAGISel::SelectS_BFE(
SDNode *
N) {
2727 switch (
N->getOpcode()) {
2729 if (
N->getOperand(0).getOpcode() ==
ISD::SRL) {
2732 const SDValue &Srl =
N->getOperand(0);
2736 if (Shift && Mask) {
2738 uint32_t MaskVal =
Mask->getZExtValue();
2750 if (
N->getOperand(0).getOpcode() ==
ISD::AND) {
2757 if (Shift && Mask) {
2759 uint32_t MaskVal =
Mask->getZExtValue() >> ShiftVal;
2768 }
else if (
N->getOperand(0).getOpcode() ==
ISD::SHL) {
2769 SelectS_BFEFromShifts(
N);
2774 if (
N->getOperand(0).getOpcode() ==
ISD::SHL) {
2775 SelectS_BFEFromShifts(
N);
2790 unsigned Width =
cast<VTSDNode>(
N->getOperand(1))->getVT().getSizeInBits();
2800bool AMDGPUDAGToDAGISel::isCBranchSCC(
const SDNode *
N)
const {
2802 if (!
N->hasOneUse())
2812 MVT VT =
Cond.getOperand(0).getSimpleValueType();
2816 if (VT == MVT::i64) {
2819 Subtarget->hasScalarCompareEq64();
2822 if ((VT == MVT::f16 || VT == MVT::f32) && Subtarget->hasSALUFloatInsts())
2855void AMDGPUDAGToDAGISel::SelectBRCOND(
SDNode *
N) {
2858 if (
Cond.isUndef()) {
2859 CurDAG->SelectNodeTo(
N, AMDGPU::SI_BR_UNDEF, MVT::Other,
2860 N->getOperand(2),
N->getOperand(0));
2864 const SIRegisterInfo *
TRI = Subtarget->getRegisterInfo();
2866 bool UseSCCBr = isCBranchSCC(
N) && isUniformBr(
N);
2867 bool AndExec = !UseSCCBr;
2868 bool Negate =
false;
2871 Cond->getOperand(0)->getOpcode() == AMDGPUISD::SETCC) {
2886 bool NegatedBallot =
false;
2889 UseSCCBr = !BallotCond->isDivergent();
2890 Negate = Negate ^ NegatedBallot;
2905 UseSCCBr ? (Negate ? AMDGPU::S_CBRANCH_SCC0 : AMDGPU::S_CBRANCH_SCC1)
2906 : (Negate ? AMDGPU::S_CBRANCH_VCCZ : AMDGPU::S_CBRANCH_VCCNZ);
2907 Register CondReg = UseSCCBr ? AMDGPU::SCC :
TRI->getVCC();
2926 Subtarget->isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64, SL,
2928 CurDAG->getRegister(Subtarget->isWave32() ? AMDGPU::EXEC_LO
2936 CurDAG->SelectNodeTo(
N, BrOp, MVT::Other,
2941void AMDGPUDAGToDAGISel::SelectFP_EXTEND(
SDNode *
N) {
2942 if (Subtarget->hasSALUFloatInsts() &&
N->getValueType(0) == MVT::f32 &&
2943 !
N->isDivergent()) {
2945 if (Src.getValueType() == MVT::f16) {
2947 CurDAG->SelectNodeTo(
N, AMDGPU::S_CVT_HI_F32_F16,
N->getVTList(),
2957void AMDGPUDAGToDAGISel::SelectDSAppendConsume(
SDNode *
N,
unsigned IntrID) {
2960 unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
2961 AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
2966 MachineMemOperand *MMO =
M->getMemOperand();
2970 if (
CurDAG->isBaseWithConstantOffset(Ptr)) {
2975 if (isDSOffsetLegal(PtrBase, OffsetVal.
getZExtValue())) {
2976 N = glueCopyToM0(
N, PtrBase);
2977 Offset =
CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
2982 N = glueCopyToM0(
N, Ptr);
2983 Offset =
CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2988 CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
2993 SDNode *Selected =
CurDAG->SelectNodeTo(
N,
Opc,
N->getVTList(),
Ops);
2999void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(
SDNode *
N,
unsigned IntrID) {
3002 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
3003 case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
3004 Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
3006 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
3007 Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP1_RTN_B32;
3009 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
3010 Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP2_RTN_B64;
3013 SDValue Ops[] = {
N->getOperand(2),
N->getOperand(3),
N->getOperand(4),
3014 N->getOperand(5),
N->getOperand(0)};
3017 MachineMemOperand *MMO =
M->getMemOperand();
3018 SDNode *Selected =
CurDAG->SelectNodeTo(
N,
Opc,
N->getVTList(),
Ops);
3022void AMDGPUDAGToDAGISel::SelectTensorLoadStore(
SDNode *
N,
unsigned IntrID) {
3023 bool IsLoad = IntrID == Intrinsic::amdgcn_tensor_load_to_lds;
3025 IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS_d4 : AMDGPU::TENSOR_STORE_FROM_LDS_d4;
3037 Opc = IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS_d2
3038 : AMDGPU::TENSOR_STORE_FROM_LDS_d2;
3050 (void)
CurDAG->SelectNodeTo(
N,
Opc, MVT::Other, TensorOps);
3055 case Intrinsic::amdgcn_ds_gws_init:
3056 return AMDGPU::DS_GWS_INIT;
3057 case Intrinsic::amdgcn_ds_gws_barrier:
3058 return AMDGPU::DS_GWS_BARRIER;
3059 case Intrinsic::amdgcn_ds_gws_sema_v:
3060 return AMDGPU::DS_GWS_SEMA_V;
3061 case Intrinsic::amdgcn_ds_gws_sema_br:
3062 return AMDGPU::DS_GWS_SEMA_BR;
3063 case Intrinsic::amdgcn_ds_gws_sema_p:
3064 return AMDGPU::DS_GWS_SEMA_P;
3065 case Intrinsic::amdgcn_ds_gws_sema_release_all:
3066 return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
3072void AMDGPUDAGToDAGISel::SelectDS_GWS(
SDNode *
N,
unsigned IntrID) {
3073 if (!Subtarget->hasGWS() ||
3074 (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
3075 !Subtarget->hasGWSSemaReleaseAll())) {
3082 const bool HasVSrc =
N->getNumOperands() == 4;
3083 assert(HasVSrc ||
N->getNumOperands() == 3);
3086 SDValue BaseOffset =
N->getOperand(HasVSrc ? 3 : 2);
3089 MachineMemOperand *MMO =
M->getMemOperand();
3102 glueCopyToM0(
N,
CurDAG->getTargetConstant(0, SL, MVT::i32));
3103 ImmOffset = ConstOffset->getZExtValue();
3105 if (
CurDAG->isBaseWithConstantOffset(BaseOffset)) {
3114 =
CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
3118 =
CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
3120 CurDAG->getTargetConstant(16, SL, MVT::i32));
3121 glueCopyToM0(
N,
SDValue(M0Base, 0));
3125 SDValue OffsetField =
CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
3129 const MCInstrDesc &InstrDesc =
TII->get(
Opc);
3130 int Data0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data0);
3132 const TargetRegisterClass *DataRC =
TII->getRegClass(InstrDesc, Data0Idx);
3136 const SIRegisterInfo *
TRI = Subtarget->getRegisterInfo();
3139 MVT DataVT =
Data.getValueType().getSimpleVT();
3140 if (
TRI->isTypeLegalForClass(*DataRC, DataVT)) {
3142 Ops.push_back(
N->getOperand(2));
3148 CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
3150 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL, MVT::i32),
3152 CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32)};
3155 SL, MVT::v2i32, RegSeqOps),
3160 Ops.push_back(OffsetField);
3161 Ops.push_back(Chain);
3163 SDNode *Selected =
CurDAG->SelectNodeTo(
N,
Opc,
N->getVTList(),
Ops);
3167void AMDGPUDAGToDAGISel::SelectInterpP1F16(
SDNode *
N) {
3168 if (Subtarget->getLDSBankCount() != 16) {
3198 SDVTList VTs =
CurDAG->getVTList(MVT::f32, MVT::Other);
3201 CurDAG->getMachineNode(AMDGPU::V_INTERP_MOV_F32,
DL, VTs, {
3202 CurDAG->getTargetConstant(2,
DL, MVT::i32),
3208 SDNode *InterpP1LV =
3209 CurDAG->getMachineNode(AMDGPU::V_INTERP_P1LV_F16,
DL, MVT::f32, {
3210 CurDAG->getTargetConstant(0,
DL, MVT::i32),
3214 CurDAG->getTargetConstant(0,
DL, MVT::i32),
3217 CurDAG->getTargetConstant(0,
DL, MVT::i1),
3218 CurDAG->getTargetConstant(0,
DL, MVT::i32),
3225void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(
SDNode *
N) {
3226 unsigned IntrID =
N->getConstantOperandVal(1);
3228 case Intrinsic::amdgcn_ds_append:
3229 case Intrinsic::amdgcn_ds_consume: {
3230 if (
N->getValueType(0) != MVT::i32)
3232 SelectDSAppendConsume(
N, IntrID);
3235 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
3236 case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
3237 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
3238 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
3239 SelectDSBvhStackIntrinsic(
N, IntrID);
3241 case Intrinsic::amdgcn_init_whole_wave:
3242 CurDAG->getMachineFunction()
3243 .getInfo<SIMachineFunctionInfo>()
3244 ->setInitWholeWave();
3251void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(
SDNode *
N) {
3252 unsigned IntrID =
N->getConstantOperandVal(0);
3253 unsigned Opcode = AMDGPU::INSTRUCTION_LIST_END;
3254 SDNode *ConvGlueNode =
N->getGluedNode();
3260 CurDAG->getMachineNode(TargetOpcode::CONVERGENCECTRL_GLUE, {},
3261 MVT::Glue,
SDValue(ConvGlueNode, 0));
3263 ConvGlueNode =
nullptr;
3266 case Intrinsic::amdgcn_wqm:
3267 Opcode = AMDGPU::WQM;
3269 case Intrinsic::amdgcn_softwqm:
3270 Opcode = AMDGPU::SOFT_WQM;
3272 case Intrinsic::amdgcn_wwm:
3273 case Intrinsic::amdgcn_strict_wwm:
3274 Opcode = AMDGPU::STRICT_WWM;
3276 case Intrinsic::amdgcn_strict_wqm:
3277 Opcode = AMDGPU::STRICT_WQM;
3279 case Intrinsic::amdgcn_interp_p1_f16:
3280 SelectInterpP1F16(
N);
3282 case Intrinsic::amdgcn_permlane16_swap:
3283 case Intrinsic::amdgcn_permlane32_swap: {
3284 if ((IntrID == Intrinsic::amdgcn_permlane16_swap &&
3285 !Subtarget->hasPermlane16Swap()) ||
3286 (IntrID == Intrinsic::amdgcn_permlane32_swap &&
3287 !Subtarget->hasPermlane32Swap())) {
3292 Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap
3293 ? AMDGPU::V_PERMLANE16_SWAP_B32_e64
3294 : AMDGPU::V_PERMLANE32_SWAP_B32_e64;
3298 NewOps.push_back(
SDValue(ConvGlueNode, 0));
3300 bool FI =
N->getConstantOperandVal(3);
3301 NewOps[2] =
CurDAG->getTargetConstant(
3304 CurDAG->SelectNodeTo(
N, Opcode,
N->getVTList(), NewOps);
3312 if (Opcode != AMDGPU::INSTRUCTION_LIST_END) {
3314 CurDAG->SelectNodeTo(
N, Opcode,
N->getVTList(), {Src});
3319 NewOps.push_back(
SDValue(ConvGlueNode, 0));
3320 CurDAG->MorphNodeTo(
N,
N->getOpcode(),
N->getVTList(), NewOps);
3324void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(
SDNode *
N) {
3325 unsigned IntrID =
N->getConstantOperandVal(1);
3327 case Intrinsic::amdgcn_ds_gws_init:
3328 case Intrinsic::amdgcn_ds_gws_barrier:
3329 case Intrinsic::amdgcn_ds_gws_sema_v:
3330 case Intrinsic::amdgcn_ds_gws_sema_br:
3331 case Intrinsic::amdgcn_ds_gws_sema_p:
3332 case Intrinsic::amdgcn_ds_gws_sema_release_all:
3333 SelectDS_GWS(
N, IntrID);
3335 case Intrinsic::amdgcn_tensor_load_to_lds:
3336 case Intrinsic::amdgcn_tensor_store_from_lds:
3337 SelectTensorLoadStore(
N, IntrID);
3346void AMDGPUDAGToDAGISel::SelectWAVE_ADDRESS(
SDNode *
N) {
3348 CurDAG->getTargetConstant(Subtarget->getWavefrontSizeLog2(), SDLoc(
N), MVT::i32);
3349 CurDAG->SelectNodeTo(
N, AMDGPU::S_LSHR_B32,
N->getVTList(),
3350 {N->getOperand(0), Log2WaveSize});
3353void AMDGPUDAGToDAGISel::SelectSTACKRESTORE(
SDNode *
N) {
3368 Subtarget->getWavefrontSizeLog2(), SL, MVT::i32);
3370 if (
N->isDivergent()) {
3371 SrcVal =
SDValue(
CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL,
3376 CopyVal =
SDValue(
CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
3377 {SrcVal, Log2WaveSize}),
3381 SDValue CopyToSP =
CurDAG->getCopyToReg(
N->getOperand(0), SL,
SP, CopyVal);
3385bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(
SDValue In,
SDValue &Src,
3387 bool IsCanonicalizing,
3388 bool AllowAbs)
const {
3394 Src = Src.getOperand(0);
3395 }
else if (Src.getOpcode() ==
ISD::FSUB && IsCanonicalizing) {
3399 if (
LHS &&
LHS->isZero()) {
3401 Src = Src.getOperand(1);
3405 if (AllowAbs && Src.getOpcode() ==
ISD::FABS) {
3407 Src = Src.getOperand(0);
3420 if (IsCanonicalizing)
3435 EVT VT = Src.getValueType();
3437 (VT != MVT::i32 && VT != MVT::v2i32 && VT != MVT::i64))
3444 auto ReplaceSrc = [&]() ->
SDValue {
3446 return Src.getOperand(0);
3451 Src.getValueType(),
LHS, Index);
3477 if (SelectVOP3ModsImpl(In, Src, Mods,
true,
3479 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3486bool AMDGPUDAGToDAGISel::SelectVOP3ModsNonCanonicalizing(
3489 if (SelectVOP3ModsImpl(In, Src, Mods,
false,
3491 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3498bool AMDGPUDAGToDAGISel::SelectVOP3BMods(
SDValue In,
SDValue &Src,
3501 if (SelectVOP3ModsImpl(In, Src, Mods,
3504 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3511bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(
SDValue In,
SDValue &Src)
const {
3519bool AMDGPUDAGToDAGISel::SelectVINTERPModsImpl(
SDValue In,
SDValue &Src,
3523 if (SelectVOP3ModsImpl(In, Src, Mods,
3528 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3535bool AMDGPUDAGToDAGISel::SelectVINTERPMods(
SDValue In,
SDValue &Src,
3537 return SelectVINTERPModsImpl(In, Src, SrcMods,
false);
3540bool AMDGPUDAGToDAGISel::SelectVINTERPModsHi(
SDValue In,
SDValue &Src,
3542 return SelectVINTERPModsImpl(In, Src, SrcMods,
true);
3545bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(
SDValue In,
SDValue &Src,
3549 Clamp =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3550 Omod =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3552 return SelectVOP3Mods(In, Src, SrcMods);
3555bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(
SDValue In,
SDValue &Src,
3559 Clamp =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3560 Omod =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3562 return SelectVOP3BMods(In, Src, SrcMods);
3565bool AMDGPUDAGToDAGISel::SelectVOP3OMods(
SDValue In,
SDValue &Src,
3570 Clamp =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3571 Omod =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3576bool AMDGPUDAGToDAGISel::SelectVOP3PMods(
SDValue In,
SDValue &Src,
3577 SDValue &SrcMods,
bool IsDOT)
const {
3584 Src = Src.getOperand(0);
3588 (!IsDOT || !Subtarget->hasDOTOpSelHazard())) {
3589 unsigned VecMods = Mods;
3591 SDValue Lo = stripBitcast(Src.getOperand(0));
3592 SDValue Hi = stripBitcast(Src.getOperand(1));
3595 Lo = stripBitcast(
Lo.getOperand(0));
3600 Hi = stripBitcast(
Hi.getOperand(0));
3610 unsigned VecSize = Src.getValueSizeInBits();
3611 Lo = stripExtractLoElt(
Lo);
3612 Hi = stripExtractLoElt(
Hi);
3614 if (
Lo.getValueSizeInBits() > VecSize) {
3615 Lo =
CurDAG->getTargetExtractSubreg(
3616 (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
3620 if (
Hi.getValueSizeInBits() > VecSize) {
3621 Hi =
CurDAG->getTargetExtractSubreg(
3622 (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
3626 assert(
Lo.getValueSizeInBits() <= VecSize &&
3627 Hi.getValueSizeInBits() <= VecSize);
3629 if (
Lo ==
Hi && !isInlineImmediate(
Lo.getNode())) {
3633 if (VecSize ==
Lo.getValueSizeInBits()) {
3635 }
else if (VecSize == 32) {
3636 Src = createVOP3PSrc32FromLo16(
Lo, Src,
CurDAG, Subtarget);
3638 assert(
Lo.getValueSizeInBits() == 32 && VecSize == 64);
3642 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL,
3643 Lo.getValueType()), 0);
3644 auto RC =
Lo->isDivergent() ? AMDGPU::VReg_64RegClassID
3645 : AMDGPU::SReg_64RegClassID;
3647 CurDAG->getTargetConstant(RC, SL, MVT::i32),
3648 Lo,
CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
3649 Undef,
CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32) };
3651 Src =
SDValue(
CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SL,
3652 Src.getValueType(),
Ops), 0);
3654 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3660 .bitcastToAPInt().getZExtValue();
3662 Src =
CurDAG->getTargetConstant(
Lit, SDLoc(In), MVT::i64);
3663 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3670 Src.getNumOperands() == 2) {
3676 ArrayRef<int>
Mask = SVN->getMask();
3678 if (Mask[0] < 2 && Mask[1] < 2) {
3680 SDValue ShuffleSrc = SVN->getOperand(0);
3693 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3701 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3705bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(
SDValue In,
SDValue &Src,
3707 return SelectVOP3PMods(In, Src, SrcMods,
true);
3710bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsDOT(
SDValue In,
SDValue &Src)
const {
3712 SelectVOP3PMods(In, SrcTmp, SrcModsTmp,
true);
3721bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(
SDValue In,
SDValue &Src,
3723 SelectVOP3Mods(In, Src, SrcMods);
3726 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3730bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsF32(
SDValue In,
SDValue &Src)
const {
3732 SelectVOP3PModsF32(In, SrcTmp, SrcModsTmp);
3741bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(
SDValue In,
3744 assert(
C->getAPIntValue().getBitWidth() == 1 &&
"expected i1 value");
3747 unsigned SrcVal =
C->getZExtValue();
3751 Src =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3758 unsigned DstRegClass;
3760 switch (Elts.
size()) {
3762 DstRegClass = AMDGPU::VReg_256RegClassID;
3766 DstRegClass = AMDGPU::VReg_128RegClassID;
3770 DstRegClass = AMDGPU::VReg_64RegClassID;
3778 Ops.push_back(
CurDAG->getTargetConstant(DstRegClass,
DL, MVT::i32));
3779 for (
unsigned i = 0; i < Elts.
size(); ++i) {
3780 Ops.push_back(Elts[i]);
3781 Ops.push_back(
CurDAG->getTargetConstant(
3784 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
DL, DstTy,
Ops);
3791 assert(
"unhandled Reg sequence size" &&
3792 (Elts.
size() == 8 || Elts.
size() == 16));
3796 for (
unsigned i = 0; i < Elts.
size(); i += 2) {
3797 SDValue LoSrc = stripExtractLoElt(stripBitcast(Elts[i]));
3802 if (Subtarget->useRealTrue16Insts()) {
3807 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
DL, MVT::i16),
3810 emitRegSequence(*
CurDAG, AMDGPU::VGPR_32RegClassID, MVT::i32,
3811 {Elts[i],
Undef}, {AMDGPU::lo16, AMDGPU::hi16},
DL);
3812 Elts[i + 1] = emitRegSequence(*
CurDAG, AMDGPU::VGPR_32RegClassID,
3813 MVT::i32, {Elts[i + 1],
Undef},
3814 {AMDGPU::lo16, AMDGPU::hi16},
DL);
3816 SDValue PackLoLo =
CurDAG->getTargetConstant(0x05040100,
DL, MVT::i32);
3818 CurDAG->getMachineNode(AMDGPU::V_PERM_B32_e64,
DL, MVT::i32,
3819 {Elts[i + 1], Elts[i], PackLoLo});
3823 return buildRegSequence32(PackedElts,
DL);
3829 unsigned ElementSize)
const {
3830 if (ElementSize == 16)
3831 return buildRegSequence16(Elts,
DL);
3832 if (ElementSize == 32)
3833 return buildRegSequence32(Elts,
DL);
3837void AMDGPUDAGToDAGISel::selectWMMAModsNegAbs(
unsigned ModOpcode,
3841 unsigned ElementSize)
const {
3846 for (
auto El : Elts) {
3849 NegAbsElts.
push_back(El->getOperand(0));
3851 if (Elts.size() != NegAbsElts.
size()) {
3853 Src =
SDValue(buildRegSequence(Elts,
DL, ElementSize), 0);
3857 Src =
SDValue(buildRegSequence(NegAbsElts,
DL, ElementSize), 0);
3863 Src =
SDValue(buildRegSequence(Elts,
DL, ElementSize), 0);
3871 std::function<
bool(
SDValue)> ModifierCheck) {
3875 for (
unsigned i = 0; i < F16Pair->getNumOperands(); ++i) {
3876 SDValue ElF16 = stripBitcast(F16Pair->getOperand(i));
3877 if (!ModifierCheck(ElF16))
3884bool AMDGPUDAGToDAGISel::SelectWMMAModsF16Neg(
SDValue In,
SDValue &Src,
3902 Src =
SDValue(buildRegSequence16(EltsF16, SDLoc(In)), 0);
3921 Src =
SDValue(buildRegSequence32(EltsV2F16, SDLoc(In)), 0);
3927 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3931bool AMDGPUDAGToDAGISel::SelectWMMAModsF16NegAbs(
SDValue In,
SDValue &Src,
3942 if (EltsF16.
empty())
3952 selectWMMAModsNegAbs(ModOpcode, Mods, EltsF16, Src, SDLoc(In), 16);
3962 if (EltsV2F16.
empty())
3971 selectWMMAModsNegAbs(ModOpcode, Mods, EltsV2F16, Src, SDLoc(In), 32);
3974 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3978bool AMDGPUDAGToDAGISel::SelectWMMAModsF32NegAbs(
SDValue In,
SDValue &Src,
3988 unsigned ModOpcode =
3999 selectWMMAModsNegAbs(ModOpcode, Mods, EltsF32, Src, SDLoc(In), 32);
4002 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4006bool AMDGPUDAGToDAGISel::SelectWMMAVISrc(
SDValue In,
SDValue &Src)
const {
4008 BitVector UndefElements;
4010 if (isInlineImmediate(
Splat.getNode())) {
4012 unsigned Imm =
C->getAPIntValue().getSExtValue();
4013 Src =
CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i32);
4017 unsigned Imm =
C->getValueAPF().bitcastToAPInt().getSExtValue();
4018 Src =
CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i32);
4026 SDValue SplatSrc32 = stripBitcast(In);
4028 if (
SDValue Splat32 = SplatSrc32BV->getSplatValue()) {
4029 SDValue SplatSrc16 = stripBitcast(Splat32);
4032 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
4033 std::optional<APInt> RawValue;
4035 RawValue =
C->getValueAPF().bitcastToAPInt();
4037 RawValue =
C->getAPIntValue();
4039 if (RawValue.has_value()) {
4040 EVT VT =
In.getValueType().getScalarType();
4046 if (
TII->isInlineConstant(FloatVal)) {
4047 Src =
CurDAG->getTargetConstant(RawValue.value(), SDLoc(In),
4052 if (
TII->isInlineConstant(RawValue.value())) {
4053 Src =
CurDAG->getTargetConstant(RawValue.value(), SDLoc(In),
4066bool AMDGPUDAGToDAGISel::SelectSWMMACIndex8(
SDValue In,
SDValue &Src,
4072 const llvm::SDValue &ShiftSrc =
In.getOperand(0);
4081 IndexKey =
CurDAG->getTargetConstant(
Key, SDLoc(In), MVT::i32);
4085bool AMDGPUDAGToDAGISel::SelectSWMMACIndex16(
SDValue In,
SDValue &Src,
4091 const llvm::SDValue &ShiftSrc =
In.getOperand(0);
4100 IndexKey =
CurDAG->getTargetConstant(
Key, SDLoc(In), MVT::i32);
4104bool AMDGPUDAGToDAGISel::SelectSWMMACIndex32(
SDValue In,
SDValue &Src,
4112 const SDValue &ExtendSrc =
In.getOperand(0);
4116 const SDValue &CastSrc =
In.getOperand(0);
4120 if (Zero &&
Zero->getZExtValue() == 0)
4131 Src = ExtractVecEltSrc;
4135 IndexKey =
CurDAG->getTargetConstant(
Key, SDLoc(In), MVT::i32);
4139bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(
SDValue In,
SDValue &Src,
4143 SrcMods =
CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
4147bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(
SDValue In,
SDValue &Src,
4150 return SelectVOP3Mods(In, Src, SrcMods);
4162 Op =
Op.getOperand(0);
4164 IsExtractHigh =
false;
4167 if (!Low16 || !Low16->isZero())
4169 Op = stripBitcast(
Op.getOperand(1));
4170 if (
Op.getValueType() != MVT::bf16)
4175 if (
Op.getValueType() != MVT::i32)
4180 if (Mask->getZExtValue() == 0xffff0000) {
4181 IsExtractHigh =
true;
4182 return Op.getOperand(0);
4191 return Op.getOperand(0);
4200bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(
SDValue In,
SDValue &Src,
4204 SelectVOP3ModsImpl(In, Src, Mods);
4206 bool IsExtractHigh =
false;
4208 Src = Src.getOperand(0);
4209 }
else if (VT == MVT::bf16) {
4217 if (Src.getValueType() != VT &&
4218 (VT != MVT::bf16 || Src.getValueType() != MVT::i32))
4221 Src = stripBitcast(Src);
4227 SelectVOP3ModsImpl(Src, Src, ModsTmp);
4242 if (Src.getValueSizeInBits() == 16) {
4251 Src.getOperand(0).getValueType() == MVT::i32) {
4252 Src = Src.getOperand(0);
4256 if (Subtarget->useRealTrue16Insts())
4258 Src = createVOP3PSrc32FromLo16(Src, In,
CurDAG, Subtarget);
4259 }
else if (IsExtractHigh)
4265bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsExt(
SDValue In,
SDValue &Src,
4268 if (!SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::f16))
4270 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4274bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(
SDValue In,
SDValue &Src,
4277 SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::f16);
4278 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4282bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixBF16ModsExt(
SDValue In,
SDValue &Src,
4285 if (!SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::bf16))
4287 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4291bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixBF16Mods(
SDValue In,
SDValue &Src,
4294 SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::bf16);
4295 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4303 unsigned NumOpcodes = 0;
4316 const uint8_t SrcBits[3] = { 0xf0, 0xcc, 0xaa };
4319 if (
C->isAllOnes()) {
4329 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4343 if (Src.size() == 3) {
4349 if (
C->isAllOnes()) {
4351 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4352 if (Src[
I] ==
LHS) {
4364 Bits = SrcBits[Src.size()];
4369 switch (In.getOpcode()) {
4377 if (!getOperandBits(
LHS, LHSBits) ||
4378 !getOperandBits(
RHS, RHSBits)) {
4379 Src = std::move(Backup);
4380 return std::make_pair(0, 0);
4386 NumOpcodes +=
Op.first;
4387 LHSBits =
Op.second;
4392 NumOpcodes +=
Op.first;
4393 RHSBits =
Op.second;
4398 return std::make_pair(0, 0);
4402 switch (In.getOpcode()) {
4404 TTbl = LHSBits & RHSBits;
4407 TTbl = LHSBits | RHSBits;
4410 TTbl = LHSBits ^ RHSBits;
4416 return std::make_pair(NumOpcodes + 1, TTbl);
4423 unsigned NumOpcodes;
4425 std::tie(NumOpcodes, TTbl) =
BitOp3_Op(In, Src);
4429 if (NumOpcodes < 2 || Src.empty())
4435 if (NumOpcodes < 4 && !In->isDivergent())
4438 if (NumOpcodes == 2 &&
In.getValueType() == MVT::i32) {
4443 (
In.getOperand(0).getOpcode() ==
In.getOpcode() ||
4444 In.getOperand(1).getOpcode() ==
In.getOpcode()))
4458 while (Src.size() < 3)
4459 Src.push_back(Src[0]);
4465 Tbl =
CurDAG->getTargetConstant(TTbl, SDLoc(In), MVT::i32);
4471 return CurDAG->getUNDEF(MVT::i32);
4475 return CurDAG->getConstant(
C->getZExtValue() << 16, SL, MVT::i32);
4480 return CurDAG->getConstant(
4481 C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
4491bool AMDGPUDAGToDAGISel::isVGPRImm(
const SDNode *
N)
const {
4492 assert(
CurDAG->getTarget().getTargetTriple().isAMDGCN());
4494 const SIRegisterInfo *SIRI = Subtarget->getRegisterInfo();
4495 const SIInstrInfo *SII = Subtarget->getInstrInfo();
4498 bool AllUsesAcceptSReg =
true;
4500 Limit < 10 && U !=
E; ++U, ++Limit) {
4501 const TargetRegisterClass *RC =
4502 getOperandRegClass(
U->getUser(),
U->getOperandNo());
4510 if (RC != &AMDGPU::VS_32RegClass && RC != &AMDGPU::VS_64RegClass &&
4511 RC != &AMDGPU::VS_64_Align2RegClass) {
4512 AllUsesAcceptSReg =
false;
4513 SDNode *
User =
U->getUser();
4514 if (
User->isMachineOpcode()) {
4515 unsigned Opc =
User->getMachineOpcode();
4516 const MCInstrDesc &
Desc = SII->get(
Opc);
4517 if (
Desc.isCommutable()) {
4518 unsigned OpIdx =
Desc.getNumDefs() +
U->getOperandNo();
4521 unsigned CommutedOpNo = CommuteIdx1 -
Desc.getNumDefs();
4522 const TargetRegisterClass *CommutedRC =
4523 getOperandRegClass(
U->getUser(), CommutedOpNo);
4524 if (CommutedRC == &AMDGPU::VS_32RegClass ||
4525 CommutedRC == &AMDGPU::VS_64RegClass ||
4526 CommutedRC == &AMDGPU::VS_64_Align2RegClass)
4527 AllUsesAcceptSReg =
true;
4535 if (!AllUsesAcceptSReg)
4539 return !AllUsesAcceptSReg && (Limit < 10);
4542bool AMDGPUDAGToDAGISel::isUniformLoad(
const SDNode *
N)
const {
4544 const MachineMemOperand *MMO = Ld->getMemOperand();
4562 (Subtarget->getScalarizeGlobalBehavior() &&
4566 ->isMemOpHasNoClobberedMemOperand(
N)));
4572 bool IsModified =
false;
4578 while (Position !=
CurDAG->allnodes_end()) {
4585 if (ResNode !=
Node) {
4591 CurDAG->RemoveDeadNodes();
4592 }
while (IsModified);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr, SDValue &N0, SDValue &N1)
static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr)
static SDValue matchExtFromI32orI32(SDValue Op, bool IsSigned, const SelectionDAG *DAG)
static MemSDNode * findMemSDNode(SDNode *N)
static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val)
static SDValue combineBallotPattern(SDValue VCMP, bool &Negate)
static SDValue matchBF16FPExtendLike(SDValue Op, bool &IsExtractHigh)
static void checkWMMAElementsModifiersF16(BuildVectorSDNode *BV, std::function< bool(SDValue)> ModifierCheck)
Defines an instruction selector for the AMDGPU target.
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
static bool isNoUnsignedWrap(MachineInstr *Addr)
static bool isExtractHiElt(MachineRegisterInfo &MRI, Register In, Register &Out)
static std::pair< unsigned, uint8_t > BitOp3_Op(Register R, SmallVectorImpl< Register > &Src, const MachineRegisterInfo &MRI)
static unsigned gwsIntrinToOpcode(unsigned IntrID)
Provides AMDGPU specific target descriptions.
Base class for AMDGPU specific classes of TargetSubtarget.
The AMDGPU TargetMachine interface definition for hw codegen targets.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
const HexagonInstrInfo * TII
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Register const TargetRegisterInfo * TRI
Promote Memory to Register
MachineInstr unsigned OpIdx
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Provides R600 specific target descriptions.
Interface definition for R600RegisterInfo.
const SmallVectorImpl< MachineOperand > & Cond
SI DAG Lowering interface definition.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
AMDGPUDAGToDAGISelLegacy(TargetMachine &TM, CodeGenOptLevel OptLevel)
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
AMDGPU specific code to select AMDGPU machine instructions for SelectionDAG operations.
void SelectBuildVector(SDNode *N, unsigned RegClassID)
void Select(SDNode *N) override
Main hook for targets to transform nodes into machine nodes.
bool runOnMachineFunction(MachineFunction &MF) override
void SelectVectorShuffle(SDNode *N)
void PreprocessISelDAG() override
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
AMDGPUDAGToDAGISel()=delete
void PostprocessISelDAG() override
PostprocessISelDAG() - This hook allows the target to hack on the graph right after selection.
bool matchLoadD16FromBuildVector(SDNode *N) const
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
AMDGPUISelDAGToDAGPass(TargetMachine &TM)
static SDValue stripBitcast(SDValue Val)
static const fltSemantics & BFloat()
static const fltSemantics & IEEEhalf()
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
bool isSignMask() const
Check if the APInt's value is returned by getSignMask.
bool isMaxSignedValue() const
Determine if this is the largest signed value.
int64_t getSExtValue() const
Get sign extended value.
unsigned countr_one() const
Count the number of trailing one bits.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI SDValue getSplatValue(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted value or a null value if this is not a splat.
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
FunctionPass class - This class is used to implement most global optimizations.
const SIInstrInfo * getInstrInfo() const override
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-...
Generation getGeneration() const
void checkSubtargetFeatures(const Function &F) const
Diagnose inconsistent subtarget features before attempting to codegen function F.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
TypeSize getValue() const
Analysis pass that exposes the LoopInfo for a function.
SmallVector< LoopT *, 4 > getLoopsInPreorder() const
Return all of the loops in the function in preorder across the loop nests, with siblings in forward p...
The legacy pass manager's analysis pass to compute loop information.
static MVT getIntegerVT(unsigned BitWidth)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
LocationSize getSize() const
Return the size in bytes of the memory reference.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
An SDNode that represents everything that will be needed to construct a MachineInstr.
This is an abstract virtual class for memory operations.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
A set of analyses that are preserved following a run of a transformation pass.
Wrapper class representing virtual and physical registers.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
bool isAnyAdd() const
Returns true if the node type is ADD or PTRADD.
static use_iterator use_end()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
static LLVM_READONLY const TargetRegisterClass * getSGPRClassForBitWidth(unsigned BitWidth)
static bool isSGPRClass(const TargetRegisterClass *RC)
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
SelectionDAGISelLegacy(char &ID, std::unique_ptr< SelectionDAGISel > S)
SelectionDAGISelPass(std::unique_ptr< SelectionDAGISel > Selector)
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
std::unique_ptr< FunctionLoweringInfo > FuncInfo
const TargetLowering * TLI
const TargetInstrInfo * TII
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
SelectionDAGISel(TargetMachine &tm, CodeGenOptLevel OL=CodeGenOptLevel::Default)
virtual bool runOnMachineFunction(MachineFunction &mf)
const TargetLowering * getTargetLowering() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
SDValue getTargetFrameIndex(int FI, EVT VT)
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
MachineFunction & getMachineFunction() const
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
ilist< SDNode >::iterator allnodes_iterator
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
static const unsigned CommuteAnyOperandIndex
Primary interface to the complete machine description for the target machine.
unsigned getID() const
Return the register class ID number.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
bool isGFX12Plus(const MCSubtargetInfo &STI)
constexpr int64_t getNullPointerValue(unsigned AS)
Get the null pointer value for the given address space.
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST)
std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer, bool HasSOffset)
bool isUniformMMO(const MachineMemOperand *MMO)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
@ PTRADD
PTRADD represents pointer arithmetic semantics, for targets that opt in using shouldPreservePtrArith(...
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
@ ADD
Simple integer binary arithmetic operators.
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
@ CONVERGENCECTRL_GLUE
This does not correspond to any convergence control intrinsic.
@ SIGN_EXTEND
Conversion operators.
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
@ UNDEF
UNDEF - An undefined node.
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
@ SHL
Shift and rotation operations.
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ BRCOND
BRCOND - Conditional branch.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
bool isExtOpcode(unsigned Opcode)
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
@ User
could "use" a pointer
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
bool isBoolSGPR(SDValue V)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
static bool getConstantValue(SDValue N, uint32_t &Out)
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
CodeGenOptLevel
Code generation optimization level.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
FunctionAddr VTableAddr uintptr_t uintptr_t Data
FunctionPass * createAMDGPUISelDag(TargetMachine &TM, CodeGenOptLevel OptLevel)
This pass converts a legalized DAG into an AMDGPU-specific DAG.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Implement std::hash so that hash_code can be used in STL containers.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
uint64_t getScalarSizeInBits() const
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false, bool SelfAdd=false)
Compute knownbits resulting from addition of LHS and RHS.
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
APInt getMinValue() const
Return the minimal unsigned value possible given these KnownBits.
static unsigned getSubRegFromChannel(unsigned Channel)
bool hasNoUnsignedWrap() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.