34#include "llvm/IR/IntrinsicsAMDGPU.h"
41#define DEBUG_TYPE "si-instr-info"
43#define GET_INSTRINFO_CTOR_DTOR
44#include "AMDGPUGenInstrInfo.inc"
47#define GET_D16ImageDimIntrinsics_IMPL
48#define GET_ImageDimIntrinsicTable_IMPL
49#define GET_RsrcIntrinsics_IMPL
50#include "AMDGPUGenSearchableTables.inc"
58 cl::desc(
"Restrict range of branch instructions (DEBUG)"));
61 "amdgpu-fix-16-bit-physreg-copies",
62 cl::desc(
"Fix copies between 32 and 16 bit registers by extending to 32 bit"),
78 unsigned N =
Node->getNumOperands();
79 while (
N &&
Node->getOperand(
N - 1).getValueType() == MVT::Glue)
91 int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0,
OpName);
92 int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1,
OpName);
94 if (Op0Idx == -1 && Op1Idx == -1)
98 if ((Op0Idx == -1 && Op1Idx != -1) ||
99 (Op1Idx == -1 && Op0Idx != -1))
120 return !
MI.memoperands_empty() &&
122 return MMO->isLoad() && MMO->isInvariant();
144 if (!
MI.hasImplicitDef() &&
145 MI.getNumImplicitOperands() ==
MI.getDesc().implicit_uses().size() &&
146 !
MI.mayRaiseFPException())
154bool SIInstrInfo::resultDependsOnExec(
const MachineInstr &
MI)
const {
158 if (
MI.isConvergent())
186 if (
MI.getOpcode() == AMDGPU::SI_IF_BREAK)
191 for (
auto Op :
MI.uses()) {
192 if (
Op.isReg() &&
Op.getReg().isVirtual() &&
198 if (FromCycle ==
nullptr)
204 while (FromCycle && !FromCycle->
contains(ToCycle)) {
224 int64_t &Offset1)
const {
232 if (!
get(Opc0).mayLoad() || !
get(Opc1).mayLoad())
236 if (!
get(Opc0).getNumDefs() || !
get(Opc1).getNumDefs())
252 int Offset0Idx = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
253 int Offset1Idx = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
254 if (Offset0Idx == -1 || Offset1Idx == -1)
261 Offset0Idx -=
get(Opc0).NumDefs;
262 Offset1Idx -=
get(Opc1).NumDefs;
292 if (!Load0Offset || !Load1Offset)
309 int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
310 int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
312 if (OffIdx0 == -1 || OffIdx1 == -1)
318 OffIdx0 -=
get(Opc0).NumDefs;
319 OffIdx1 -=
get(Opc1).NumDefs;
338 case AMDGPU::DS_READ2ST64_B32:
339 case AMDGPU::DS_READ2ST64_B64:
340 case AMDGPU::DS_WRITE2ST64_B32:
341 case AMDGPU::DS_WRITE2ST64_B64:
356 OffsetIsScalable =
false;
373 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
375 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data0);
376 if (
Opc == AMDGPU::DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64)
389 unsigned Offset0 = Offset0Op->
getImm() & 0xff;
390 unsigned Offset1 = Offset1Op->
getImm() & 0xff;
391 if (Offset0 + 1 != Offset1)
402 int Data0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data0);
410 Offset = EltSize * Offset0;
412 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
413 if (DataOpIdx == -1) {
414 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data0);
416 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data1);
432 if (BaseOp && !BaseOp->
isFI())
440 if (SOffset->
isReg())
446 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
448 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdata);
457 isMIMG(LdSt) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
458 int SRsrcIdx = AMDGPU::getNamedOperandIdx(
Opc, RsrcOpName);
460 int VAddr0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr0);
461 if (VAddr0Idx >= 0) {
463 for (
int I = VAddr0Idx;
I < SRsrcIdx; ++
I)
470 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdata);
485 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::sdst);
502 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
504 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdata);
521 if (BaseOps1.
front()->isIdenticalTo(*BaseOps2.
front()))
529 if (MO1->getAddrSpace() != MO2->getAddrSpace())
532 const auto *Base1 = MO1->getValue();
533 const auto *Base2 = MO2->getValue();
534 if (!Base1 || !Base2)
542 return Base1 == Base2;
546 int64_t Offset1,
bool OffsetIsScalable1,
548 int64_t Offset2,
bool OffsetIsScalable2,
549 unsigned ClusterSize,
550 unsigned NumBytes)
const {
563 }
else if (!BaseOps1.
empty() || !BaseOps2.
empty()) {
582 const unsigned LoadSize = NumBytes / ClusterSize;
583 const unsigned NumDWords = ((LoadSize + 3) / 4) * ClusterSize;
584 return NumDWords <= MaxMemoryClusterDWords;
598 int64_t Offset0, int64_t Offset1,
599 unsigned NumLoads)
const {
600 assert(Offset1 > Offset0 &&
601 "Second offset should be larger than first offset!");
606 return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
613 const char *Msg =
"illegal VGPR to SGPR copy") {
634 assert((
TII.getSubtarget().hasMAIInsts() &&
635 !
TII.getSubtarget().hasGFX90AInsts()) &&
636 "Expected GFX908 subtarget.");
639 AMDGPU::AGPR_32RegClass.
contains(SrcReg)) &&
640 "Source register of the copy should be either an SGPR or an AGPR.");
643 "Destination register of the copy should be an AGPR.");
652 for (
auto Def =
MI,
E =
MBB.begin(); Def !=
E; ) {
655 if (!Def->modifiesRegister(SrcReg, &RI))
658 if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
659 Def->getOperand(0).getReg() != SrcReg)
666 bool SafeToPropagate =
true;
669 for (
auto I = Def;
I !=
MI && SafeToPropagate; ++
I)
670 if (
I->modifiesRegister(DefOp.
getReg(), &RI))
671 SafeToPropagate =
false;
673 if (!SafeToPropagate)
676 for (
auto I = Def;
I !=
MI; ++
I)
677 I->clearRegisterKills(DefOp.
getReg(), &RI);
686 if (ImpUseSuperReg) {
687 Builder.addReg(ImpUseSuperReg,
695 RS.enterBasicBlockEnd(
MBB);
696 RS.backward(std::next(
MI));
705 unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;
708 assert(
MBB.getParent()->getRegInfo().isReserved(Tmp) &&
709 "VGPR used for an intermediate copy should have been reserved.");
714 Register Tmp2 = RS.scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
MI,
724 unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
725 if (AMDGPU::AGPR_32RegClass.
contains(SrcReg)) {
726 TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;
733 if (ImpUseSuperReg) {
734 UseBuilder.
addReg(ImpUseSuperReg,
755 for (
unsigned Idx = 0; Idx < BaseIndices.
size(); ++Idx) {
756 int16_t SubIdx = BaseIndices[Idx];
757 Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
758 Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
759 assert(DestSubReg && SrcSubReg &&
"Failed to find subregs!");
760 unsigned Opcode = AMDGPU::S_MOV_B32;
763 bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
764 bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
765 if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.
size())) {
769 DestSubReg = RI.getSubReg(DestReg, SubIdx);
770 SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
771 assert(DestSubReg && SrcSubReg &&
"Failed to find subregs!");
772 Opcode = AMDGPU::S_MOV_B64;
787 assert(FirstMI && LastMI);
795 LastMI->addRegisterKilled(SrcReg, &RI);
801 Register SrcReg,
bool KillSrc,
bool RenamableDest,
802 bool RenamableSrc)
const {
804 unsigned Size = RI.getRegSizeInBits(*RC);
806 unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);
812 if (((
Size == 16) != (SrcSize == 16))) {
814 assert(ST.useRealTrue16Insts());
816 MCRegister SubReg = RI.getSubReg(RegToFix, AMDGPU::lo16);
819 if (DestReg == SrcReg) {
825 RC = RI.getPhysRegBaseClass(DestReg);
826 Size = RI.getRegSizeInBits(*RC);
827 SrcRC = RI.getPhysRegBaseClass(SrcReg);
828 SrcSize = RI.getRegSizeInBits(*SrcRC);
832 if (RC == &AMDGPU::VGPR_32RegClass) {
834 AMDGPU::SReg_32RegClass.
contains(SrcReg) ||
835 AMDGPU::AGPR_32RegClass.
contains(SrcReg));
836 unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
837 AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;
843 if (RC == &AMDGPU::SReg_32_XM0RegClass ||
844 RC == &AMDGPU::SReg_32RegClass) {
845 if (SrcReg == AMDGPU::SCC) {
852 if (!AMDGPU::SReg_32RegClass.
contains(SrcReg)) {
853 if (DestReg == AMDGPU::VCC_LO) {
871 if (RC == &AMDGPU::SReg_64RegClass) {
872 if (SrcReg == AMDGPU::SCC) {
879 if (!AMDGPU::SReg_64_EncodableRegClass.
contains(SrcReg)) {
880 if (DestReg == AMDGPU::VCC) {
898 if (DestReg == AMDGPU::SCC) {
901 if (AMDGPU::SReg_64RegClass.
contains(SrcReg)) {
905 assert(ST.hasScalarCompareEq64());
919 if (RC == &AMDGPU::AGPR_32RegClass) {
920 if (AMDGPU::VGPR_32RegClass.
contains(SrcReg) ||
921 (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {
927 if (AMDGPU::AGPR_32RegClass.
contains(SrcReg) && ST.hasGFX90AInsts()) {
936 const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
943 AMDGPU::SReg_LO16RegClass.
contains(SrcReg) ||
944 AMDGPU::AGPR_LO16RegClass.
contains(SrcReg));
946 bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
947 bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
948 bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
949 bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
952 MCRegister NewDestReg = RI.get32BitRegister(DestReg);
953 MCRegister NewSrcReg = RI.get32BitRegister(SrcReg);
966 if (IsAGPRDst || IsAGPRSrc) {
967 if (!DstLow || !SrcLow) {
969 "Cannot use hi16 subreg with an AGPR!");
976 if (ST.useRealTrue16Insts()) {
982 if (AMDGPU::VGPR_16_Lo128RegClass.
contains(DestReg) &&
983 (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.
contains(SrcReg))) {
995 if (IsSGPRSrc && !ST.hasSDWAScalar()) {
996 if (!DstLow || !SrcLow) {
998 "Cannot use hi16 subreg on VI!");
1021 if (RC == RI.getVGPR64Class() && (SrcRC == RC || RI.isSGPRClass(SrcRC))) {
1022 if (ST.hasVMovB64Inst()) {
1027 if (ST.hasPkMovB32()) {
1043 const bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
1044 if (RI.isSGPRClass(RC)) {
1045 if (!RI.isSGPRClass(SrcRC)) {
1049 const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);
1055 unsigned EltSize = 4;
1056 unsigned Opcode = AMDGPU::V_MOV_B32_e32;
1057 if (RI.isAGPRClass(RC)) {
1058 if (ST.hasGFX90AInsts() && RI.isAGPRClass(SrcRC))
1059 Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
1060 else if (RI.hasVGPRs(SrcRC) ||
1061 (ST.hasGFX90AInsts() && RI.isSGPRClass(SrcRC)))
1062 Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
1064 Opcode = AMDGPU::INSTRUCTION_LIST_END;
1065 }
else if (RI.hasVGPRs(RC) && RI.isAGPRClass(SrcRC)) {
1066 Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
1067 }
else if ((
Size % 64 == 0) && RI.hasVGPRs(RC) &&
1068 (RI.isProperlyAlignedRC(*RC) &&
1069 (SrcRC == RC || RI.isSGPRClass(SrcRC)))) {
1071 if (ST.hasVMovB64Inst()) {
1072 Opcode = AMDGPU::V_MOV_B64_e32;
1074 }
else if (ST.hasPkMovB32()) {
1075 Opcode = AMDGPU::V_PK_MOV_B32;
1085 std::unique_ptr<RegScavenger> RS;
1086 if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
1087 RS = std::make_unique<RegScavenger>();
1093 const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
1094 const bool CanKillSuperReg = KillSrc && !Overlap;
1096 for (
unsigned Idx = 0; Idx < SubIndices.
size(); ++Idx) {
1099 SubIdx = SubIndices[Idx];
1101 SubIdx = SubIndices[SubIndices.
size() - Idx - 1];
1102 Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
1103 Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
1104 assert(DestSubReg && SrcSubReg &&
"Failed to find subregs!");
1106 bool IsFirstSubreg = Idx == 0;
1107 bool UseKill = CanKillSuperReg && Idx == SubIndices.
size() - 1;
1109 if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
1113 *RS, Overlap, ImpDefSuper, ImpUseSuper);
1114 }
else if (Opcode == AMDGPU::V_PK_MOV_B32) {
1160 int64_t &ImmVal)
const {
1161 switch (
MI.getOpcode()) {
1162 case AMDGPU::V_MOV_B32_e32:
1163 case AMDGPU::S_MOV_B32:
1164 case AMDGPU::S_MOVK_I32:
1165 case AMDGPU::S_MOV_B64:
1166 case AMDGPU::V_MOV_B64_e32:
1167 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
1168 case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
1169 case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
1170 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
1171 case AMDGPU::V_MOV_B64_PSEUDO:
1172 case AMDGPU::V_MOV_B16_t16_e32: {
1176 return MI.getOperand(0).getReg() == Reg;
1181 case AMDGPU::V_MOV_B16_t16_e64: {
1183 if (Src0.
isImm() && !
MI.getOperand(1).getImm()) {
1185 return MI.getOperand(0).getReg() == Reg;
1190 case AMDGPU::S_BREV_B32:
1191 case AMDGPU::V_BFREV_B32_e32:
1192 case AMDGPU::V_BFREV_B32_e64: {
1196 return MI.getOperand(0).getReg() == Reg;
1201 case AMDGPU::S_NOT_B32:
1202 case AMDGPU::V_NOT_B32_e32:
1203 case AMDGPU::V_NOT_B32_e64: {
1206 ImmVal =
static_cast<int64_t
>(~static_cast<int32_t>(Src0.
getImm()));
1207 return MI.getOperand(0).getReg() == Reg;
1217std::optional<int64_t>
1222 if (!
Op.isReg() || !
Op.getReg().isVirtual())
1223 return std::nullopt;
1226 if (Def && Def->isMoveImmediate()) {
1232 return std::nullopt;
1237 if (RI.isAGPRClass(DstRC))
1238 return AMDGPU::COPY;
1239 if (RI.getRegSizeInBits(*DstRC) == 16) {
1242 return RI.isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;
1244 if (RI.getRegSizeInBits(*DstRC) == 32)
1245 return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
1246 if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC))
1247 return AMDGPU::S_MOV_B64;
1248 if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC))
1249 return AMDGPU::V_MOV_B64_PSEUDO;
1250 return AMDGPU::COPY;
1255 bool IsIndirectSrc)
const {
1256 if (IsIndirectSrc) {
1258 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
1260 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
1262 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
1264 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
1266 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
1268 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V6);
1270 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V7);
1272 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
1274 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
1276 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
1278 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
1280 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
1282 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
1283 if (VecSize <= 1024)
1284 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);
1290 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
1292 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
1294 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
1296 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
1298 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
1300 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V6);
1302 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V7);
1304 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
1306 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
1308 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
1310 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
1312 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
1314 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
1315 if (VecSize <= 1024)
1316 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);
1323 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
1325 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
1327 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
1329 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
1331 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
1333 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V6;
1335 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V7;
1337 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
1339 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
1341 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
1343 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
1345 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
1347 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
1348 if (VecSize <= 1024)
1349 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;
1356 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
1358 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
1360 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
1362 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
1364 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
1366 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V6;
1368 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V7;
1370 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
1372 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
1374 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
1376 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
1378 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
1380 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
1381 if (VecSize <= 1024)
1382 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;
1389 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
1391 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
1393 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
1395 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
1396 if (VecSize <= 1024)
1397 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;
1404 bool IsSGPR)
const {
1416 assert(EltSize == 32 &&
"invalid reg indexing elt size");
1423 return NeedsCFI ? AMDGPU::SI_SPILL_S32_CFI_SAVE : AMDGPU::SI_SPILL_S32_SAVE;
1425 return NeedsCFI ? AMDGPU::SI_SPILL_S64_CFI_SAVE : AMDGPU::SI_SPILL_S64_SAVE;
1427 return NeedsCFI ? AMDGPU::SI_SPILL_S96_CFI_SAVE : AMDGPU::SI_SPILL_S96_SAVE;
1429 return NeedsCFI ? AMDGPU::SI_SPILL_S128_CFI_SAVE
1430 : AMDGPU::SI_SPILL_S128_SAVE;
1432 return NeedsCFI ? AMDGPU::SI_SPILL_S160_CFI_SAVE
1433 : AMDGPU::SI_SPILL_S160_SAVE;
1435 return NeedsCFI ? AMDGPU::SI_SPILL_S192_CFI_SAVE
1436 : AMDGPU::SI_SPILL_S192_SAVE;
1438 return NeedsCFI ? AMDGPU::SI_SPILL_S224_CFI_SAVE
1439 : AMDGPU::SI_SPILL_S224_SAVE;
1441 return AMDGPU::SI_SPILL_S256_SAVE;
1443 return AMDGPU::SI_SPILL_S288_SAVE;
1445 return AMDGPU::SI_SPILL_S320_SAVE;
1447 return AMDGPU::SI_SPILL_S352_SAVE;
1449 return AMDGPU::SI_SPILL_S384_SAVE;
1451 return NeedsCFI ? AMDGPU::SI_SPILL_S512_CFI_SAVE
1452 : AMDGPU::SI_SPILL_S512_SAVE;
1454 return NeedsCFI ? AMDGPU::SI_SPILL_S1024_CFI_SAVE
1455 : AMDGPU::SI_SPILL_S1024_SAVE;
1464 return AMDGPU::SI_SPILL_V16_SAVE;
1466 return NeedsCFI ? AMDGPU::SI_SPILL_V32_CFI_SAVE : AMDGPU::SI_SPILL_V32_SAVE;
1468 return NeedsCFI ? AMDGPU::SI_SPILL_V64_CFI_SAVE : AMDGPU::SI_SPILL_V64_SAVE;
1470 return NeedsCFI ? AMDGPU::SI_SPILL_V96_CFI_SAVE : AMDGPU::SI_SPILL_V96_SAVE;
1472 return NeedsCFI ? AMDGPU::SI_SPILL_V128_CFI_SAVE
1473 : AMDGPU::SI_SPILL_V128_SAVE;
1475 return NeedsCFI ? AMDGPU::SI_SPILL_V160_CFI_SAVE
1476 : AMDGPU::SI_SPILL_V160_SAVE;
1478 return NeedsCFI ? AMDGPU::SI_SPILL_V192_CFI_SAVE
1479 : AMDGPU::SI_SPILL_V192_SAVE;
1481 return NeedsCFI ? AMDGPU::SI_SPILL_V224_CFI_SAVE
1482 : AMDGPU::SI_SPILL_V224_SAVE;
1484 return NeedsCFI ? AMDGPU::SI_SPILL_V256_CFI_SAVE
1485 : AMDGPU::SI_SPILL_V256_SAVE;
1487 return NeedsCFI ? AMDGPU::SI_SPILL_V288_CFI_SAVE
1488 : AMDGPU::SI_SPILL_V288_SAVE;
1490 return NeedsCFI ? AMDGPU::SI_SPILL_V320_CFI_SAVE
1491 : AMDGPU::SI_SPILL_V320_SAVE;
1493 return NeedsCFI ? AMDGPU::SI_SPILL_V352_CFI_SAVE
1494 : AMDGPU::SI_SPILL_V352_SAVE;
1496 return NeedsCFI ? AMDGPU::SI_SPILL_V384_CFI_SAVE
1497 : AMDGPU::SI_SPILL_V384_SAVE;
1499 return NeedsCFI ? AMDGPU::SI_SPILL_V512_CFI_SAVE
1500 : AMDGPU::SI_SPILL_V512_SAVE;
1502 return NeedsCFI ? AMDGPU::SI_SPILL_V1024_CFI_SAVE
1503 : AMDGPU::SI_SPILL_V1024_SAVE;
1512 return NeedsCFI ? AMDGPU::SI_SPILL_AV32_CFI_SAVE
1513 : AMDGPU::SI_SPILL_AV32_SAVE;
1515 return NeedsCFI ? AMDGPU::SI_SPILL_AV64_CFI_SAVE
1516 : AMDGPU::SI_SPILL_AV64_SAVE;
1518 return NeedsCFI ? AMDGPU::SI_SPILL_AV96_CFI_SAVE
1519 : AMDGPU::SI_SPILL_AV96_SAVE;
1521 return NeedsCFI ? AMDGPU::SI_SPILL_AV128_CFI_SAVE
1522 : AMDGPU::SI_SPILL_AV128_SAVE;
1524 return NeedsCFI ? AMDGPU::SI_SPILL_AV160_CFI_SAVE
1525 : AMDGPU::SI_SPILL_AV160_SAVE;
1527 return NeedsCFI ? AMDGPU::SI_SPILL_AV192_CFI_SAVE
1528 : AMDGPU::SI_SPILL_AV192_SAVE;
1530 return NeedsCFI ? AMDGPU::SI_SPILL_AV224_CFI_SAVE
1531 : AMDGPU::SI_SPILL_AV224_SAVE;
1533 return NeedsCFI ? AMDGPU::SI_SPILL_AV256_CFI_SAVE
1534 : AMDGPU::SI_SPILL_AV256_SAVE;
1536 return AMDGPU::SI_SPILL_AV288_SAVE;
1538 return AMDGPU::SI_SPILL_AV320_SAVE;
1540 return AMDGPU::SI_SPILL_AV352_SAVE;
1542 return AMDGPU::SI_SPILL_AV384_SAVE;
1544 return NeedsCFI ? AMDGPU::SI_SPILL_AV512_CFI_SAVE
1545 : AMDGPU::SI_SPILL_AV512_SAVE;
1547 return NeedsCFI ? AMDGPU::SI_SPILL_AV1024_CFI_SAVE
1548 : AMDGPU::SI_SPILL_AV1024_SAVE;
1555 bool IsVectorSuperClass) {
1560 if (IsVectorSuperClass)
1561 return AMDGPU::SI_SPILL_WWM_AV32_SAVE;
1563 return AMDGPU::SI_SPILL_WWM_V32_SAVE;
1569 bool IsVectorSuperClass = RI.isVectorSuperClass(RC);
1576 if (ST.hasMAIInsts())
1582void SIInstrInfo::storeRegToStackSlotImpl(
1595 FrameInfo.getObjectAlign(FrameIndex));
1596 unsigned SpillSize = RI.getSpillSize(*RC);
1602 assert(SrcReg != AMDGPU::M0 &&
"m0 should not be spilled");
1603 assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
1604 SrcReg != AMDGPU::EXEC &&
"exec should not be spilled");
1613 if (SrcReg.
isVirtual() && SpillSize == 4) {
1627 SpillSize, *MFI, NeedsCFI);
1642 storeRegToStackSlotImpl(
MBB,
MI, SrcReg, isKill, FrameIndex, RC, VReg, Flags,
1651 storeRegToStackSlotImpl(
MBB,
MI, SrcReg, isKill, FrameIndex, RC,
Register(),
1658 return AMDGPU::SI_SPILL_S32_RESTORE;
1660 return AMDGPU::SI_SPILL_S64_RESTORE;
1662 return AMDGPU::SI_SPILL_S96_RESTORE;
1664 return AMDGPU::SI_SPILL_S128_RESTORE;
1666 return AMDGPU::SI_SPILL_S160_RESTORE;
1668 return AMDGPU::SI_SPILL_S192_RESTORE;
1670 return AMDGPU::SI_SPILL_S224_RESTORE;
1672 return AMDGPU::SI_SPILL_S256_RESTORE;
1674 return AMDGPU::SI_SPILL_S288_RESTORE;
1676 return AMDGPU::SI_SPILL_S320_RESTORE;
1678 return AMDGPU::SI_SPILL_S352_RESTORE;
1680 return AMDGPU::SI_SPILL_S384_RESTORE;
1682 return AMDGPU::SI_SPILL_S512_RESTORE;
1684 return AMDGPU::SI_SPILL_S1024_RESTORE;
1693 return AMDGPU::SI_SPILL_V16_RESTORE;
1695 return AMDGPU::SI_SPILL_V32_RESTORE;
1697 return AMDGPU::SI_SPILL_V64_RESTORE;
1699 return AMDGPU::SI_SPILL_V96_RESTORE;
1701 return AMDGPU::SI_SPILL_V128_RESTORE;
1703 return AMDGPU::SI_SPILL_V160_RESTORE;
1705 return AMDGPU::SI_SPILL_V192_RESTORE;
1707 return AMDGPU::SI_SPILL_V224_RESTORE;
1709 return AMDGPU::SI_SPILL_V256_RESTORE;
1711 return AMDGPU::SI_SPILL_V288_RESTORE;
1713 return AMDGPU::SI_SPILL_V320_RESTORE;
1715 return AMDGPU::SI_SPILL_V352_RESTORE;
1717 return AMDGPU::SI_SPILL_V384_RESTORE;
1719 return AMDGPU::SI_SPILL_V512_RESTORE;
1721 return AMDGPU::SI_SPILL_V1024_RESTORE;
1730 return AMDGPU::SI_SPILL_AV32_RESTORE;
1732 return AMDGPU::SI_SPILL_AV64_RESTORE;
1734 return AMDGPU::SI_SPILL_AV96_RESTORE;
1736 return AMDGPU::SI_SPILL_AV128_RESTORE;
1738 return AMDGPU::SI_SPILL_AV160_RESTORE;
1740 return AMDGPU::SI_SPILL_AV192_RESTORE;
1742 return AMDGPU::SI_SPILL_AV224_RESTORE;
1744 return AMDGPU::SI_SPILL_AV256_RESTORE;
1746 return AMDGPU::SI_SPILL_AV288_RESTORE;
1748 return AMDGPU::SI_SPILL_AV320_RESTORE;
1750 return AMDGPU::SI_SPILL_AV352_RESTORE;
1752 return AMDGPU::SI_SPILL_AV384_RESTORE;
1754 return AMDGPU::SI_SPILL_AV512_RESTORE;
1756 return AMDGPU::SI_SPILL_AV1024_RESTORE;
1763 bool IsVectorSuperClass) {
1768 if (IsVectorSuperClass)
1769 return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
1771 return AMDGPU::SI_SPILL_WWM_V32_RESTORE;
1777 bool IsVectorSuperClass = RI.isVectorSuperClass(RC);
1784 if (ST.hasMAIInsts())
1787 assert(!RI.isAGPRClass(RC));
1801 unsigned SpillSize = RI.getSpillSize(*RC);
1808 FrameInfo.getObjectAlign(FrameIndex));
1810 if (RI.isSGPRClass(RC)) {
1813 assert(DestReg != AMDGPU::M0 &&
"m0 should not be reloaded into");
1814 assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
1815 DestReg != AMDGPU::EXEC &&
"exec should not be spilled");
1820 if (DestReg.
isVirtual() && SpillSize == 4) {
1849 unsigned Quantity)
const {
1851 unsigned MaxSNopCount = 1u << ST.getSNopBits();
1852 while (Quantity > 0) {
1853 unsigned Arg = std::min(Quantity, MaxSNopCount);
1864 constexpr unsigned DoorbellIDMask = 0x3ff;
1865 constexpr unsigned ECQueueWaveAbort = 0x400;
1870 if (!
MBB.succ_empty() || std::next(
MI.getIterator()) !=
MBB.end()) {
1871 MBB.splitAt(
MI,
false);
1875 MBB.addSuccessor(TrapBB);
1885 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_MOV_B32), AMDGPU::TTMP2)
1889 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_AND_B32), DoorbellRegMasked)
1894 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_OR_B32), SetWaveAbortBit)
1895 .
addUse(DoorbellRegMasked)
1896 .
addImm(ECQueueWaveAbort);
1897 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_MOV_B32), AMDGPU::M0)
1898 .
addUse(SetWaveAbortBit);
1901 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_MOV_B32), AMDGPU::M0)
1912 return MBB.getNextNode();
1916 switch (
MI.getOpcode()) {
1918 if (
MI.isMetaInstruction())
1923 return MI.getOperand(0).getImm() + 1;
1933 switch (
MI.getOpcode()) {
1935 case AMDGPU::S_MOV_B64_term:
1938 MI.setDesc(
get(AMDGPU::S_MOV_B64));
1941 case AMDGPU::S_MOV_B32_term:
1944 MI.setDesc(
get(AMDGPU::S_MOV_B32));
1947 case AMDGPU::S_XOR_B64_term:
1950 MI.setDesc(
get(AMDGPU::S_XOR_B64));
1953 case AMDGPU::S_XOR_B32_term:
1956 MI.setDesc(
get(AMDGPU::S_XOR_B32));
1958 case AMDGPU::S_OR_B64_term:
1961 MI.setDesc(
get(AMDGPU::S_OR_B64));
1963 case AMDGPU::S_OR_B32_term:
1966 MI.setDesc(
get(AMDGPU::S_OR_B32));
1969 case AMDGPU::S_ANDN2_B64_term:
1972 MI.setDesc(
get(AMDGPU::S_ANDN2_B64));
1975 case AMDGPU::S_ANDN2_B32_term:
1978 MI.setDesc(
get(AMDGPU::S_ANDN2_B32));
1981 case AMDGPU::S_AND_B64_term:
1984 MI.setDesc(
get(AMDGPU::S_AND_B64));
1987 case AMDGPU::S_AND_B32_term:
1990 MI.setDesc(
get(AMDGPU::S_AND_B32));
1993 case AMDGPU::S_AND_SAVEEXEC_B64_term:
1996 MI.setDesc(
get(AMDGPU::S_AND_SAVEEXEC_B64));
1999 case AMDGPU::S_AND_SAVEEXEC_B32_term:
2002 MI.setDesc(
get(AMDGPU::S_AND_SAVEEXEC_B32));
2005 case AMDGPU::V_CMPX_EQ_U32_nosdst_e32_term:
2006 MI.setDesc(
get(AMDGPU::V_CMPX_EQ_U32_nosdst_e32));
2008 case AMDGPU::V_CMPX_EQ_U64_nosdst_e32_term:
2009 MI.setDesc(
get(AMDGPU::V_CMPX_EQ_U64_nosdst_e32));
2012 case AMDGPU::SI_SPILL_S32_TO_VGPR:
2013 MI.setDesc(
get(AMDGPU::V_WRITELANE_B32));
2016 case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
2017 MI.setDesc(
get(AMDGPU::V_READLANE_B32));
2019 case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
2023 get(IsAGPR ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::V_MOV_B32_e32));
2026 case AMDGPU::AV_MOV_B64_IMM_PSEUDO: {
2029 int64_t Imm =
MI.getOperand(1).getImm();
2031 Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
2032 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2037 MI.eraseFromParent();
2043 case AMDGPU::V_MOV_B64_PSEUDO: {
2045 Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
2046 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2054 if (ST.hasVMovB64Inst() && Mov64RC->
contains(Dst)) {
2055 MI.setDesc(Mov64Desc);
2060 if (
SrcOp.isImm()) {
2062 APInt Lo(32, Imm.getLoBits(32).getZExtValue());
2063 APInt Hi(32, Imm.getHiBits(32).getZExtValue());
2087 if (ST.hasPkMovB32() &&
2106 MI.eraseFromParent();
2109 case AMDGPU::V_MOV_B64_DPP_PSEUDO: {
2113 case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
2117 if (ST.has64BitLiterals()) {
2118 MI.setDesc(
get(AMDGPU::S_MOV_B64));
2124 MI.setDesc(
get(AMDGPU::S_MOV_B64));
2129 Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
2130 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2132 APInt Lo(32, Imm.getLoBits(32).getZExtValue());
2133 APInt Hi(32, Imm.getHiBits(32).getZExtValue());
2138 MI.eraseFromParent();
2141 case AMDGPU::V_SET_INACTIVE_B32: {
2145 .
add(
MI.getOperand(3))
2146 .
add(
MI.getOperand(4))
2147 .
add(
MI.getOperand(1))
2148 .
add(
MI.getOperand(2))
2149 .
add(
MI.getOperand(5));
2150 MI.eraseFromParent();
2153 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
2154 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
2155 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
2156 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
2157 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
2158 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V6:
2159 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V7:
2160 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
2161 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
2162 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
2163 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
2164 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
2165 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
2166 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
2167 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
2168 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
2169 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
2170 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
2171 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
2172 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V6:
2173 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V7:
2174 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
2175 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
2176 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
2177 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
2178 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
2179 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
2180 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
2181 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
2182 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
2183 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
2184 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
2185 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {
2189 if (RI.hasVGPRs(EltRC)) {
2190 Opc = AMDGPU::V_MOVRELD_B32_e32;
2192 Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
2193 : AMDGPU::S_MOVRELD_B32;
2198 bool IsUndef =
MI.getOperand(1).isUndef();
2199 unsigned SubReg =
MI.getOperand(3).getImm();
2200 assert(VecReg ==
MI.getOperand(1).getReg());
2205 .
add(
MI.getOperand(2))
2209 const int ImpDefIdx =
2211 const int ImpUseIdx = ImpDefIdx + 1;
2213 MI.eraseFromParent();
2216 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
2217 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
2218 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
2219 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
2220 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
2221 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V6:
2222 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V7:
2223 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
2224 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
2225 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
2226 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
2227 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
2228 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
2229 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
2230 assert(ST.useVGPRIndexMode());
2232 bool IsUndef =
MI.getOperand(1).isUndef();
2241 const MCInstrDesc &OpDesc =
get(AMDGPU::V_MOV_B32_indirect_write);
2245 .
add(
MI.getOperand(2))
2249 const int ImpDefIdx =
2251 const int ImpUseIdx = ImpDefIdx + 1;
2258 MI.eraseFromParent();
2261 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
2262 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
2263 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
2264 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
2265 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
2266 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V6:
2267 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V7:
2268 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
2269 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
2270 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
2271 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
2272 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
2273 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
2274 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {
2275 assert(ST.useVGPRIndexMode());
2278 bool IsUndef =
MI.getOperand(1).isUndef();
2282 .
add(
MI.getOperand(2))
2295 MI.eraseFromParent();
2298 case AMDGPU::SI_PC_ADD_REL_OFFSET: {
2301 Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
2302 Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
2321 if (ST.hasGetPCZeroExtension()) {
2325 BuildMI(MF,
DL,
get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));
2332 BuildMI(MF,
DL,
get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));
2342 MI.eraseFromParent();
2345 case AMDGPU::SI_PC_ADD_REL_OFFSET64: {
2355 Op.setOffset(
Op.getOffset() + 4);
2357 BuildMI(MF,
DL,
get(AMDGPU::S_ADD_U64), Reg).addReg(Reg).add(
Op));
2361 MI.eraseFromParent();
2364 case AMDGPU::ENTER_STRICT_WWM: {
2370 case AMDGPU::ENTER_STRICT_WQM: {
2377 MI.eraseFromParent();
2380 case AMDGPU::EXIT_STRICT_WWM:
2381 case AMDGPU::EXIT_STRICT_WQM: {
2387 case AMDGPU::SI_RETURN: {
2401 MI.eraseFromParent();
2405 case AMDGPU::S_MUL_U64_U32_PSEUDO:
2406 case AMDGPU::S_MUL_I64_I32_PSEUDO:
2407 MI.setDesc(
get(AMDGPU::S_MUL_U64));
2410 case AMDGPU::S_GETPC_B64_pseudo:
2411 MI.setDesc(
get(AMDGPU::S_GETPC_B64));
2412 if (ST.hasGetPCZeroExtension()) {
2414 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2423 case AMDGPU::V_MAX_BF16_PSEUDO_e64: {
2424 assert(ST.hasBF16PackedInsts());
2425 MI.setDesc(
get(AMDGPU::V_PK_MAX_NUM_BF16));
2436 case AMDGPU::GET_STACK_BASE:
2439 if (ST.getFrameLowering()->mayReserveScratchForCWSR(*
MBB.getParent())) {
2446 Register DestReg =
MI.getOperand(0).getReg();
2456 MI.getOperand(
MI.getNumExplicitOperands()).setIsDead(
false);
2457 MI.getOperand(
MI.getNumExplicitOperands()).setIsUse();
2458 MI.setDesc(
get(AMDGPU::S_CMOVK_I32));
2461 MI.setDesc(
get(AMDGPU::S_MOV_B32));
2464 MI.getNumExplicitOperands());
2482 case AMDGPU::S_MOV_B64:
2483 case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
2492 if (UsedLanes.
all())
2497 unsigned LoSubReg = RI.composeSubRegIndices(OrigSubReg, AMDGPU::sub0);
2498 unsigned HiSubReg = RI.composeSubRegIndices(OrigSubReg, AMDGPU::sub1);
2500 bool NeedLo = (UsedLanes & RI.getSubRegIndexLaneMask(LoSubReg)).any();
2501 bool NeedHi = (UsedLanes & RI.getSubRegIndexLaneMask(HiSubReg)).any();
2503 if (NeedLo && NeedHi)
2507 int32_t Imm32 = NeedLo ?
Lo_32(Imm64) :
Hi_32(Imm64);
2509 unsigned UseSubReg = NeedLo ? LoSubReg : HiSubReg;
2518 case AMDGPU::S_LOAD_DWORDX16_IMM:
2519 case AMDGPU::S_LOAD_DWORDX8_IMM: {
2532 for (
auto &CandMO :
I->operands()) {
2533 if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())
2541 if (!UseMO || UseMO->
getSubReg() == AMDGPU::NoSubRegister)
2545 unsigned SubregSize = RI.getSubRegIdxSize(UseMO->
getSubReg());
2551 unsigned NewOpcode = -1;
2552 if (SubregSize == 256)
2553 NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
2554 else if (SubregSize == 128)
2555 NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;
2565 UseMO->
setSubReg(AMDGPU::NoSubRegister);
2570 MI->getOperand(0).setReg(DestReg);
2571 MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);
2575 OffsetMO->
setImm(FinalOffset);
2581 MI->setMemRefs(*MF, NewMMOs);
2594std::pair<MachineInstr*, MachineInstr*>
2596 assert (
MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
2598 if (ST.hasVMovB64Inst() && ST.hasFeature(AMDGPU::FeatureDPALU_DPP) &&
2601 MI.setDesc(
get(AMDGPU::V_MOV_B64_dpp));
2602 return std::pair(&
MI,
nullptr);
2613 for (
auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) {
2615 if (Dst.isPhysical()) {
2616 MovDPP.addDef(RI.getSubReg(Dst,
Sub));
2623 for (
unsigned I = 1;
I <= 2; ++
I) {
2626 if (
SrcOp.isImm()) {
2628 Imm.ashrInPlace(Part * 32);
2629 MovDPP.addImm(Imm.getLoBits(32).getZExtValue());
2633 if (Src.isPhysical())
2634 MovDPP.addReg(RI.getSubReg(Src,
Sub));
2641 MovDPP.addImm(MO.getImm());
2643 Split[Part] = MovDPP;
2647 if (Dst.isVirtual())
2654 MI.eraseFromParent();
2655 return std::pair(Split[0], Split[1]);
2658std::optional<DestSourcePair>
2660 if (
MI.getOpcode() == AMDGPU::WWM_COPY)
2663 return std::nullopt;
2667 AMDGPU::OpName Src0OpName,
2669 AMDGPU::OpName Src1OpName)
const {
2676 "All commutable instructions have both src0 and src1 modifiers");
2678 int Src0ModsVal = Src0Mods->
getImm();
2679 int Src1ModsVal = Src1Mods->
getImm();
2681 Src1Mods->
setImm(Src0ModsVal);
2682 Src0Mods->
setImm(Src1ModsVal);
2691 bool IsKill = RegOp.
isKill();
2693 bool IsUndef = RegOp.
isUndef();
2694 bool IsDebug = RegOp.
isDebug();
2696 if (NonRegOp.
isImm())
2698 else if (NonRegOp.
isFI())
2719 int64_t NonRegVal = NonRegOp1.
getImm();
2722 NonRegOp2.
setImm(NonRegVal);
2729 unsigned OpIdx1)
const {
2734 unsigned Opc =
MI.getOpcode();
2735 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
2745 if ((
int)OpIdx0 == Src0Idx && !MO0.
isReg() &&
2748 if ((
int)OpIdx1 == Src0Idx && !MO1.
isReg() &&
2753 if ((
int)OpIdx1 != Src0Idx && MO0.
isReg()) {
2759 if ((
int)OpIdx0 != Src0Idx && MO1.
isReg()) {
2774 unsigned Src1Idx)
const {
2775 assert(!NewMI &&
"this should never be used");
2777 unsigned Opc =
MI.getOpcode();
2779 if (CommutedOpcode == -1)
2782 if (Src0Idx > Src1Idx)
2785 assert(AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0) ==
2786 static_cast<int>(Src0Idx) &&
2787 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1) ==
2788 static_cast<int>(Src1Idx) &&
2789 "inconsistency with findCommutedOpIndices");
2814 Src1, AMDGPU::OpName::src1_modifiers);
2817 AMDGPU::OpName::src1_sel);
2829 unsigned &SrcOpIdx0,
2830 unsigned &SrcOpIdx1)
const {
2835 unsigned &SrcOpIdx0,
2836 unsigned &SrcOpIdx1)
const {
2837 if (!
Desc.isCommutable())
2840 unsigned Opc =
Desc.getOpcode();
2841 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
2845 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
2849 return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
2853 int64_t BrOffset)
const {
2870 return MI.getOperand(0).getMBB();
2875 if (
MI.getOpcode() == AMDGPU::SI_IF ||
MI.getOpcode() == AMDGPU::SI_ELSE ||
2876 MI.getOpcode() == AMDGPU::SI_LOOP)
2888 "new block should be inserted for expanding unconditional branch");
2891 "restore block should be inserted for restoring clobbered registers");
2899 if (ST.useAddPC64Inst()) {
2901 MCCtx.createTempSymbol(
"offset",
true);
2905 MCCtx.createTempSymbol(
"post_addpc",
true);
2906 AddPC->setPostInstrSymbol(*MF, PostAddPCLabel);
2910 Offset->setVariableValue(OffsetExpr);
2914 assert(RS &&
"RegScavenger required for long branching");
2922 const bool FlushSGPRWrites = (ST.isWave64() && ST.hasVALUMaskWriteHazard()) ||
2923 ST.hasVALUReadSGPRHazard();
2924 auto ApplyHazardWorkarounds = [
this, &
MBB, &
I, &
DL, FlushSGPRWrites]() {
2925 if (FlushSGPRWrites)
2933 ApplyHazardWorkarounds();
2936 MCCtx.createTempSymbol(
"post_getpc",
true);
2940 MCCtx.createTempSymbol(
"offset_lo",
true);
2942 MCCtx.createTempSymbol(
"offset_hi",
true);
2945 .
addReg(PCReg, {}, AMDGPU::sub0)
2949 .
addReg(PCReg, {}, AMDGPU::sub1)
2951 ApplyHazardWorkarounds();
2992 if (LongBranchReservedReg) {
2993 RS->enterBasicBlock(
MBB);
2994 Scav = LongBranchReservedReg;
2996 RS->enterBasicBlockEnd(
MBB);
2997 Scav = RS->scavengeRegisterBackwards(
3002 RS->setRegUsed(Scav);
3010 TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
3027unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate
Cond) {
3029 case SIInstrInfo::SCC_TRUE:
3030 return AMDGPU::S_CBRANCH_SCC1;
3031 case SIInstrInfo::SCC_FALSE:
3032 return AMDGPU::S_CBRANCH_SCC0;
3033 case SIInstrInfo::VCCNZ:
3034 return AMDGPU::S_CBRANCH_VCCNZ;
3035 case SIInstrInfo::VCCZ:
3036 return AMDGPU::S_CBRANCH_VCCZ;
3037 case SIInstrInfo::EXECNZ:
3038 return AMDGPU::S_CBRANCH_EXECNZ;
3039 case SIInstrInfo::EXECZ:
3040 return AMDGPU::S_CBRANCH_EXECZ;
3046SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(
unsigned Opcode) {
3048 case AMDGPU::S_CBRANCH_SCC0:
3050 case AMDGPU::S_CBRANCH_SCC1:
3052 case AMDGPU::S_CBRANCH_VCCNZ:
3054 case AMDGPU::S_CBRANCH_VCCZ:
3056 case AMDGPU::S_CBRANCH_EXECNZ:
3058 case AMDGPU::S_CBRANCH_EXECZ:
3070 bool AllowModify)
const {
3071 if (
I->getOpcode() == AMDGPU::S_BRANCH) {
3073 TBB =
I->getOperand(0).getMBB();
3077 BranchPredicate Pred = getBranchPredicate(
I->getOpcode());
3078 if (Pred == INVALID_BR)
3083 Cond.push_back(
I->getOperand(1));
3087 if (
I ==
MBB.end()) {
3093 if (
I->getOpcode() == AMDGPU::S_BRANCH) {
3095 FBB =
I->getOperand(0).getMBB();
3105 bool AllowModify)
const {
3113 while (
I != E && !
I->isBranch() && !
I->isReturn()) {
3114 switch (
I->getOpcode()) {
3115 case AMDGPU::S_MOV_B64_term:
3116 case AMDGPU::S_XOR_B64_term:
3117 case AMDGPU::S_OR_B64_term:
3118 case AMDGPU::S_ANDN2_B64_term:
3119 case AMDGPU::S_AND_B64_term:
3120 case AMDGPU::S_AND_SAVEEXEC_B64_term:
3121 case AMDGPU::S_MOV_B32_term:
3122 case AMDGPU::S_XOR_B32_term:
3123 case AMDGPU::S_OR_B32_term:
3124 case AMDGPU::S_ANDN2_B32_term:
3125 case AMDGPU::S_AND_B32_term:
3126 case AMDGPU::S_AND_SAVEEXEC_B32_term:
3127 case AMDGPU::V_CMPX_EQ_U32_nosdst_e32_term:
3128 case AMDGPU::V_CMPX_EQ_U64_nosdst_e32_term:
3131 case AMDGPU::SI_ELSE:
3132 case AMDGPU::SI_KILL_I1_TERMINATOR:
3133 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
3150 int *BytesRemoved)
const {
3152 unsigned RemovedSize = 0;
3155 if (
MI.isBranch() ||
MI.isReturn()) {
3157 MI.eraseFromParent();
3163 *BytesRemoved = RemovedSize;
3180 int *BytesAdded)
const {
3181 if (!FBB &&
Cond.empty()) {
3185 *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;
3192 = getBranchOpcode(
static_cast<BranchPredicate
>(
Cond[0].
getImm()));
3204 *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;
3222 *BytesAdded = ST.hasOffset3fBug() ? 16 : 8;
3229 if (
Cond.size() != 2) {
3233 if (
Cond[0].isImm()) {
3244 Register FalseReg,
int &CondCycles,
3245 int &TrueCycles,
int &FalseCycles)
const {
3255 CondCycles = TrueCycles = FalseCycles = NumInsts;
3258 return RI.hasVGPRs(RC) && NumInsts <= 6;
3272 if (NumInsts % 2 == 0)
3275 CondCycles = TrueCycles = FalseCycles = NumInsts;
3276 return RI.isSGPRClass(RC);
3287 BranchPredicate Pred =
static_cast<BranchPredicate
>(
Cond[0].getImm());
3288 if (Pred == VCCZ || Pred == SCC_FALSE) {
3289 Pred =
static_cast<BranchPredicate
>(-Pred);
3295 unsigned DstSize = RI.getRegSizeInBits(*DstRC);
3297 if (DstSize == 32) {
3299 if (Pred == SCC_TRUE) {
3314 if (DstSize == 64 && Pred == SCC_TRUE) {
3324 static const int16_t Sub0_15[] = {
3325 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
3326 AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
3327 AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
3328 AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
3331 static const int16_t Sub0_15_64[] = {
3332 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
3333 AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
3334 AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
3335 AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
3338 unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
3340 const int16_t *SubIndices = Sub0_15;
3341 int NElts = DstSize / 32;
3345 if (Pred == SCC_TRUE) {
3347 SelOp = AMDGPU::S_CSELECT_B32;
3348 EltRC = &AMDGPU::SGPR_32RegClass;
3350 SelOp = AMDGPU::S_CSELECT_B64;
3351 EltRC = &AMDGPU::SGPR_64RegClass;
3352 SubIndices = Sub0_15_64;
3358 MBB,
I,
DL,
get(AMDGPU::REG_SEQUENCE), DstReg);
3363 for (
int Idx = 0; Idx != NElts; ++Idx) {
3367 unsigned SubIdx = SubIndices[Idx];
3370 if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
3372 .
addReg(FalseReg, {}, SubIdx)
3373 .addReg(TrueReg, {}, SubIdx);
3376 .
addReg(TrueReg, {}, SubIdx)
3377 .addReg(FalseReg, {}, SubIdx);
3390 if (
MI.isBranch() ||
MI.isCall() ||
MI.isReturn() ||
MI.isIndirectBranch())
3393 switch (
MI.getOpcode()) {
3394 case AMDGPU::S_ENDPGM:
3395 case AMDGPU::S_ENDPGM_SAVED:
3396 case AMDGPU::S_TRAP:
3397 case AMDGPU::S_GETREG_B32:
3398 case AMDGPU::S_SETREG_B32:
3399 case AMDGPU::S_SETREG_B32_mode:
3400 case AMDGPU::S_SETREG_IMM32_B32:
3401 case AMDGPU::S_SETREG_IMM32_B32_mode:
3402 case AMDGPU::S_SENDMSG:
3403 case AMDGPU::S_SENDMSGHALT:
3404 case AMDGPU::S_SENDMSG_RTN_B32:
3405 case AMDGPU::S_SENDMSG_RTN_B64:
3406 case AMDGPU::S_BARRIER_WAIT:
3407 case AMDGPU::S_BARRIER_SIGNAL_M0:
3408 case AMDGPU::S_BARRIER_SIGNAL_IMM:
3409 case AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0:
3410 case AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM:
3418 switch (
MI.getOpcode()) {
3419 case AMDGPU::V_MOV_B16_t16_e32:
3420 case AMDGPU::V_MOV_B16_t16_e64:
3421 case AMDGPU::V_MOV_B32_e32:
3422 case AMDGPU::V_MOV_B32_e64:
3423 case AMDGPU::V_MOV_B64_PSEUDO:
3424 case AMDGPU::V_MOV_B64_e32:
3425 case AMDGPU::V_MOV_B64_e64:
3426 case AMDGPU::S_MOV_B32:
3427 case AMDGPU::S_MOV_B64:
3428 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
3430 case AMDGPU::WWM_COPY:
3431 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
3432 case AMDGPU::V_ACCVGPR_READ_B32_e64:
3433 case AMDGPU::V_ACCVGPR_MOV_B32:
3434 case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
3435 case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
3443 switch (
MI.getOpcode()) {
3444 case AMDGPU::V_MOV_B16_t16_e32:
3445 case AMDGPU::V_MOV_B16_t16_e64:
3447 case AMDGPU::V_MOV_B32_e32:
3448 case AMDGPU::V_MOV_B32_e64:
3449 case AMDGPU::V_MOV_B64_PSEUDO:
3450 case AMDGPU::V_MOV_B64_e32:
3451 case AMDGPU::V_MOV_B64_e64:
3452 case AMDGPU::S_MOV_B32:
3453 case AMDGPU::S_MOV_B64:
3454 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
3456 case AMDGPU::WWM_COPY:
3457 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
3458 case AMDGPU::V_ACCVGPR_READ_B32_e64:
3459 case AMDGPU::V_ACCVGPR_MOV_B32:
3460 case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
3461 case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
3469 AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
3470 AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
3471 AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};
3474 unsigned Opc =
MI.getOpcode();
3476 int Idx = AMDGPU::getNamedOperandIdx(
Opc, Name);
3478 MI.removeOperand(Idx);
3484 MI.setDesc(NewDesc);
3490 unsigned NumOps =
Desc.getNumOperands() +
Desc.implicit_uses().size() +
3491 Desc.implicit_defs().size();
3493 for (
unsigned I =
MI.getNumOperands() - 1;
I >=
NumOps; --
I)
3494 MI.removeOperand(
I);
3498 unsigned SubRegIndex) {
3499 switch (SubRegIndex) {
3500 case AMDGPU::NoSubRegister:
3510 case AMDGPU::sub1_lo16:
3512 case AMDGPU::sub1_hi16:
3515 return std::nullopt;
3523 case AMDGPU::V_MAC_F16_e32:
3524 case AMDGPU::V_MAC_F16_e64:
3525 case AMDGPU::V_MAD_F16_e64:
3526 return AMDGPU::V_MADAK_F16;
3527 case AMDGPU::V_MAC_F32_e32:
3528 case AMDGPU::V_MAC_F32_e64:
3529 case AMDGPU::V_MAD_F32_e64:
3530 return AMDGPU::V_MADAK_F32;
3531 case AMDGPU::V_FMAC_F32_e32:
3532 case AMDGPU::V_FMAC_F32_e64:
3533 case AMDGPU::V_FMA_F32_e64:
3534 return AMDGPU::V_FMAAK_F32;
3535 case AMDGPU::V_FMAC_F16_e32:
3536 case AMDGPU::V_FMAC_F16_e64:
3537 case AMDGPU::V_FMAC_F16_t16_e64:
3538 case AMDGPU::V_FMAC_F16_fake16_e64:
3539 case AMDGPU::V_FMAC_F16_t16_e32:
3540 case AMDGPU::V_FMAC_F16_fake16_e32:
3541 case AMDGPU::V_FMA_F16_e64:
3542 return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
3543 ? AMDGPU::V_FMAAK_F16_t16
3544 : AMDGPU::V_FMAAK_F16_fake16
3545 : AMDGPU::V_FMAAK_F16;
3546 case AMDGPU::V_FMAC_F64_e32:
3547 case AMDGPU::V_FMAC_F64_e64:
3548 case AMDGPU::V_FMA_F64_e64:
3549 return AMDGPU::V_FMAAK_F64;
3557 case AMDGPU::V_MAC_F16_e32:
3558 case AMDGPU::V_MAC_F16_e64:
3559 case AMDGPU::V_MAD_F16_e64:
3560 return AMDGPU::V_MADMK_F16;
3561 case AMDGPU::V_MAC_F32_e32:
3562 case AMDGPU::V_MAC_F32_e64:
3563 case AMDGPU::V_MAD_F32_e64:
3564 return AMDGPU::V_MADMK_F32;
3565 case AMDGPU::V_FMAC_F32_e32:
3566 case AMDGPU::V_FMAC_F32_e64:
3567 case AMDGPU::V_FMA_F32_e64:
3568 return AMDGPU::V_FMAMK_F32;
3569 case AMDGPU::V_FMAC_F16_e32:
3570 case AMDGPU::V_FMAC_F16_e64:
3571 case AMDGPU::V_FMAC_F16_t16_e64:
3572 case AMDGPU::V_FMAC_F16_fake16_e64:
3573 case AMDGPU::V_FMAC_F16_t16_e32:
3574 case AMDGPU::V_FMAC_F16_fake16_e32:
3575 case AMDGPU::V_FMA_F16_e64:
3576 return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
3577 ? AMDGPU::V_FMAMK_F16_t16
3578 : AMDGPU::V_FMAMK_F16_fake16
3579 : AMDGPU::V_FMAMK_F16;
3580 case AMDGPU::V_FMAC_F64_e32:
3581 case AMDGPU::V_FMAC_F64_e64:
3582 case AMDGPU::V_FMA_F64_e64:
3583 return AMDGPU::V_FMAMK_F64;
3597 assert(!
DefMI.getOperand(0).getSubReg() &&
"Expected SSA form");
3600 if (
Opc == AMDGPU::COPY) {
3601 assert(!
UseMI.getOperand(0).getSubReg() &&
"Expected SSA form");
3608 if (HasMultipleUses) {
3611 unsigned ImmDefSize = RI.getRegSizeInBits(*MRI->
getRegClass(Reg));
3614 if (UseSubReg != AMDGPU::NoSubRegister && ImmDefSize == 64)
3622 if (ImmDefSize == 32 &&
3627 bool Is16Bit = UseSubReg != AMDGPU::NoSubRegister &&
3628 RI.getSubRegIdxSize(UseSubReg) == 16;
3631 if (RI.hasVGPRs(DstRC))
3634 if (DstReg.
isVirtual() && UseSubReg != AMDGPU::lo16)
3640 unsigned NewOpc = AMDGPU::INSTRUCTION_LIST_END;
3647 for (
unsigned MovOp :
3648 {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
3649 AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_ACCVGPR_WRITE_B32_e64}) {
3657 MovDstRC = RI.getMatchingSuperRegClass(MovDstRC, DstRC, AMDGPU::lo16);
3661 if (MovDstPhysReg) {
3665 RI.getMatchingSuperReg(MovDstPhysReg, AMDGPU::lo16, MovDstRC);
3672 if (MovDstPhysReg) {
3673 if (!MovDstRC->
contains(MovDstPhysReg))
3689 if (!RI.opCanUseLiteralConstant(OpInfo.OperandType) &&
3697 if (NewOpc == AMDGPU::INSTRUCTION_LIST_END)
3701 UseMI.getOperand(0).setSubReg(AMDGPU::NoSubRegister);
3703 UseMI.getOperand(0).setReg(MovDstPhysReg);
3708 UseMI.setDesc(NewMCID);
3709 UseMI.getOperand(1).ChangeToImmediate(*SubRegImm);
3710 UseMI.addImplicitDefUseOperands(*MF);
3714 if (HasMultipleUses)
3717 if (
Opc == AMDGPU::V_MAD_F32_e64 ||
Opc == AMDGPU::V_MAC_F32_e64 ||
3718 Opc == AMDGPU::V_MAD_F16_e64 ||
Opc == AMDGPU::V_MAC_F16_e64 ||
3719 Opc == AMDGPU::V_FMA_F32_e64 ||
Opc == AMDGPU::V_FMAC_F32_e64 ||
3720 Opc == AMDGPU::V_FMA_F16_e64 ||
Opc == AMDGPU::V_FMAC_F16_e64 ||
3721 Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3722 Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
Opc == AMDGPU::V_FMA_F64_e64 ||
3723 Opc == AMDGPU::V_FMAC_F64_e64) {
3732 int Src0Idx = getNamedOperandIdx(
UseMI.getOpcode(), AMDGPU::OpName::src0);
3743 auto CopyRegOperandToNarrowerRC =
3746 if (!
MI.getOperand(OpNo).isReg())
3750 if (RI.getCommonSubClass(RC, NewRC) != NewRC)
3753 BuildMI(*
MI.getParent(),
MI.getIterator(),
MI.getDebugLoc(),
3754 get(AMDGPU::COPY), Tmp)
3756 MI.getOperand(OpNo).setReg(Tmp);
3757 MI.getOperand(OpNo).setIsKill();
3764 Src1->
isReg() && Src1->
getReg() == Reg ? Src0 : Src1;
3765 if (!RegSrc->
isReg())
3768 ST.getConstantBusLimit(
Opc) < 2)
3783 if (Def && Def->isMoveImmediate() &&
3798 unsigned SrcSubReg = RegSrc->
getSubReg();
3803 if (
Opc == AMDGPU::V_MAC_F32_e64 ||
Opc == AMDGPU::V_MAC_F16_e64 ||
3804 Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3805 Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
3806 Opc == AMDGPU::V_FMAC_F16_e64 ||
Opc == AMDGPU::V_FMAC_F64_e64)
3807 UseMI.untieRegOperand(
3808 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2));
3815 if (NewOpc == AMDGPU::V_FMAMK_F16_t16 ||
3816 NewOpc == AMDGPU::V_FMAMK_F16_fake16) {
3820 UseMI.getDebugLoc(),
get(AMDGPU::COPY),
3821 UseMI.getOperand(0).getReg())
3823 UseMI.getOperand(0).setReg(Tmp);
3824 CopyRegOperandToNarrowerRC(
UseMI, 1, NewRC);
3825 CopyRegOperandToNarrowerRC(
UseMI, 3, NewRC);
3830 DefMI.eraseFromParent();
3837 if (ST.getConstantBusLimit(
Opc) < 2) {
3840 bool Src0Inlined =
false;
3841 if (Src0->
isReg()) {
3846 if (Def && Def->isMoveImmediate() &&
3851 }
else if (ST.getConstantBusLimit(
Opc) <= 1 &&
3852 RI.isSGPRReg(*MRI, Src0->
getReg())) {
3858 if (Src1->
isReg() && !Src0Inlined) {
3861 if (Def && Def->isMoveImmediate() &&
3865 else if (RI.isSGPRReg(*MRI, Src1->
getReg()))
3878 if (
Opc == AMDGPU::V_MAC_F32_e64 ||
Opc == AMDGPU::V_MAC_F16_e64 ||
3879 Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3880 Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
3881 Opc == AMDGPU::V_FMAC_F16_e64 ||
Opc == AMDGPU::V_FMAC_F64_e64)
3882 UseMI.untieRegOperand(
3883 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2));
3885 const std::optional<int64_t> SubRegImm =
3895 if (NewOpc == AMDGPU::V_FMAAK_F16_t16 ||
3896 NewOpc == AMDGPU::V_FMAAK_F16_fake16) {
3900 UseMI.getDebugLoc(),
get(AMDGPU::COPY),
3901 UseMI.getOperand(0).getReg())
3903 UseMI.getOperand(0).setReg(Tmp);
3904 CopyRegOperandToNarrowerRC(
UseMI, 1, NewRC);
3905 CopyRegOperandToNarrowerRC(
UseMI, 2, NewRC);
3915 DefMI.eraseFromParent();
3927 if (BaseOps1.
size() != BaseOps2.
size())
3929 for (
size_t I = 0,
E = BaseOps1.
size();
I <
E; ++
I) {
3930 if (!BaseOps1[
I]->isIdenticalTo(*BaseOps2[
I]))
3938 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
3939 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
3940 LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
3942 LowOffset + (int)LowWidth.
getValue() <= HighOffset;
3945bool SIInstrInfo::checkInstOffsetsDoNotOverlap(
const MachineInstr &MIa,
3948 int64_t Offset0, Offset1;
3951 bool Offset0IsScalable, Offset1IsScalable;
3965 LocationSize Width0 = MIa.
memoperands().front()->getSize();
3966 LocationSize Width1 = MIb.
memoperands().front()->getSize();
3973 "MIa must load from or modify a memory location");
3975 "MIb must load from or modify a memory location");
3997 return checkInstOffsetsDoNotOverlap(MIa, MIb);
4004 return checkInstOffsetsDoNotOverlap(MIa, MIb);
4014 return checkInstOffsetsDoNotOverlap(MIa, MIb);
4028 return checkInstOffsetsDoNotOverlap(MIa, MIb);
4039 if (
Reg.isPhysical())
4043 Imm = Def->getOperand(1).getImm();
4063 unsigned NumOps =
MI.getNumOperands();
4066 if (
Op.isReg() &&
Op.isKill())
4074 case AMDGPU::V_MAC_F16_e32:
4075 case AMDGPU::V_MAC_F16_e64:
4076 return AMDGPU::V_MAD_F16_e64;
4077 case AMDGPU::V_MAC_F32_e32:
4078 case AMDGPU::V_MAC_F32_e64:
4079 return AMDGPU::V_MAD_F32_e64;
4080 case AMDGPU::V_MAC_LEGACY_F32_e32:
4081 case AMDGPU::V_MAC_LEGACY_F32_e64:
4082 return AMDGPU::V_MAD_LEGACY_F32_e64;
4083 case AMDGPU::V_FMAC_LEGACY_F32_e32:
4084 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4085 return AMDGPU::V_FMA_LEGACY_F32_e64;
4086 case AMDGPU::V_FMAC_F16_e32:
4087 case AMDGPU::V_FMAC_F16_e64:
4088 case AMDGPU::V_FMAC_F16_t16_e64:
4089 case AMDGPU::V_FMAC_F16_fake16_e64:
4090 return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
4091 ? AMDGPU::V_FMA_F16_gfx9_t16_e64
4092 : AMDGPU::V_FMA_F16_gfx9_fake16_e64
4093 : AMDGPU::V_FMA_F16_gfx9_e64;
4094 case AMDGPU::V_FMAC_F32_e32:
4095 case AMDGPU::V_FMAC_F32_e64:
4096 return AMDGPU::V_FMA_F32_e64;
4097 case AMDGPU::V_FMAC_F64_e32:
4098 case AMDGPU::V_FMAC_F64_e64:
4099 return AMDGPU::V_FMA_F64_e64;
4119 if (
MI.isBundle()) {
4122 if (
MI.getBundleSize() != 1)
4124 CandidateMI =
MI.getNextNode();
4128 MachineInstr *NewMI = convertToThreeAddressImpl(*CandidateMI, U);
4132 if (
MI.isBundle()) {
4137 MI.untieRegOperand(MO.getOperandNo());
4145 if (Def.isEarlyClobber() && Def.isReg() &&
4150 auto UpdateDefIndex = [&](
LiveRange &LR) {
4151 auto *S = LR.find(OldIndex);
4152 if (S != LR.end() && S->start == OldIndex) {
4153 assert(S->valno && S->valno->def == OldIndex);
4154 S->start = NewIndex;
4155 S->valno->def = NewIndex;
4159 for (
auto &SR : LI.subranges())
4165 if (U.RemoveMIUse) {
4168 Register DefReg = U.RemoveMIUse->getOperand(0).getReg();
4172 U.RemoveMIUse->setDesc(
get(AMDGPU::IMPLICIT_DEF));
4173 U.RemoveMIUse->getOperand(0).setIsDead(
true);
4174 for (
unsigned I = U.RemoveMIUse->getNumOperands() - 1;
I != 0; --
I)
4175 U.RemoveMIUse->removeOperand(
I);
4180 if (
MI.isBundle()) {
4184 if (MO.isReg() && MO.getReg() == DefReg) {
4185 assert(MO.getSubReg() == 0 &&
4186 "tied sub-registers in bundles currently not supported");
4187 MI.removeOperand(MO.getOperandNo());
4204 if (MIOp.isReg() && MIOp.getReg() == DefReg) {
4205 MIOp.setIsUndef(
true);
4206 MIOp.setReg(DummyReg);
4210 if (
MI.isBundle()) {
4214 if (MIOp.isReg() && MIOp.getReg() == DefReg) {
4215 MIOp.setIsUndef(
true);
4216 MIOp.setReg(DummyReg);
4229 return MI.isBundle() ? &
MI : NewMI;
4234 ThreeAddressUpdates &U)
const {
4236 unsigned Opc =
MI.getOpcode();
4240 if (NewMFMAOpc != -1) {
4243 for (
unsigned I = 0, E =
MI.getNumExplicitOperands();
I != E; ++
I)
4244 MIB.
add(
MI.getOperand(
I));
4252 for (
unsigned I = 0,
E =
MI.getNumExplicitOperands();
I !=
E; ++
I)
4257 assert(
Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
4258 Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
4259 "V_FMAC_F16_t16/fake16_e32 is not supported and not expected to be "
4263 bool IsF64 =
Opc == AMDGPU::V_FMAC_F64_e32 ||
Opc == AMDGPU::V_FMAC_F64_e64;
4264 bool IsLegacy =
Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
4265 Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
4266 Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
4267 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
4268 bool Src0Literal =
false;
4273 case AMDGPU::V_MAC_F16_e64:
4274 case AMDGPU::V_FMAC_F16_e64:
4275 case AMDGPU::V_FMAC_F16_t16_e64:
4276 case AMDGPU::V_FMAC_F16_fake16_e64:
4277 case AMDGPU::V_MAC_F32_e64:
4278 case AMDGPU::V_MAC_LEGACY_F32_e64:
4279 case AMDGPU::V_FMAC_F32_e64:
4280 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4281 case AMDGPU::V_FMAC_F64_e64:
4283 case AMDGPU::V_MAC_F16_e32:
4284 case AMDGPU::V_FMAC_F16_e32:
4285 case AMDGPU::V_MAC_F32_e32:
4286 case AMDGPU::V_MAC_LEGACY_F32_e32:
4287 case AMDGPU::V_FMAC_F32_e32:
4288 case AMDGPU::V_FMAC_LEGACY_F32_e32:
4289 case AMDGPU::V_FMAC_F64_e32: {
4290 int Src0Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
4291 AMDGPU::OpName::src0);
4292 const MachineOperand *Src0 = &
MI.getOperand(Src0Idx);
4303 MachineInstrBuilder MIB;
4306 const MachineOperand *Src0Mods =
4309 const MachineOperand *Src1Mods =
4312 const MachineOperand *Src2Mods =
4318 if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsLegacy &&
4319 (!IsF64 || ST.hasFmaakFmamkF64Insts()) &&
4321 (ST.getConstantBusLimit(
Opc) > 1 || !Src0->
isReg() ||
4323 MachineInstr *
DefMI;
4359 MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0),
4375 if (Src0Literal && !ST.hasVOP3Literal())
4403 switch (
MI.getOpcode()) {
4404 case AMDGPU::S_SET_GPR_IDX_ON:
4405 case AMDGPU::S_SET_GPR_IDX_MODE:
4406 case AMDGPU::S_SET_GPR_IDX_OFF:
4424 if (
MI.isTerminator() ||
MI.isPosition())
4428 if (
MI.getOpcode() == TargetOpcode::INLINEASM_BR)
4431 if (
MI.getOpcode() == AMDGPU::SCHED_BARRIER &&
MI.getOperand(0).getImm() == 0)
4437 return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
4438 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
4439 MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
4440 MI.getOpcode() == AMDGPU::S_SETPRIO ||
4441 MI.getOpcode() == AMDGPU::S_SETPRIO_INC_WG ||
4446 return Opcode == AMDGPU::DS_ORDERED_COUNT ||
4447 Opcode == AMDGPU::DS_ADD_GS_REG_RTN ||
4448 Opcode == AMDGPU::DS_SUB_GS_REG_RTN ||
isGWS(Opcode);
4462 if (
MI.getMF()->getFunction().hasFnAttribute(
"amdgpu-no-flat-scratch-init"))
4467 if (
MI.memoperands_empty())
4472 unsigned AS = Memop->getAddrSpace();
4473 if (AS == AMDGPUAS::FLAT_ADDRESS) {
4474 const MDNode *MD = Memop->getAAInfo().NoAliasAddrSpace;
4475 return !MD || !AMDGPU::hasValueInRangeLikeMetadata(
4476 *MD, AMDGPUAS::PRIVATE_ADDRESS);
4491 if (
MI.memoperands_empty())
4500 unsigned AS = Memop->getAddrSpace();
4517 if (ST.isTgSplitEnabled())
4522 if (
MI.memoperands_empty())
4527 unsigned AS = Memop->getAddrSpace();
4543 unsigned Opcode =
MI.getOpcode();
4558 if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
4559 isEXP(Opcode) || Opcode == AMDGPU::DS_ORDERED_COUNT ||
4560 Opcode == AMDGPU::S_TRAP || Opcode == AMDGPU::S_WAIT_EVENT ||
4561 Opcode == AMDGPU::S_SETHALT)
4564 if (
MI.isCall() ||
MI.isInlineAsm())
4580 if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
4581 Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
4582 Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
4583 Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)
4591 if (
MI.isMetaInstruction())
4595 if (
MI.isCopyLike()) {
4596 if (!RI.isSGPRReg(MRI,
MI.getOperand(0).getReg()))
4600 return MI.readsRegister(AMDGPU::EXEC, &RI);
4611 return !
isSALU(
MI) ||
MI.readsRegister(AMDGPU::EXEC, &RI);
4615 switch (Imm.getBitWidth()) {
4621 ST.hasInv2PiInlineImm());
4624 ST.hasInv2PiInlineImm());
4626 return ST.has16BitInsts() &&
4628 ST.hasInv2PiInlineImm());
4635 APInt IntImm = Imm.bitcastToAPInt();
4637 bool HasInv2Pi = ST.hasInv2PiInlineImm();
4645 return ST.has16BitInsts() &&
4648 return ST.has16BitInsts() &&
4658 switch (OperandType) {
4668 int32_t Trunc =
static_cast<int32_t
>(Imm);
4712 int16_t Trunc =
static_cast<int16_t
>(Imm);
4713 return ST.has16BitInsts() &&
4722 int16_t Trunc =
static_cast<int16_t
>(Imm);
4723 return ST.has16BitInsts() &&
4774 if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))
4780 return ST.hasVOP3Literal();
4784 int64_t ImmVal)
const {
4786 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
4787 if (Src1Idx != -1 &&
isDPP(
Opc) && !ST.hasDPPSrc1SGPR() &&
4788 OpNo ==
static_cast<unsigned>(Src1Idx))
4793 if (
isMAI(InstDesc) && ST.hasMFMAInlineLiteralBug() &&
4794 OpNo == (
unsigned)AMDGPU::getNamedOperandIdx(InstDesc.
getOpcode(),
4795 AMDGPU::OpName::src2))
4797 return RI.opCanUseInlineConstant(OpInfo.OperandType);
4809 "unexpected imm-like operand kind");
4822 if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts())
4840 AMDGPU::OpName
OpName)
const {
4842 return Mods && Mods->
getImm();
4855 switch (
MI.getOpcode()) {
4856 default:
return false;
4858 case AMDGPU::V_ADDC_U32_e64:
4859 case AMDGPU::V_SUBB_U32_e64:
4860 case AMDGPU::V_SUBBREV_U32_e64: {
4863 if (!Src1->
isReg() || !RI.isVGPR(MRI, Src1->
getReg()))
4868 case AMDGPU::V_MAC_F16_e64:
4869 case AMDGPU::V_MAC_F32_e64:
4870 case AMDGPU::V_MAC_LEGACY_F32_e64:
4871 case AMDGPU::V_FMAC_F16_e64:
4872 case AMDGPU::V_FMAC_F16_t16_e64:
4873 case AMDGPU::V_FMAC_F16_fake16_e64:
4874 case AMDGPU::V_FMAC_F32_e64:
4875 case AMDGPU::V_FMAC_F64_e64:
4876 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4877 if (!Src2->
isReg() || !RI.isVGPR(MRI, Src2->
getReg()) ||
4882 case AMDGPU::V_CNDMASK_B32_e64:
4888 if (Src1 && (!Src1->
isReg() || !RI.isVGPR(MRI, Src1->
getReg()) ||
4918 (
Use.getReg() == AMDGPU::VCC ||
Use.getReg() == AMDGPU::VCC_LO)) {
4927 unsigned Op32)
const {
4941 Inst32.
add(
MI.getOperand(
I));
4945 int Idx =
MI.getNumExplicitDefs();
4947 int OpTy =
MI.getDesc().operands()[Idx++].OperandType;
4952 if (AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2) == -1) {
4974 if (Reg == AMDGPU::SGPR_NULL || Reg == AMDGPU::SGPR_NULL64)
4982 return Reg == AMDGPU::VCC || Reg == AMDGPU::VCC_LO || Reg == AMDGPU::M0;
4985 return AMDGPU::SReg_32RegClass.contains(Reg) ||
4986 AMDGPU::SReg_64RegClass.contains(Reg);
5014 switch (MO.getReg()) {
5016 case AMDGPU::VCC_LO:
5017 case AMDGPU::VCC_HI:
5019 case AMDGPU::FLAT_SCR:
5032 switch (
MI.getOpcode()) {
5033 case AMDGPU::V_READLANE_B32:
5034 case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
5035 case AMDGPU::V_WRITELANE_B32:
5036 case AMDGPU::SI_SPILL_S32_TO_VGPR:
5043 if (
MI.isPreISelOpcode() ||
5044 SIInstrInfo::isGenericOpcode(
MI.getOpcode()) ||
5062 return SubReg.
getSubReg() != AMDGPU::NoSubRegister &&
5073 if (RI.isVectorRegister(MRI, SrcReg) && RI.isSGPRReg(MRI, DstReg)) {
5074 ErrInfo =
"illegal copy from vector register to SGPR";
5092 if (!MRI.
isSSA() &&
MI.isCopy())
5093 return verifyCopy(
MI, MRI, ErrInfo);
5095 if (SIInstrInfo::isGenericOpcode(Opcode))
5098 int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
5099 int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
5100 int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
5102 if (Src0Idx == -1) {
5104 Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0X);
5105 Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1X);
5106 Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0Y);
5107 Src3Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1Y);
5112 if (!
Desc.isVariadic() &&
5113 Desc.getNumOperands() !=
MI.getNumExplicitOperands()) {
5114 ErrInfo =
"Instruction has wrong number of operands.";
5118 if (
MI.isInlineAsm()) {
5131 if (!Reg.isVirtual() && !RC->
contains(Reg)) {
5132 ErrInfo =
"inlineasm operand has incorrect register class.";
5140 if (
isImage(
MI) &&
MI.memoperands_empty() &&
MI.mayLoadOrStore()) {
5141 ErrInfo =
"missing memory operand from image instruction.";
5146 for (
int i = 0, e =
Desc.getNumOperands(); i != e; ++i) {
5149 ErrInfo =
"FPImm Machine Operands are not supported. ISel should bitcast "
5150 "all fp values to integers.";
5155 int16_t RegClass = getOpRegClassID(OpInfo);
5157 switch (OpInfo.OperandType) {
5159 if (
MI.getOperand(i).isImm() ||
MI.getOperand(i).isGlobal()) {
5160 ErrInfo =
"Illegal immediate value for operand.";
5193 ErrInfo =
"Illegal immediate value for operand.";
5202 if (ST.has64BitLiterals() &&
Desc.getSize() != 4 && MO.
isImm() &&
5205 OpInfo.OperandType ==
5207 ErrInfo =
"illegal 64-bit immediate value for operand.";
5214 ErrInfo =
"Expected inline constant for operand.";
5228 if (!
MI.getOperand(i).isImm() && !
MI.getOperand(i).isFI()) {
5229 ErrInfo =
"Expected immediate, but got non-immediate";
5238 if (OpInfo.isGenericType())
5253 if (ST.needsAlignedVGPRs() && Opcode != AMDGPU::AV_MOV_B64_IMM_PSEUDO &&
5254 Opcode != AMDGPU::V_MOV_B64_PSEUDO && !
isSpill(
MI)) {
5256 if (RI.hasVectorRegisters(RC) && MO.
getSubReg()) {
5258 RI.getSubRegisterClass(RC, MO.
getSubReg())) {
5259 RC = RI.getCompatibleSubRegClass(RC, SubRC, MO.
getSubReg());
5266 if (!RC || !RI.isProperlyAlignedRC(*RC)) {
5267 ErrInfo =
"Subtarget requires even aligned vector registers";
5272 if (RegClass != -1) {
5273 if (Reg.isVirtual())
5278 ErrInfo =
"Operand has incorrect register class.";
5286 if (!ST.hasSDWA()) {
5287 ErrInfo =
"SDWA is not supported on this target";
5291 for (
auto Op : {AMDGPU::OpName::src0_sel, AMDGPU::OpName::src1_sel,
5292 AMDGPU::OpName::dst_sel}) {
5296 int64_t Imm = MO->
getImm();
5298 ErrInfo =
"Invalid SDWA selection";
5303 int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
5305 for (
int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {
5310 if (!ST.hasSDWAScalar()) {
5312 if (!MO.
isReg() || !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.
getReg()))) {
5313 ErrInfo =
"Only VGPRs allowed as operands in SDWA instructions on VI";
5320 "Only reg allowed as operands in SDWA instructions on GFX9+";
5326 if (!ST.hasSDWAOmod()) {
5329 if (OMod !=
nullptr &&
5331 ErrInfo =
"OMod not allowed in SDWA instructions on VI";
5336 if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
5337 Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
5338 Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
5339 Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {
5342 unsigned Mods = Src0ModsMO->
getImm();
5345 ErrInfo =
"sext, abs and neg are not allowed on this instruction";
5351 if (
isVOPC(BasicOpcode)) {
5352 if (!ST.hasSDWASdst() && DstIdx != -1) {
5355 if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
5356 ErrInfo =
"Only VCC allowed as dst in SDWA instructions on VI";
5359 }
else if (!ST.hasSDWAOutModsVOPC()) {
5362 if (Clamp && (!Clamp->
isImm() || Clamp->
getImm() != 0)) {
5363 ErrInfo =
"Clamp not allowed in VOPC SDWA instructions on VI";
5369 if (OMod && (!OMod->
isImm() || OMod->
getImm() != 0)) {
5370 ErrInfo =
"OMod not allowed in VOPC SDWA instructions on VI";
5377 if (DstUnused && DstUnused->isImm() &&
5380 if (!Dst.isReg() || !Dst.isTied()) {
5381 ErrInfo =
"Dst register should have tied register";
5386 MI.getOperand(
MI.findTiedOperandIdx(DstIdx));
5389 "Dst register should be tied to implicit use of preserved register";
5393 ErrInfo =
"Dst register should use same physical register as preserved";
5399 if (
isDPP(
MI) && !ST.hasDPPSrc1SGPR() && Src1Idx != -1) {
5401 if (Src1MO.
isReg() && RI.isSGPRReg(MRI, Src1MO.
getReg())) {
5402 ErrInfo =
"DPP src1 cannot be SGPR on this subtarget";
5405 if (Src1MO.
isImm()) {
5406 ErrInfo =
"DPP src1 cannot be an immediate on this subtarget";
5412 if (
isImage(Opcode) && !
MI.mayStore()) {
5424 if (D16 && D16->getImm() && !ST.hasUnpackedD16VMem())
5432 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
5436 uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
5437 if (RegCount > DstSize) {
5438 ErrInfo =
"Image instruction returns too many registers for dst "
5448 Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) {
5449 unsigned ConstantBusCount = 0;
5450 bool UsesLiteral =
false;
5453 int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
5457 LiteralVal = &
MI.getOperand(ImmIdx);
5466 for (
int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {
5477 }
else if (!MO.
isFI()) {
5484 ErrInfo =
"VOP2/VOP3 instruction uses more than one literal";
5494 if (
llvm::all_of(SGPRsUsed, [
this, SGPRUsed](
unsigned SGPR) {
5495 return !RI.regsOverlap(SGPRUsed, SGPR);
5504 if (ConstantBusCount > ST.getConstantBusLimit(Opcode) &&
5505 Opcode != AMDGPU::V_WRITELANE_B32) {
5506 ErrInfo =
"VOP* instruction violates constant bus restriction";
5510 if (
isVOP3(
MI) && UsesLiteral && !ST.hasVOP3Literal()) {
5511 ErrInfo =
"VOP3 instruction uses literal";
5518 if (
Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
5519 unsigned SGPRCount = 0;
5522 for (
int OpIdx : {Src0Idx, Src1Idx}) {
5530 if (MO.
getReg() != SGPRUsed)
5535 if (SGPRCount > ST.getConstantBusLimit(Opcode)) {
5536 ErrInfo =
"WRITELANE instruction violates constant bus restriction";
5543 if (
Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
5544 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {
5551 ErrInfo =
"v_div_scale_{f32|f64} require src0 = src1 or src2";
5561 ErrInfo =
"ABS not allowed in VOP3B instructions";
5574 ErrInfo =
"SOP2/SOPC instruction requires too many immediate constants";
5581 if (
Desc.isBranch()) {
5583 ErrInfo =
"invalid branch target for SOPK instruction";
5590 ErrInfo =
"invalid immediate for SOPK instruction";
5595 ErrInfo =
"invalid immediate for SOPK instruction";
5602 if (
Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
5603 Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
5604 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5605 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
5606 const bool IsDst =
Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5607 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;
5609 const unsigned StaticNumOps =
5610 Desc.getNumOperands() +
Desc.implicit_uses().size();
5611 const unsigned NumImplicitOps = IsDst ? 2 : 1;
5617 if (
MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
5618 ErrInfo =
"missing implicit register operands";
5624 if (!Dst->isUse()) {
5625 ErrInfo =
"v_movreld_b32 vdst should be a use operand";
5630 if (!
MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
5631 UseOpIdx != StaticNumOps + 1) {
5632 ErrInfo =
"movrel implicit operands should be tied";
5639 =
MI.getOperand(StaticNumOps + NumImplicitOps - 1);
5641 !
isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
5642 ErrInfo =
"src0 should be subreg of implicit vector use";
5650 if (!
MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
5651 ErrInfo =
"VALU instruction does not implicitly read exec mask";
5657 if (
MI.mayStore() &&
5662 if (Soff && Soff->
getReg() != AMDGPU::M0) {
5663 ErrInfo =
"scalar stores must use m0 as offset register";
5669 if (
isFLAT(
MI) && !ST.hasFlatInstOffsets()) {
5671 if (
Offset->getImm() != 0) {
5672 ErrInfo =
"subtarget does not support offsets in flat instructions";
5677 if (
isDS(
MI) && !ST.hasGDS()) {
5679 if (GDSOp && GDSOp->
getImm() != 0) {
5680 ErrInfo =
"GDS is not supported on this subtarget";
5688 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode,
5689 AMDGPU::OpName::vaddr0);
5690 AMDGPU::OpName RSrcOpName =
5691 isMIMG(
MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
5692 int RsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, RSrcOpName);
5700 ErrInfo =
"dim is out of range";
5705 if (ST.hasR128A16()) {
5707 IsA16 = R128A16->
getImm() != 0;
5708 }
else if (ST.hasA16()) {
5710 IsA16 = A16->
getImm() != 0;
5713 bool IsNSA = RsrcIdx - VAddr0Idx > 1;
5715 unsigned AddrWords =
5718 unsigned VAddrWords;
5720 VAddrWords = RsrcIdx - VAddr0Idx;
5721 if (ST.hasPartialNSAEncoding() &&
5723 unsigned LastVAddrIdx = RsrcIdx - 1;
5724 VAddrWords +=
getOpSize(
MI, LastVAddrIdx) / 4 - 1;
5732 if (VAddrWords != AddrWords) {
5734 <<
" but got " << VAddrWords <<
"\n");
5735 ErrInfo =
"bad vaddr size";
5745 unsigned DC = DppCt->
getImm();
5746 if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
5747 DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
5748 (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
5749 (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
5750 (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
5751 (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
5752 (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
5753 ErrInfo =
"Invalid dpp_ctrl value";
5756 if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
5757 !ST.hasDPPWavefrontShifts()) {
5758 ErrInfo =
"Invalid dpp_ctrl value: "
5759 "wavefront shifts are not supported on GFX10+";
5762 if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
5763 !ST.hasDPPBroadcasts()) {
5764 ErrInfo =
"Invalid dpp_ctrl value: "
5765 "broadcasts are not supported on GFX10+";
5768 if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
5770 if (DC >= DppCtrl::ROW_NEWBCAST_FIRST &&
5771 DC <= DppCtrl::ROW_NEWBCAST_LAST &&
5772 !ST.hasGFX90AInsts()) {
5773 ErrInfo =
"Invalid dpp_ctrl value: "
5774 "row_newbroadcast/row_share is not supported before "
5778 if (DC > DppCtrl::ROW_NEWBCAST_LAST || !ST.hasGFX90AInsts()) {
5779 ErrInfo =
"Invalid dpp_ctrl value: "
5780 "row_share and row_xmask are not supported before GFX10";
5785 if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
5788 ErrInfo =
"Invalid dpp_ctrl value: "
5789 "DP ALU dpp only support row_newbcast";
5796 AMDGPU::OpName DataName =
5797 isDS(Opcode) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata;
5803 if (ST.hasGFX90AInsts()) {
5804 if (Dst &&
Data && !Dst->isTied() && !
Data->isTied() &&
5805 (RI.isAGPR(MRI, Dst->getReg()) != RI.isAGPR(MRI,
Data->getReg()))) {
5806 ErrInfo =
"Invalid register class: "
5807 "vdata and vdst should be both VGPR or AGPR";
5810 if (
Data && Data2 &&
5811 (RI.isAGPR(MRI,
Data->getReg()) != RI.isAGPR(MRI, Data2->
getReg()))) {
5812 ErrInfo =
"Invalid register class: "
5813 "both data operands should be VGPR or AGPR";
5817 if ((Dst && RI.isAGPR(MRI, Dst->getReg())) ||
5818 (
Data && RI.isAGPR(MRI,
Data->getReg())) ||
5819 (Data2 && RI.isAGPR(MRI, Data2->
getReg()))) {
5820 ErrInfo =
"Invalid register class: "
5821 "agpr loads and stores not supported on this GPU";
5827 if (ST.needsAlignedVGPRs()) {
5828 const auto isAlignedReg = [&
MI, &MRI,
this](AMDGPU::OpName
OpName) ->
bool {
5833 if (Reg.isPhysical())
5834 return !(RI.getHWRegIndex(Reg) & 1);
5836 return RI.getRegSizeInBits(RC) > 32 && RI.isProperlyAlignedRC(RC) &&
5837 !(RI.getChannelFromSubReg(
Op->getSubReg()) & 1);
5840 if (Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_SEMA_BR ||
5841 Opcode == AMDGPU::DS_GWS_BARRIER) {
5843 if (!isAlignedReg(AMDGPU::OpName::data0)) {
5844 ErrInfo =
"Subtarget requires even aligned vector registers "
5845 "for DS_GWS instructions";
5851 if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
5852 ErrInfo =
"Subtarget requires even aligned vector registers "
5853 "for vaddr operand of image instructions";
5859 if (Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts()) {
5861 if (Src->isReg() && RI.isSGPRReg(MRI, Src->getReg())) {
5862 ErrInfo =
"Invalid register class: "
5863 "v_accvgpr_write with an SGPR is not supported on this GPU";
5868 if (
Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
5871 ErrInfo =
"pseudo expects only physical SGPRs";
5878 if (!ST.hasScaleOffset()) {
5879 ErrInfo =
"Subtarget does not support offset scaling";
5883 ErrInfo =
"Instruction does not support offset scaling";
5892 for (
unsigned I = 0;
I < 3; ++
I) {
5898 if (ST.hasFlatScratchHiInB64InstHazard() &&
isSALU(
MI) &&
5899 MI.readsRegister(AMDGPU::SRC_FLAT_SCRATCH_BASE_HI,
nullptr)) {
5901 if ((Dst && RI.getRegClassForReg(MRI, Dst->getReg()) ==
5902 &AMDGPU::SReg_64RegClass) ||
5903 Opcode == AMDGPU::S_BITCMP0_B64 || Opcode == AMDGPU::S_BITCMP1_B64) {
5904 ErrInfo =
"Instruction cannot read flat_scratch_base_hi";
5913 if (
MI.getOpcode() == AMDGPU::S_MOV_B32) {
5915 return MI.getOperand(1).isReg() || RI.isAGPR(MRI,
MI.getOperand(0).getReg())
5917 : AMDGPU::V_MOV_B32_e32;
5927 default:
return AMDGPU::INSTRUCTION_LIST_END;
5928 case AMDGPU::REG_SEQUENCE:
return AMDGPU::REG_SEQUENCE;
5929 case AMDGPU::COPY:
return AMDGPU::COPY;
5930 case AMDGPU::PHI:
return AMDGPU::PHI;
5931 case AMDGPU::INSERT_SUBREG:
return AMDGPU::INSERT_SUBREG;
5932 case AMDGPU::WQM:
return AMDGPU::WQM;
5933 case AMDGPU::SOFT_WQM:
return AMDGPU::SOFT_WQM;
5934 case AMDGPU::STRICT_WWM:
return AMDGPU::STRICT_WWM;
5935 case AMDGPU::STRICT_WQM:
return AMDGPU::STRICT_WQM;
5936 case AMDGPU::S_ADD_I32:
5937 return ST.hasAddNoCarryInsts() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
5938 case AMDGPU::S_ADDC_U32:
5939 return AMDGPU::V_ADDC_U32_e32;
5940 case AMDGPU::S_SUB_I32:
5941 return ST.hasAddNoCarryInsts() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
5944 case AMDGPU::S_ADD_U32:
5945 return AMDGPU::V_ADD_CO_U32_e32;
5946 case AMDGPU::S_SUB_U32:
5947 return AMDGPU::V_SUB_CO_U32_e32;
5948 case AMDGPU::S_ADD_U64_PSEUDO:
5949 return AMDGPU::V_ADD_U64_PSEUDO;
5950 case AMDGPU::S_SUB_U64_PSEUDO:
5951 return AMDGPU::V_SUB_U64_PSEUDO;
5952 case AMDGPU::S_SUBB_U32:
return AMDGPU::V_SUBB_U32_e32;
5953 case AMDGPU::S_MUL_I32:
return AMDGPU::V_MUL_LO_U32_e64;
5954 case AMDGPU::S_MUL_HI_U32:
return AMDGPU::V_MUL_HI_U32_e64;
5955 case AMDGPU::S_MUL_HI_I32:
return AMDGPU::V_MUL_HI_I32_e64;
5956 case AMDGPU::S_AND_B32:
return AMDGPU::V_AND_B32_e64;
5957 case AMDGPU::S_OR_B32:
return AMDGPU::V_OR_B32_e64;
5958 case AMDGPU::S_XOR_B32:
return AMDGPU::V_XOR_B32_e64;
5959 case AMDGPU::S_XNOR_B32:
5960 return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
5961 case AMDGPU::S_MIN_I32:
return AMDGPU::V_MIN_I32_e64;
5962 case AMDGPU::S_MIN_U32:
return AMDGPU::V_MIN_U32_e64;
5963 case AMDGPU::S_MAX_I32:
return AMDGPU::V_MAX_I32_e64;
5964 case AMDGPU::S_MAX_U32:
return AMDGPU::V_MAX_U32_e64;
5965 case AMDGPU::S_ASHR_I32:
return AMDGPU::V_ASHR_I32_e32;
5966 case AMDGPU::S_ASHR_I64:
return AMDGPU::V_ASHR_I64_e64;
5967 case AMDGPU::S_LSHL_B32:
return AMDGPU::V_LSHL_B32_e32;
5968 case AMDGPU::S_LSHL_B64:
return AMDGPU::V_LSHL_B64_e64;
5969 case AMDGPU::S_LSHR_B32:
return AMDGPU::V_LSHR_B32_e32;
5970 case AMDGPU::S_LSHR_B64:
return AMDGPU::V_LSHR_B64_e64;
5971 case AMDGPU::S_SEXT_I32_I8:
return AMDGPU::V_BFE_I32_e64;
5972 case AMDGPU::S_SEXT_I32_I16:
return AMDGPU::V_BFE_I32_e64;
5973 case AMDGPU::S_BFE_U32:
return AMDGPU::V_BFE_U32_e64;
5974 case AMDGPU::S_BFE_I32:
return AMDGPU::V_BFE_I32_e64;
5975 case AMDGPU::S_BFM_B32:
return AMDGPU::V_BFM_B32_e64;
5976 case AMDGPU::S_BREV_B32:
return AMDGPU::V_BFREV_B32_e32;
5977 case AMDGPU::S_NOT_B32:
return AMDGPU::V_NOT_B32_e32;
5978 case AMDGPU::S_NOT_B64:
return AMDGPU::V_NOT_B32_e32;
5979 case AMDGPU::S_CMP_EQ_I32:
return AMDGPU::V_CMP_EQ_I32_e64;
5980 case AMDGPU::S_CMP_LG_I32:
return AMDGPU::V_CMP_NE_I32_e64;
5981 case AMDGPU::S_CMP_GT_I32:
return AMDGPU::V_CMP_GT_I32_e64;
5982 case AMDGPU::S_CMP_GE_I32:
return AMDGPU::V_CMP_GE_I32_e64;
5983 case AMDGPU::S_CMP_LT_I32:
return AMDGPU::V_CMP_LT_I32_e64;
5984 case AMDGPU::S_CMP_LE_I32:
return AMDGPU::V_CMP_LE_I32_e64;
5985 case AMDGPU::S_CMP_EQ_U32:
return AMDGPU::V_CMP_EQ_U32_e64;
5986 case AMDGPU::S_CMP_LG_U32:
return AMDGPU::V_CMP_NE_U32_e64;
5987 case AMDGPU::S_CMP_GT_U32:
return AMDGPU::V_CMP_GT_U32_e64;
5988 case AMDGPU::S_CMP_GE_U32:
return AMDGPU::V_CMP_GE_U32_e64;
5989 case AMDGPU::S_CMP_LT_U32:
return AMDGPU::V_CMP_LT_U32_e64;
5990 case AMDGPU::S_CMP_LE_U32:
return AMDGPU::V_CMP_LE_U32_e64;
5991 case AMDGPU::S_CMP_EQ_U64:
return AMDGPU::V_CMP_EQ_U64_e64;
5992 case AMDGPU::S_CMP_LG_U64:
return AMDGPU::V_CMP_NE_U64_e64;
5993 case AMDGPU::S_BCNT1_I32_B32:
return AMDGPU::V_BCNT_U32_B32_e64;
5994 case AMDGPU::S_FF1_I32_B32:
return AMDGPU::V_FFBL_B32_e32;
5995 case AMDGPU::S_FLBIT_I32_B32:
return AMDGPU::V_FFBH_U32_e32;
5996 case AMDGPU::S_FLBIT_I32:
return AMDGPU::V_FFBH_I32_e64;
5997 case AMDGPU::S_CBRANCH_SCC0:
return AMDGPU::S_CBRANCH_VCCZ;
5998 case AMDGPU::S_CBRANCH_SCC1:
return AMDGPU::S_CBRANCH_VCCNZ;
5999 case AMDGPU::S_CVT_F32_I32:
return AMDGPU::V_CVT_F32_I32_e64;
6000 case AMDGPU::S_CVT_F32_U32:
return AMDGPU::V_CVT_F32_U32_e64;
6001 case AMDGPU::S_CVT_I32_F32:
return AMDGPU::V_CVT_I32_F32_e64;
6002 case AMDGPU::S_CVT_U32_F32:
return AMDGPU::V_CVT_U32_F32_e64;
6003 case AMDGPU::S_CVT_F32_F16:
6004 case AMDGPU::S_CVT_HI_F32_F16:
6005 return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F32_F16_t16_e64
6006 : AMDGPU::V_CVT_F32_F16_fake16_e64;
6007 case AMDGPU::S_CVT_F16_F32:
6008 return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F16_F32_t16_e64
6009 : AMDGPU::V_CVT_F16_F32_fake16_e64;
6010 case AMDGPU::S_CEIL_F32:
return AMDGPU::V_CEIL_F32_e64;
6011 case AMDGPU::S_FLOOR_F32:
return AMDGPU::V_FLOOR_F32_e64;
6012 case AMDGPU::S_TRUNC_F32:
return AMDGPU::V_TRUNC_F32_e64;
6013 case AMDGPU::S_RNDNE_F32:
return AMDGPU::V_RNDNE_F32_e64;
6014 case AMDGPU::S_CEIL_F16:
6015 return ST.useRealTrue16Insts() ? AMDGPU::V_CEIL_F16_t16_e64
6016 : AMDGPU::V_CEIL_F16_fake16_e64;
6017 case AMDGPU::S_FLOOR_F16:
6018 return ST.useRealTrue16Insts() ? AMDGPU::V_FLOOR_F16_t16_e64
6019 : AMDGPU::V_FLOOR_F16_fake16_e64;
6020 case AMDGPU::S_TRUNC_F16:
6021 return ST.useRealTrue16Insts() ? AMDGPU::V_TRUNC_F16_t16_e64
6022 : AMDGPU::V_TRUNC_F16_fake16_e64;
6023 case AMDGPU::S_RNDNE_F16:
6024 return ST.useRealTrue16Insts() ? AMDGPU::V_RNDNE_F16_t16_e64
6025 : AMDGPU::V_RNDNE_F16_fake16_e64;
6026 case AMDGPU::S_ADD_F32:
return AMDGPU::V_ADD_F32_e64;
6027 case AMDGPU::S_SUB_F32:
return AMDGPU::V_SUB_F32_e64;
6028 case AMDGPU::S_MIN_F32:
return AMDGPU::V_MIN_F32_e64;
6029 case AMDGPU::S_MAX_F32:
return AMDGPU::V_MAX_F32_e64;
6030 case AMDGPU::S_MINIMUM_F32:
return AMDGPU::V_MINIMUM_F32_e64;
6031 case AMDGPU::S_MAXIMUM_F32:
return AMDGPU::V_MAXIMUM_F32_e64;
6032 case AMDGPU::S_MUL_F32:
return AMDGPU::V_MUL_F32_e64;
6033 case AMDGPU::S_ADD_F16:
6034 return ST.useRealTrue16Insts() ? AMDGPU::V_ADD_F16_t16_e64
6035 : AMDGPU::V_ADD_F16_fake16_e64;
6036 case AMDGPU::S_SUB_F16:
6037 return ST.useRealTrue16Insts() ? AMDGPU::V_SUB_F16_t16_e64
6038 : AMDGPU::V_SUB_F16_fake16_e64;
6039 case AMDGPU::S_MIN_F16:
6040 return ST.useRealTrue16Insts() ? AMDGPU::V_MIN_F16_t16_e64
6041 : AMDGPU::V_MIN_F16_fake16_e64;
6042 case AMDGPU::S_MAX_F16:
6043 return ST.useRealTrue16Insts() ? AMDGPU::V_MAX_F16_t16_e64
6044 : AMDGPU::V_MAX_F16_fake16_e64;
6045 case AMDGPU::S_MINIMUM_F16:
6046 return ST.useRealTrue16Insts() ? AMDGPU::V_MINIMUM_F16_t16_e64
6047 : AMDGPU::V_MINIMUM_F16_fake16_e64;
6048 case AMDGPU::S_MAXIMUM_F16:
6049 return ST.useRealTrue16Insts() ? AMDGPU::V_MAXIMUM_F16_t16_e64
6050 : AMDGPU::V_MAXIMUM_F16_fake16_e64;
6051 case AMDGPU::S_MUL_F16:
6052 return ST.useRealTrue16Insts() ? AMDGPU::V_MUL_F16_t16_e64
6053 : AMDGPU::V_MUL_F16_fake16_e64;
6054 case AMDGPU::S_CVT_PK_RTZ_F16_F32:
return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
6055 case AMDGPU::S_FMAC_F32:
return AMDGPU::V_FMAC_F32_e64;
6056 case AMDGPU::S_FMAC_F16:
6057 return ST.useRealTrue16Insts() ? AMDGPU::V_FMAC_F16_t16_e64
6058 : AMDGPU::V_FMAC_F16_fake16_e64;
6059 case AMDGPU::S_FMAMK_F32:
return AMDGPU::V_FMAMK_F32;
6060 case AMDGPU::S_FMAAK_F32:
return AMDGPU::V_FMAAK_F32;
6061 case AMDGPU::S_CMP_LT_F32:
return AMDGPU::V_CMP_LT_F32_e64;
6062 case AMDGPU::S_CMP_EQ_F32:
return AMDGPU::V_CMP_EQ_F32_e64;
6063 case AMDGPU::S_CMP_LE_F32:
return AMDGPU::V_CMP_LE_F32_e64;
6064 case AMDGPU::S_CMP_GT_F32:
return AMDGPU::V_CMP_GT_F32_e64;
6065 case AMDGPU::S_CMP_LG_F32:
return AMDGPU::V_CMP_LG_F32_e64;
6066 case AMDGPU::S_CMP_GE_F32:
return AMDGPU::V_CMP_GE_F32_e64;
6067 case AMDGPU::S_CMP_O_F32:
return AMDGPU::V_CMP_O_F32_e64;
6068 case AMDGPU::S_CMP_U_F32:
return AMDGPU::V_CMP_U_F32_e64;
6069 case AMDGPU::S_CMP_NGE_F32:
return AMDGPU::V_CMP_NGE_F32_e64;
6070 case AMDGPU::S_CMP_NLG_F32:
return AMDGPU::V_CMP_NLG_F32_e64;
6071 case AMDGPU::S_CMP_NGT_F32:
return AMDGPU::V_CMP_NGT_F32_e64;
6072 case AMDGPU::S_CMP_NLE_F32:
return AMDGPU::V_CMP_NLE_F32_e64;
6073 case AMDGPU::S_CMP_NEQ_F32:
return AMDGPU::V_CMP_NEQ_F32_e64;
6074 case AMDGPU::S_CMP_NLT_F32:
return AMDGPU::V_CMP_NLT_F32_e64;
6075 case AMDGPU::S_CMP_LT_F16:
6076 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LT_F16_t16_e64
6077 : AMDGPU::V_CMP_LT_F16_fake16_e64;
6078 case AMDGPU::S_CMP_EQ_F16:
6079 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_EQ_F16_t16_e64
6080 : AMDGPU::V_CMP_EQ_F16_fake16_e64;
6081 case AMDGPU::S_CMP_LE_F16:
6082 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LE_F16_t16_e64
6083 : AMDGPU::V_CMP_LE_F16_fake16_e64;
6084 case AMDGPU::S_CMP_GT_F16:
6085 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GT_F16_t16_e64
6086 : AMDGPU::V_CMP_GT_F16_fake16_e64;
6087 case AMDGPU::S_CMP_LG_F16:
6088 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LG_F16_t16_e64
6089 : AMDGPU::V_CMP_LG_F16_fake16_e64;
6090 case AMDGPU::S_CMP_GE_F16:
6091 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GE_F16_t16_e64
6092 : AMDGPU::V_CMP_GE_F16_fake16_e64;
6093 case AMDGPU::S_CMP_O_F16:
6094 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_O_F16_t16_e64
6095 : AMDGPU::V_CMP_O_F16_fake16_e64;
6096 case AMDGPU::S_CMP_U_F16:
6097 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_U_F16_t16_e64
6098 : AMDGPU::V_CMP_U_F16_fake16_e64;
6099 case AMDGPU::S_CMP_NGE_F16:
6100 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGE_F16_t16_e64
6101 : AMDGPU::V_CMP_NGE_F16_fake16_e64;
6102 case AMDGPU::S_CMP_NLG_F16:
6103 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLG_F16_t16_e64
6104 : AMDGPU::V_CMP_NLG_F16_fake16_e64;
6105 case AMDGPU::S_CMP_NGT_F16:
6106 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGT_F16_t16_e64
6107 : AMDGPU::V_CMP_NGT_F16_fake16_e64;
6108 case AMDGPU::S_CMP_NLE_F16:
6109 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLE_F16_t16_e64
6110 : AMDGPU::V_CMP_NLE_F16_fake16_e64;
6111 case AMDGPU::S_CMP_NEQ_F16:
6112 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NEQ_F16_t16_e64
6113 : AMDGPU::V_CMP_NEQ_F16_fake16_e64;
6114 case AMDGPU::S_CMP_NLT_F16:
6115 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLT_F16_t16_e64
6116 : AMDGPU::V_CMP_NLT_F16_fake16_e64;
6117 case AMDGPU::V_S_EXP_F32_e64:
return AMDGPU::V_EXP_F32_e64;
6118 case AMDGPU::V_S_EXP_F16_e64:
6119 return ST.useRealTrue16Insts() ? AMDGPU::V_EXP_F16_t16_e64
6120 : AMDGPU::V_EXP_F16_fake16_e64;
6121 case AMDGPU::V_S_LOG_F32_e64:
return AMDGPU::V_LOG_F32_e64;
6122 case AMDGPU::V_S_LOG_F16_e64:
6123 return ST.useRealTrue16Insts() ? AMDGPU::V_LOG_F16_t16_e64
6124 : AMDGPU::V_LOG_F16_fake16_e64;
6125 case AMDGPU::V_S_RCP_F32_e64:
return AMDGPU::V_RCP_F32_e64;
6126 case AMDGPU::V_S_RCP_F16_e64:
6127 return ST.useRealTrue16Insts() ? AMDGPU::V_RCP_F16_t16_e64
6128 : AMDGPU::V_RCP_F16_fake16_e64;
6129 case AMDGPU::V_S_RSQ_F32_e64:
return AMDGPU::V_RSQ_F32_e64;
6130 case AMDGPU::V_S_RSQ_F16_e64:
6131 return ST.useRealTrue16Insts() ? AMDGPU::V_RSQ_F16_t16_e64
6132 : AMDGPU::V_RSQ_F16_fake16_e64;
6133 case AMDGPU::V_S_SQRT_F32_e64:
return AMDGPU::V_SQRT_F32_e64;
6134 case AMDGPU::V_S_SQRT_F16_e64:
6135 return ST.useRealTrue16Insts() ? AMDGPU::V_SQRT_F16_t16_e64
6136 : AMDGPU::V_SQRT_F16_fake16_e64;
6139 "Unexpected scalar opcode without corresponding vector one!");
6188 "Not a whole wave func");
6191 if (
MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_SETUP ||
6192 MI.getOpcode() == AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP)
6199 unsigned OpNo)
const {
6201 if (
MI.isVariadic() || OpNo >=
Desc.getNumOperands() ||
6202 Desc.operands()[OpNo].RegClass == -1) {
6205 if (Reg.isVirtual()) {
6209 return RI.getPhysRegBaseClass(Reg);
6212 int16_t RegClass = getOpRegClassID(
Desc.operands()[OpNo]);
6213 return RegClass < 0 ? nullptr : RI.getRegClass(RegClass);
6221 unsigned RCID = getOpRegClassID(
get(
MI.getOpcode()).operands()[
OpIdx]);
6223 unsigned Size = RI.getRegSizeInBits(*RC);
6224 unsigned Opcode = (
Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO
6225 :
Size == 16 ? AMDGPU::V_MOV_B16_t16_e64
6226 : AMDGPU::V_MOV_B32_e32;
6228 Opcode = AMDGPU::COPY;
6229 else if (RI.isSGPRClass(RC))
6230 Opcode = (
Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
6244 return RI.getSubReg(SuperReg.
getReg(), SubIdx);
6250 unsigned NewSubIdx = RI.composeSubRegIndices(SuperReg.
getSubReg(), SubIdx);
6261 if (SubIdx == AMDGPU::sub0)
6263 if (SubIdx == AMDGPU::sub1)
6275void SIInstrInfo::swapOperands(
MachineInstr &Inst)
const {
6291 if (Reg.isPhysical())
6301 return RI.getMatchingSuperRegClass(SuperRC, DRC, MO.
getSubReg()) !=
nullptr;
6304 return RI.getCommonSubClass(DRC, RC) !=
nullptr;
6311 unsigned Opc =
MI.getOpcode();
6317 constexpr AMDGPU::OpName OpNames[] = {
6318 AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};
6321 int SrcIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[
I]);
6322 if (
static_cast<unsigned>(SrcIdx) ==
OpIdx &&
6332 bool IsAGPR = RI.isAGPR(MRI, MO.
getReg());
6333 if (IsAGPR && !ST.hasMAIInsts())
6339 const int VDstIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
6340 const int DataIdx = AMDGPU::getNamedOperandIdx(
6341 Opc,
isDS(
Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
6342 if ((
int)
OpIdx == VDstIdx && DataIdx != -1 &&
6343 MI.getOperand(DataIdx).isReg() &&
6344 RI.isAGPR(MRI,
MI.getOperand(DataIdx).getReg()) != IsAGPR)
6346 if ((
int)
OpIdx == DataIdx) {
6347 if (VDstIdx != -1 &&
6348 RI.isAGPR(MRI,
MI.getOperand(VDstIdx).getReg()) != IsAGPR)
6351 const int Data1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data1);
6352 if (Data1Idx != -1 &&
MI.getOperand(Data1Idx).isReg() &&
6353 RI.isAGPR(MRI,
MI.getOperand(Data1Idx).getReg()) != IsAGPR)
6358 if (
Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts() &&
6359 (
int)
OpIdx == AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0) &&
6360 RI.isSGPRReg(MRI, MO.
getReg()))
6363 if (ST.hasFlatScratchHiInB64InstHazard() &&
6370 if (
Opc == AMDGPU::S_BITCMP0_B64 ||
Opc == AMDGPU::S_BITCMP1_B64)
6373 if (!ST.hasDPPSrc1SGPR() &&
isDPP(
MI) && RI.isSGPRReg(MRI, MO.
getReg()) &&
6374 (
int)
OpIdx == AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1))
6394 constexpr unsigned NumOps = 3;
6395 constexpr AMDGPU::OpName OpNames[
NumOps * 2] = {
6396 AMDGPU::OpName::src0, AMDGPU::OpName::src1,
6397 AMDGPU::OpName::src2, AMDGPU::OpName::src0_modifiers,
6398 AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers};
6403 int SrcIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[SrcN]);
6406 MO = &
MI.getOperand(SrcIdx);
6409 if (!MO->
isReg() || !RI.isSGPRReg(MRI, MO->
getReg()))
6413 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[
NumOps + SrcN]);
6417 unsigned Mods =
MI.getOperand(ModsIdx).getImm();
6421 return !OpSel && !OpSelHi;
6430 int64_t RegClass = getOpRegClassID(OpInfo);
6432 RegClass != -1 ? RI.getRegClass(RegClass) :
nullptr;
6438 if (
isVALU(
MI,
true) && !IsInlineConst &&
6442 int ConstantBusLimit = ST.getConstantBusLimit(
MI.getOpcode());
6443 int LiteralLimit = !
isVOP3(
MI) || ST.hasVOP3Literal() ? 1 : 0;
6447 if (!LiteralLimit--)
6457 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
6465 if (--ConstantBusLimit <= 0)
6477 if (!LiteralLimit--)
6479 if (--ConstantBusLimit <= 0)
6485 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
6489 if (!
Op.isReg() && !
Op.isFI() && !
Op.isRegMask() &&
6491 !
Op.isIdenticalTo(*MO))
6501 }
else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() &&
6516 bool Is64BitOp = Is64BitFPOp ||
6524 (!ST.has64BitLiterals() || InstDesc.
getSize() != 4))
6533 if (!Is64BitFPOp && (int32_t)Imm < 0 &&
6551 bool IsGFX950Only = ST.hasGFX950Insts();
6552 bool IsGFX940Only = ST.hasGFX940Insts();
6554 if (!IsGFX950Only && !IsGFX940Only)
6572 unsigned Opcode =
MI.getOpcode();
6574 case AMDGPU::V_CVT_PK_BF8_F32_e64:
6575 case AMDGPU::V_CVT_PK_FP8_F32_e64:
6576 case AMDGPU::V_MQSAD_PK_U16_U8_e64:
6577 case AMDGPU::V_MQSAD_U32_U8_e64:
6578 case AMDGPU::V_PK_ADD_F16:
6579 case AMDGPU::V_PK_ADD_F32:
6580 case AMDGPU::V_PK_ADD_I16:
6581 case AMDGPU::V_PK_ADD_U16:
6582 case AMDGPU::V_PK_ASHRREV_I16:
6583 case AMDGPU::V_PK_FMA_F16:
6584 case AMDGPU::V_PK_FMA_F32:
6585 case AMDGPU::V_PK_FMAC_F16_e32:
6586 case AMDGPU::V_PK_FMAC_F16_e64:
6587 case AMDGPU::V_PK_LSHLREV_B16:
6588 case AMDGPU::V_PK_LSHRREV_B16:
6589 case AMDGPU::V_PK_MAD_I16:
6590 case AMDGPU::V_PK_MAD_U16:
6591 case AMDGPU::V_PK_MAX_F16:
6592 case AMDGPU::V_PK_MAX_I16:
6593 case AMDGPU::V_PK_MAX_U16:
6594 case AMDGPU::V_PK_MIN_F16:
6595 case AMDGPU::V_PK_MIN_I16:
6596 case AMDGPU::V_PK_MIN_U16:
6597 case AMDGPU::V_PK_MOV_B32:
6598 case AMDGPU::V_PK_MUL_F16:
6599 case AMDGPU::V_PK_MUL_F32:
6600 case AMDGPU::V_PK_MUL_LO_U16:
6601 case AMDGPU::V_PK_SUB_I16:
6602 case AMDGPU::V_PK_SUB_U16:
6603 case AMDGPU::V_QSAD_PK_U16_U8_e64:
6612 unsigned Opc =
MI.getOpcode();
6615 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
6618 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
6624 if (HasImplicitSGPR && ST.getConstantBusLimit(
Opc) <= 1 && Src0.
isReg() &&
6625 RI.isSGPRReg(MRI, Src0.
getReg()))
6631 if (
Opc == AMDGPU::V_WRITELANE_B32) {
6633 if (Src0.
isReg() && RI.isVGPR(MRI, Src0.
getReg())) {
6639 if (Src1.
isReg() && RI.isVGPR(MRI, Src1.
getReg())) {
6650 if (
Opc == AMDGPU::V_FMAC_F32_e32 ||
Opc == AMDGPU::V_FMAC_F16_e32) {
6651 int Src2Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2);
6652 if (!RI.isVGPR(MRI,
MI.getOperand(Src2Idx).getReg()))
6664 if (
Opc == AMDGPU::V_READLANE_B32 && Src1.
isReg() &&
6665 RI.isVGPR(MRI, Src1.
getReg())) {
6678 if (HasImplicitSGPR || !
MI.isCommutable()) {
6695 if (CommutedOpc == -1) {
6700 MI.setDesc(
get(CommutedOpc));
6704 bool Src0Kill = Src0.
isKill();
6708 else if (Src1.
isReg()) {
6723 unsigned Opc =
MI.getOpcode();
6726 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0),
6727 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1),
6728 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2)
6731 if (
Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
6732 Opc == AMDGPU::V_PERMLANEX16_B32_e64 ||
6733 Opc == AMDGPU::V_PERMLANE_BCAST_B32_e64 ||
6734 Opc == AMDGPU::V_PERMLANE_UP_B32_e64 ||
6735 Opc == AMDGPU::V_PERMLANE_DOWN_B32_e64 ||
6736 Opc == AMDGPU::V_PERMLANE_XOR_B32_e64 ||
6737 Opc == AMDGPU::V_PERMLANE_IDX_GEN_B32_e64) {
6747 if (VOP3Idx[2] != -1) {
6759 int ConstantBusLimit = ST.getConstantBusLimit(
Opc);
6760 int LiteralLimit = ST.hasVOP3Literal() ? 1 : 0;
6762 Register SGPRReg = findUsedSGPR(
MI, VOP3Idx);
6764 SGPRsUsed.
insert(SGPRReg);
6768 for (
int Idx : VOP3Idx) {
6777 if (LiteralLimit > 0 && ConstantBusLimit > 0) {
6789 if (!RI.isSGPRClass(RI.getRegClassForReg(MRI, MO.
getReg())))
6796 if (ConstantBusLimit > 0) {
6808 if ((
Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_e64) &&
6809 !RI.isVGPR(MRI,
MI.getOperand(VOP3Idx[2]).getReg()))
6816 for (
unsigned I = 0;
I < 3; ++
I) {
6829 SRC = RI.getCommonSubClass(SRC, DstRC);
6832 unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
6834 if (RI.hasAGPRs(VRC)) {
6835 VRC = RI.getEquivalentVGPRClass(VRC);
6838 get(TargetOpcode::COPY), NewSrcReg)
6845 get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
6851 for (
unsigned i = 0; i < SubRegs; ++i) {
6854 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
6855 .
addReg(SrcReg, {}, RI.getSubRegFromChannel(i));
6861 get(AMDGPU::REG_SEQUENCE), DstReg);
6862 for (
unsigned i = 0; i < SubRegs; ++i) {
6864 MIB.
addImm(RI.getSubRegFromChannel(i));
6877 if (SBase && !RI.isSGPRClass(MRI.
getRegClass(SBase->getReg()))) {
6879 SBase->setReg(SGPR);
6882 if (SOff && !RI.isSGPRReg(MRI, SOff->
getReg())) {
6890 int OldSAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::saddr);
6891 if (OldSAddrIdx < 0)
6904 if (RI.isSGPRReg(MRI, SAddr.
getReg()))
6907 int NewVAddrIdx = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vaddr);
6908 if (NewVAddrIdx < 0)
6911 int OldVAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr);
6915 if (OldVAddrIdx >= 0) {
6929 if (OldVAddrIdx == NewVAddrIdx) {
6940 assert(OldSAddrIdx == NewVAddrIdx);
6942 if (OldVAddrIdx >= 0) {
6943 int NewVDstIn = AMDGPU::getNamedOperandIdx(NewOpc,
6944 AMDGPU::OpName::vdst_in);
6948 if (NewVDstIn != -1) {
6949 int OldVDstIn = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst_in);
6955 if (NewVDstIn != -1) {
6956 int NewVDst = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
6997 unsigned OpSubReg =
Op.getSubReg();
7000 RI.getRegClassForReg(MRI, OpReg), OpSubReg);
7016 if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
7019 bool ImpDef = Def->isImplicitDef();
7020 while (!ImpDef && Def && Def->isCopy()) {
7021 if (Def->getOperand(1).getReg().isPhysical())
7024 ImpDef = Def && Def->isImplicitDef();
7026 if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
7042 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
7047 bool UseNewExecInstructions =
7056 if (UseNewExecInstructions) {
7091 for (
auto [Idx, ScalarOp] :
enumerate(ScalarOps)) {
7092 unsigned RegSize =
TRI->getRegSizeInBits(ScalarOp->getReg(), MRI);
7093 unsigned NumSubRegs =
RegSize / 32;
7094 Register VScalarOp = ScalarOp->getReg();
7097 TII.getRegClass(
TII.get(AMDGPU::V_READFIRSTLANE_B32), 1);
7099 if (NumSubRegs == 1) {
7102 TRI->getCommonSubClass(VScalarOpRC, RFLSrcRC);
7103 Common != VScalarOpRC) {
7110 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg)
7113 if (UseNewExecInstructions) {
7115 TII.get(AMDGPU::V_CMPX_EQ_U32_nosdst_e32_term))
7118 if (
I == LoopBB.
end())
7123 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg)
7129 CondReg = NewCondReg;
7140 if (PhySGPRs.empty() || !PhySGPRs[Idx].isValid())
7141 ScalarOp->setReg(CurReg);
7144 BuildMI(*ScalarOp->getParent()->getParent(), ScalarOp->getParent(),
DL,
7145 TII.get(AMDGPU::COPY), PhySGPRs[Idx])
7147 ScalarOp->setReg(PhySGPRs[Idx]);
7149 ScalarOp->setIsKill();
7153 assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
7154 "Unhandled register size");
7156 for (
unsigned Idx = 0; Idx < NumSubRegs; Idx += 2) {
7163 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
7164 .
addReg(VScalarOp, VScalarOpUndef,
TRI->getSubRegFromChannel(Idx));
7167 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
7168 .
addReg(VScalarOp, VScalarOpUndef,
7169 TRI->getSubRegFromChannel(Idx + 1));
7176 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), CurReg)
7183 NumSubRegs <= 2 ? 0 :
TRI->getSubRegFromChannel(Idx, 2);
7185 if (UseNewExecInstructions) {
7187 TII.get(AMDGPU::V_CMPX_EQ_U64_nosdst_e32_term))
7189 .
addReg(VScalarOp, VScalarOpUndef, SubReg);
7190 if (
I == LoopBB.
end())
7194 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U64_e64), NewCondReg)
7196 .
addReg(VScalarOp, VScalarOpUndef, SubReg);
7200 CondReg = NewCondReg;
7211 const auto *SScalarOpRC =
7217 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), SScalarOp);
7218 unsigned Channel = 0;
7219 for (
Register Piece : ReadlanePieces) {
7220 Merge.addReg(Piece).addImm(
TRI->getSubRegFromChannel(Channel++));
7224 if (PhySGPRs.empty() || !PhySGPRs[Idx].isValid())
7225 ScalarOp->setReg(SScalarOp);
7227 BuildMI(*ScalarOp->getParent()->getParent(), ScalarOp->getParent(),
DL,
7228 TII.get(AMDGPU::COPY), PhySGPRs[Idx])
7230 ScalarOp->setReg(PhySGPRs[Idx]);
7232 ScalarOp->setIsKill();
7240 if (!UseNewExecInstructions) {
7252 if (UseNewExecInstructions) {
7276 assert((PhySGPRs.empty() || PhySGPRs.size() == ScalarOps.
size()) &&
7277 "Physical SGPRs must be empty or match the number of scalar operands");
7283 if (!Begin.isValid())
7285 if (!End.isValid()) {
7291 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
7300 std::numeric_limits<unsigned>::max()) !=
7318 for (
auto I = Begin;
I != AfterMI;
I++) {
7319 for (
auto &MO :
I->all_uses())
7355 for (
auto &Succ : RemainderBB->
successors()) {
7380static std::tuple<unsigned, unsigned>
7388 TII.buildExtractSubReg(
MI, MRI, Rsrc, &AMDGPU::VReg_128RegClass,
7389 AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
7396 uint64_t RsrcDataFormat =
TII.getDefaultRsrcDataFormat();
7413 .
addImm(AMDGPU::sub0_sub1)
7419 return std::tuple(RsrcPtr, NewSRsrc);
7456 if (
MI.getOpcode() == AMDGPU::PHI) {
7458 assert(!RI.isSGPRClass(VRC));
7461 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
7463 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7479 if (
MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
7482 if (RI.hasVGPRs(DstRC)) {
7486 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
7488 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7506 if (
MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
7511 if (DstRC != Src0RC) {
7520 if (
MI.getOpcode() == AMDGPU::SI_INIT_M0) {
7522 if (Src.isReg() && RI.hasVectorRegisters(MRI.
getRegClass(Src.getReg())))
7528 if (
MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 ||
7529 MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
7530 MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
7531 MI.getOpcode() == AMDGPU::S_WQM_B32 ||
7532 MI.getOpcode() == AMDGPU::S_WQM_B64 ||
7533 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U32 ||
7534 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U64) {
7536 if (Src.isReg() && RI.hasVectorRegisters(MRI.
getRegClass(Src.getReg())))
7549 ? AMDGPU::OpName::rsrc
7550 : AMDGPU::OpName::srsrc;
7555 AMDGPU::OpName SampOpName =
7556 isMIMG(
MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
7565 if (
MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
7573 if (
MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
7577 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::src0);
7587 if (
MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_d2 ||
7588 MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_d4 ||
7589 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS_d2 ||
7590 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS_d4) {
7592 if (Src.isReg() && RI.hasVectorRegisters(MRI.
getRegClass(Src.getReg())))
7599 bool isSoffsetLegal =
true;
7601 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::soffset);
7602 if (SoffsetIdx != -1) {
7606 isSoffsetLegal =
false;
7610 bool isRsrcLegal =
true;
7612 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::srsrc);
7613 if (RsrcIdx != -1) {
7615 if (Rsrc->
isReg() && !RI.isSGPRReg(MRI, Rsrc->
getReg()))
7616 isRsrcLegal =
false;
7620 if (isRsrcLegal && isSoffsetLegal)
7648 const auto *BoolXExecRC = RI.getWaveMaskRegClass();
7652 unsigned RsrcPtr, NewSRsrc;
7659 .
addReg(RsrcPtr, {}, AMDGPU::sub0)
7660 .addReg(VAddr->
getReg(), {}, AMDGPU::sub0)
7666 .
addReg(RsrcPtr, {}, AMDGPU::sub1)
7667 .addReg(VAddr->
getReg(), {}, AMDGPU::sub1)
7680 }
else if (!VAddr && ST.hasAddr64()) {
7684 "FIXME: Need to emit flat atomics here");
7686 unsigned RsrcPtr, NewSRsrc;
7712 MIB.
addImm(CPol->getImm());
7717 MIB.
addImm(TFE->getImm());
7737 MI.removeFromParent();
7742 .
addReg(RsrcPtr, {}, AMDGPU::sub0)
7743 .addImm(AMDGPU::sub0)
7744 .
addReg(RsrcPtr, {}, AMDGPU::sub1)
7745 .addImm(AMDGPU::sub1);
7748 if (!isSoffsetLegal) {
7759 if (!isSoffsetLegal) {
7771 AMDGPU::getNamedOperandIdx(
MI->getOpcode(), AMDGPU::OpName::srsrc);
7772 if (RsrcIdx != -1) {
7773 DeferredList.insert(
MI);
7778 return DeferredList.contains(
MI);
7788 if (!ST.useRealTrue16Insts())
7791 unsigned Opcode =
MI.getOpcode();
7795 OpIdx >=
get(Opcode).getNumOperands() ||
7796 get(Opcode).operands()[
OpIdx].RegClass == -1)
7800 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7804 if (!RI.isVGPRClass(CurrRC))
7807 int16_t RCID = getOpRegClassID(
get(Opcode).operands()[
OpIdx]);
7809 if (RI.getMatchingSuperRegClass(CurrRC, ExpectedRC, AMDGPU::lo16)) {
7810 Op.setSubReg(AMDGPU::lo16);
7811 }
else if (RI.getMatchingSuperRegClass(ExpectedRC, CurrRC, AMDGPU::lo16)) {
7821 Op.setReg(NewDstReg);
7834 assert(
MI->getOpcode() == AMDGPU::SI_CALL_ISEL &&
7835 "This only handle waterfall for SI_CALL_ISEL");
7842 while (Start->getOpcode() != AMDGPU::ADJCALLSTACKUP)
7845 while (End->getOpcode() != AMDGPU::ADJCALLSTACKDOWN)
7850 while (End !=
MBB.end() && End->isCopy() &&
7851 MI->definesRegister(End->getOperand(1).getReg(), &RI))
7861 while (!Worklist.
empty()) {
7867 moveToVALUImpl(Worklist, MDT, Inst, WaterFalls, V2SPhyCopiesToErase);
7873 moveToVALUImpl(Worklist, MDT, *Inst, WaterFalls, V2SPhyCopiesToErase);
7875 "Deferred MachineInstr are not supposed to re-populate worklist");
7878 for (std::pair<MachineInstr *, V2PhysSCopyInfo> &Entry : WaterFalls) {
7879 if (Entry.first->getOpcode() == AMDGPU::SI_CALL_ISEL)
7881 Entry.second.SGPRs);
7884 for (std::pair<MachineInstr *, bool> Entry : V2SPhyCopiesToErase)
7886 Entry.first->eraseFromParent();
7894 if (SubRegIndices.
size() <= 1) {
7897 get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
7904 for (int16_t Indice : SubRegIndices) {
7907 get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
7914 get(AMDGPU::REG_SEQUENCE), DstReg);
7915 for (
unsigned i = 0; i < SubRegIndices.size(); ++i) {
7917 MIB.
addImm(RI.getSubRegFromChannel(i));
7927 if (DstReg == AMDGPU::M0) {
7940 if (
I->getOpcode() == AMDGPU::SI_CALL_ISEL) {
7942 for (
unsigned i = 0; i <
UseMI->getNumOperands(); ++i) {
7943 if (
UseMI->getOperand(i).isReg() &&
7944 UseMI->getOperand(i).getReg() == DstReg) {
7948 V2SCopyInfo.MOs.push_back(MO);
7949 V2SCopyInfo.SGPRs.push_back(DstReg);
7953 }
else if (
I->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG &&
7954 I->getOperand(0).isReg() &&
7955 I->getOperand(0).getReg() == DstReg) {
7958 }
else if (
I->readsRegister(DstReg, &RI)) {
7960 V2SPhyCopiesToErase[&Inst] =
false;
7962 if (
I->findRegisterDefOperand(DstReg, &RI))
7984 case AMDGPU::S_ADD_I32:
7985 case AMDGPU::S_SUB_I32: {
7989 std::tie(
Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);
7997 case AMDGPU::S_MUL_U64:
7998 if (ST.hasVMulU64Inst()) {
7999 NewOpcode = AMDGPU::V_MUL_U64_e64;
8003 splitScalarSMulU64(Worklist, Inst, MDT);
8007 case AMDGPU::S_MUL_U64_U32_PSEUDO:
8008 case AMDGPU::S_MUL_I64_I32_PSEUDO:
8011 splitScalarSMulPseudo(Worklist, Inst, MDT);
8015 case AMDGPU::S_AND_B64:
8016 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
8020 case AMDGPU::S_OR_B64:
8021 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
8025 case AMDGPU::S_XOR_B64:
8026 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
8030 case AMDGPU::S_NAND_B64:
8031 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
8035 case AMDGPU::S_NOR_B64:
8036 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
8040 case AMDGPU::S_XNOR_B64:
8041 if (ST.hasDLInsts())
8042 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
8044 splitScalar64BitXnor(Worklist, Inst, MDT);
8048 case AMDGPU::S_ANDN2_B64:
8049 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
8053 case AMDGPU::S_ORN2_B64:
8054 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
8058 case AMDGPU::S_BREV_B64:
8059 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32,
true);
8063 case AMDGPU::S_NOT_B64:
8064 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
8068 case AMDGPU::S_BCNT1_I32_B64:
8069 splitScalar64BitBCNT(Worklist, Inst);
8073 case AMDGPU::S_BFE_I64:
8074 splitScalar64BitBFE(Worklist, Inst);
8078 case AMDGPU::S_FLBIT_I32_B64:
8079 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32);
8082 case AMDGPU::S_FF1_I32_B64:
8083 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32);
8087 case AMDGPU::S_LSHL_B32:
8088 if (ST.hasOnlyRevVALUShifts()) {
8089 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
8093 case AMDGPU::S_ASHR_I32:
8094 if (ST.hasOnlyRevVALUShifts()) {
8095 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
8099 case AMDGPU::S_LSHR_B32:
8100 if (ST.hasOnlyRevVALUShifts()) {
8101 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
8105 case AMDGPU::S_LSHL_B64:
8106 if (ST.hasOnlyRevVALUShifts()) {
8108 ? AMDGPU::V_LSHLREV_B64_pseudo_e64
8109 : AMDGPU::V_LSHLREV_B64_e64;
8113 case AMDGPU::S_ASHR_I64:
8114 if (ST.hasOnlyRevVALUShifts()) {
8115 NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
8119 case AMDGPU::S_LSHR_B64:
8120 if (ST.hasOnlyRevVALUShifts()) {
8121 NewOpcode = AMDGPU::V_LSHRREV_B64_e64;
8126 case AMDGPU::S_ABS_I32:
8127 lowerScalarAbs(Worklist, Inst);
8131 case AMDGPU::S_ABSDIFF_I32:
8132 lowerScalarAbsDiff(Worklist, Inst);
8136 case AMDGPU::S_CBRANCH_SCC0:
8137 case AMDGPU::S_CBRANCH_SCC1: {
8140 bool IsSCC = CondReg == AMDGPU::SCC;
8148 case AMDGPU::S_BFE_U64:
8149 case AMDGPU::S_BFM_B64:
8152 case AMDGPU::S_PACK_LL_B32_B16:
8153 case AMDGPU::S_PACK_LH_B32_B16:
8154 case AMDGPU::S_PACK_HL_B32_B16:
8155 case AMDGPU::S_PACK_HH_B32_B16:
8156 movePackToVALU(Worklist, MRI, Inst);
8160 case AMDGPU::S_XNOR_B32:
8161 lowerScalarXnor(Worklist, Inst);
8165 case AMDGPU::S_NAND_B32:
8166 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
8170 case AMDGPU::S_NOR_B32:
8171 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
8175 case AMDGPU::S_ANDN2_B32:
8176 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
8180 case AMDGPU::S_ORN2_B32:
8181 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
8189 case AMDGPU::S_ADD_CO_PSEUDO:
8190 case AMDGPU::S_SUB_CO_PSEUDO: {
8191 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
8192 ? AMDGPU::V_ADDC_U32_e64
8193 : AMDGPU::V_SUBB_U32_e64;
8194 const auto *CarryRC = RI.getWaveMaskRegClass();
8216 addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
8220 case AMDGPU::S_UADDO_PSEUDO:
8221 case AMDGPU::S_USUBO_PSEUDO: {
8227 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_UADDO_PSEUDO)
8228 ? AMDGPU::V_ADD_CO_U32_e64
8229 : AMDGPU::V_SUB_CO_U32_e64;
8241 addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
8245 case AMDGPU::S_LSHL1_ADD_U32:
8246 case AMDGPU::S_LSHL2_ADD_U32:
8247 case AMDGPU::S_LSHL3_ADD_U32:
8248 case AMDGPU::S_LSHL4_ADD_U32: {
8252 unsigned ShiftAmt = (Opcode == AMDGPU::S_LSHL1_ADD_U32 ? 1
8253 : Opcode == AMDGPU::S_LSHL2_ADD_U32 ? 2
8254 : Opcode == AMDGPU::S_LSHL3_ADD_U32 ? 3
8268 addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
8272 case AMDGPU::S_CSELECT_B32:
8273 case AMDGPU::S_CSELECT_B64:
8274 lowerSelect(Worklist, Inst, MDT);
8277 case AMDGPU::S_CMP_EQ_I32:
8278 case AMDGPU::S_CMP_LG_I32:
8279 case AMDGPU::S_CMP_GT_I32:
8280 case AMDGPU::S_CMP_GE_I32:
8281 case AMDGPU::S_CMP_LT_I32:
8282 case AMDGPU::S_CMP_LE_I32:
8283 case AMDGPU::S_CMP_EQ_U32:
8284 case AMDGPU::S_CMP_LG_U32:
8285 case AMDGPU::S_CMP_GT_U32:
8286 case AMDGPU::S_CMP_GE_U32:
8287 case AMDGPU::S_CMP_LT_U32:
8288 case AMDGPU::S_CMP_LE_U32:
8289 case AMDGPU::S_CMP_EQ_U64:
8290 case AMDGPU::S_CMP_LG_U64:
8291 case AMDGPU::S_CMP_LT_F32:
8292 case AMDGPU::S_CMP_EQ_F32:
8293 case AMDGPU::S_CMP_LE_F32:
8294 case AMDGPU::S_CMP_GT_F32:
8295 case AMDGPU::S_CMP_LG_F32:
8296 case AMDGPU::S_CMP_GE_F32:
8297 case AMDGPU::S_CMP_O_F32:
8298 case AMDGPU::S_CMP_U_F32:
8299 case AMDGPU::S_CMP_NGE_F32:
8300 case AMDGPU::S_CMP_NLG_F32:
8301 case AMDGPU::S_CMP_NGT_F32:
8302 case AMDGPU::S_CMP_NLE_F32:
8303 case AMDGPU::S_CMP_NEQ_F32:
8304 case AMDGPU::S_CMP_NLT_F32: {
8309 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0_modifiers) >=
8323 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
8327 case AMDGPU::S_CMP_LT_F16:
8328 case AMDGPU::S_CMP_EQ_F16:
8329 case AMDGPU::S_CMP_LE_F16:
8330 case AMDGPU::S_CMP_GT_F16:
8331 case AMDGPU::S_CMP_LG_F16:
8332 case AMDGPU::S_CMP_GE_F16:
8333 case AMDGPU::S_CMP_O_F16:
8334 case AMDGPU::S_CMP_U_F16:
8335 case AMDGPU::S_CMP_NGE_F16:
8336 case AMDGPU::S_CMP_NLG_F16:
8337 case AMDGPU::S_CMP_NGT_F16:
8338 case AMDGPU::S_CMP_NLE_F16:
8339 case AMDGPU::S_CMP_NEQ_F16:
8340 case AMDGPU::S_CMP_NLT_F16: {
8363 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
8367 case AMDGPU::S_CVT_HI_F32_F16: {
8370 if (ST.useRealTrue16Insts()) {
8375 .
addReg(TmpReg, {}, AMDGPU::hi16)
8391 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8395 case AMDGPU::S_MINIMUM_F32:
8396 case AMDGPU::S_MAXIMUM_F32: {
8408 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8412 case AMDGPU::S_MINIMUM_F16:
8413 case AMDGPU::S_MAXIMUM_F16: {
8415 ? &AMDGPU::VGPR_16RegClass
8416 : &AMDGPU::VGPR_32RegClass);
8428 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8432 case AMDGPU::V_S_EXP_F16_e64:
8433 case AMDGPU::V_S_LOG_F16_e64:
8434 case AMDGPU::V_S_RCP_F16_e64:
8435 case AMDGPU::V_S_RSQ_F16_e64:
8436 case AMDGPU::V_S_SQRT_F16_e64: {
8438 ? &AMDGPU::VGPR_16RegClass
8439 : &AMDGPU::VGPR_32RegClass);
8451 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8457 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
8465 if (NewOpcode == Opcode) {
8472 V2SPhyCopiesToErase);
8480 RI.getCommonSubClass(NewDstRC, SrcRC)) {
8487 addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist);
8518 if (ST.useRealTrue16Insts() && Inst.
isCopy() &&
8522 if (RI.getMatchingSuperRegClass(NewDstRC, SrcRegRC, AMDGPU::lo16)) {
8528 get(AMDGPU::REG_SEQUENCE), NewDstReg)
8535 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8537 }
else if (RI.getMatchingSuperRegClass(SrcRegRC, NewDstRC,
8542 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8550 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8560 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8561 AMDGPU::OpName::src0_modifiers) >= 0)
8565 NewInstr->addOperand(Src);
8568 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
8571 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
8573 NewInstr.addImm(
Size);
8574 }
else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
8578 }
else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
8583 "Scalar BFE is only implemented for constant width and offset");
8591 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8592 AMDGPU::OpName::src1_modifiers) >= 0)
8594 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src1) >= 0)
8596 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8597 AMDGPU::OpName::src2_modifiers) >= 0)
8599 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src2) >= 0)
8601 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::clamp) >= 0)
8603 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::omod) >= 0)
8605 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::op_sel) >= 0)
8611 NewInstr->addOperand(
Op);
8618 if (
Op.getReg() == AMDGPU::SCC) {
8620 if (
Op.isDef() && !
Op.isDead())
8621 addSCCDefUsersToVALUWorklist(
Op, Inst, Worklist);
8623 addSCCDefsToVALUWorklist(NewInstr, Worklist);
8628 if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
8629 Register DstReg = NewInstr->getOperand(0).getReg();
8644 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8648std::pair<bool, MachineBasicBlock *>
8651 if (ST.hasAddNoCarryInsts()) {
8663 assert(
Opc == AMDGPU::S_ADD_I32 ||
Opc == AMDGPU::S_SUB_I32);
8665 unsigned NewOpc =
Opc == AMDGPU::S_ADD_I32 ?
8666 AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;
8677 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8678 return std::pair(
true, NewBB);
8681 return std::pair(
false,
nullptr);
8698 bool IsSCC = (CondReg == AMDGPU::SCC);
8706 for (MachineOperand &UseMO :
8708 MachineInstr &
UseMI = *UseMO.getParent();
8709 switch (
UseMI.getOpcode()) {
8710 case AMDGPU::V_CNDMASK_B16_fake16_e32:
8711 case AMDGPU::V_CNDMASK_B16_fake16_e64:
8712 case AMDGPU::V_CNDMASK_B16_t16_e32:
8713 case AMDGPU::V_CNDMASK_B16_t16_e64:
8714 case AMDGPU::V_CNDMASK_B32_e32:
8715 case AMDGPU::V_CNDMASK_B32_e64:
8716 case AMDGPU::V_CNDMASK_B64_PSEUDO:
8717 if (UseMO.isImplicit() ||
8719 UseMO.setReg(CondReg);
8728 const TargetRegisterClass *TC = RI.getWaveMaskRegClass();
8733 bool CopyFound =
false;
8734 for (MachineInstr &CandI :
8737 if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) !=
8739 if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
8741 .
addReg(CandI.getOperand(1).getReg());
8753 ST.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
8762 MachineInstr *NewInst;
8763 if (Inst.
getOpcode() == AMDGPU::S_CSELECT_B32) {
8764 NewInst =
BuildMI(
MBB, MII,
DL,
get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
8779 addUsersToMoveToVALUWorklist(NewDestReg, MRI, Worklist);
8794 unsigned SubOp = ST.hasAddNoCarryInsts() ? AMDGPU::V_SUB_U32_e32
8795 : AMDGPU::V_SUB_CO_U32_e32;
8806 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8823 unsigned SubOp = ST.hasAddNoCarryInsts() ? AMDGPU::V_SUB_U32_e32
8824 : AMDGPU::V_SUB_CO_U32_e32;
8837 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8851 if (ST.hasDLInsts()) {
8861 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8867 bool Src0IsSGPR = Src0.
isReg() &&
8869 bool Src1IsSGPR = Src1.
isReg() &&
8883 }
else if (Src1IsSGPR) {
8901 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8907 unsigned Opcode)
const {
8931 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8936 unsigned Opcode)
const {
8960 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8975 const MCInstrDesc &InstDesc =
get(Opcode);
8976 const TargetRegisterClass *Src0RC = Src0.
isReg() ?
8978 &AMDGPU::SGPR_32RegClass;
8980 const TargetRegisterClass *Src0SubRC =
8981 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8984 AMDGPU::sub0, Src0SubRC);
8987 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
8988 const TargetRegisterClass *NewDestSubRC =
8989 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8992 MachineInstr &LoHalf = *
BuildMI(
MBB, MII,
DL, InstDesc, DestSub0).
add(SrcReg0Sub0);
8995 AMDGPU::sub1, Src0SubRC);
8998 MachineInstr &HiHalf = *
BuildMI(
MBB, MII,
DL, InstDesc, DestSub1).
add(SrcReg0Sub1);
9012 Worklist.
insert(&LoHalf);
9013 Worklist.
insert(&HiHalf);
9019 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
9042 const TargetRegisterClass *Src0SubRC =
9043 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
9044 if (RI.isSGPRClass(Src0SubRC))
9045 Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
9046 const TargetRegisterClass *Src1SubRC =
9047 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
9048 if (RI.isSGPRClass(Src1SubRC))
9049 Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
9053 MachineOperand Op0L =
9055 MachineOperand Op1L =
9057 MachineOperand Op0H =
9059 MachineOperand Op1H =
9078 MachineInstr *Op1L_Op0H =
9084 MachineInstr *Op1H_Op0L =
9090 MachineInstr *Carry =
9095 MachineInstr *LoHalf =
9105 MachineInstr *HiHalf =
9128 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
9151 const TargetRegisterClass *Src0SubRC =
9152 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
9153 if (RI.isSGPRClass(Src0SubRC))
9154 Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
9155 const TargetRegisterClass *Src1SubRC =
9156 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
9157 if (RI.isSGPRClass(Src1SubRC))
9158 Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
9162 MachineOperand Op0L =
9164 MachineOperand Op1L =
9168 unsigned NewOpc =
Opc == AMDGPU::S_MUL_U64_U32_PSEUDO
9169 ? AMDGPU::V_MUL_HI_U32_e64
9170 : AMDGPU::V_MUL_HI_I32_e64;
9171 MachineInstr *HiHalf =
9174 MachineInstr *LoHalf =
9193 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
9209 const MCInstrDesc &InstDesc =
get(Opcode);
9210 const TargetRegisterClass *Src0RC = Src0.
isReg() ?
9212 &AMDGPU::SGPR_32RegClass;
9214 const TargetRegisterClass *Src0SubRC =
9215 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
9216 const TargetRegisterClass *Src1RC = Src1.
isReg() ?
9218 &AMDGPU::SGPR_32RegClass;
9220 const TargetRegisterClass *Src1SubRC =
9221 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
9224 AMDGPU::sub0, Src0SubRC);
9226 AMDGPU::sub0, Src1SubRC);
9228 AMDGPU::sub1, Src0SubRC);
9230 AMDGPU::sub1, Src1SubRC);
9233 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
9234 const TargetRegisterClass *NewDestSubRC =
9235 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
9238 MachineInstr &LoHalf = *
BuildMI(
MBB, MII,
DL, InstDesc, DestSub0)
9243 MachineInstr &HiHalf = *
BuildMI(
MBB, MII,
DL, InstDesc, DestSub1)
9256 Worklist.
insert(&LoHalf);
9257 Worklist.
insert(&HiHalf);
9260 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
9280 MachineOperand* Op0;
9281 MachineOperand* Op1;
9283 if (Src0.
isReg() && RI.isSGPRReg(MRI, Src0.
getReg())) {
9316 const MCInstrDesc &InstDesc =
get(AMDGPU::V_BCNT_U32_B32_e64);
9317 const TargetRegisterClass *SrcRC = Src.isReg() ?
9319 &AMDGPU::SGPR_32RegClass;
9324 const TargetRegisterClass *SrcSubRC =
9325 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
9328 AMDGPU::sub0, SrcSubRC);
9330 AMDGPU::sub1, SrcSubRC);
9340 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9359 Offset == 0 &&
"Not implemented");
9382 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9392 .
addReg(Src.getReg(), {}, AMDGPU::sub0);
9395 .
addReg(Src.getReg(), {}, AMDGPU::sub0)
9401 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9420 const MCInstrDesc &InstDesc =
get(Opcode);
9422 bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
9423 unsigned OpcodeAdd = ST.hasAddNoCarryInsts() ? AMDGPU::V_ADD_U32_e64
9424 : AMDGPU::V_ADD_CO_U32_e32;
9426 const TargetRegisterClass *SrcRC =
9427 Src.isReg() ? MRI.
getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
9428 const TargetRegisterClass *SrcSubRC =
9429 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
9431 MachineOperand SrcRegSub0 =
9433 MachineOperand SrcRegSub1 =
9446 .
addReg(IsCtlz ? MidReg1 : MidReg2)
9452 .
addReg(IsCtlz ? MidReg2 : MidReg1);
9456 addUsersToMoveToVALUWorklist(MidReg4, MRI, Worklist);
9459void SIInstrInfo::addUsersToMoveToVALUWorklist(
9463 MachineInstr &
UseMI = *MO.getParent();
9467 switch (
UseMI.getOpcode()) {
9470 case AMDGPU::SOFT_WQM:
9471 case AMDGPU::STRICT_WWM:
9472 case AMDGPU::STRICT_WQM:
9473 case AMDGPU::REG_SEQUENCE:
9475 case AMDGPU::INSERT_SUBREG:
9478 OpNo = MO.getOperandNo();
9485 if (!RI.hasVectorRegisters(OpRC))
9502 if (ST.useRealTrue16Insts()) {
9504 if (!Src0.
isReg() || !RI.isVGPR(MRI, Src0.
getReg())) {
9507 get(Src0.
isImm() ? AMDGPU::V_MOV_B32_e32 : AMDGPU::COPY), SrcReg0)
9513 if (!Src1.
isReg() || !RI.isVGPR(MRI, Src1.
getReg())) {
9516 get(Src1.
isImm() ? AMDGPU::V_MOV_B32_e32 : AMDGPU::COPY), SrcReg1)
9525 auto NewMI =
BuildMI(*
MBB, Inst,
DL,
get(AMDGPU::REG_SEQUENCE), ResultReg);
9527 case AMDGPU::S_PACK_LL_B32_B16:
9529 .addReg(SrcReg0, {},
9530 isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9531 .addImm(AMDGPU::lo16)
9532 .addReg(SrcReg1, {},
9533 isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9534 .addImm(AMDGPU::hi16);
9536 case AMDGPU::S_PACK_LH_B32_B16:
9538 .addReg(SrcReg0, {},
9539 isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9540 .addImm(AMDGPU::lo16)
9541 .addReg(SrcReg1, {}, AMDGPU::hi16)
9542 .addImm(AMDGPU::hi16);
9544 case AMDGPU::S_PACK_HL_B32_B16:
9545 NewMI.addReg(SrcReg0, {}, AMDGPU::hi16)
9546 .addImm(AMDGPU::lo16)
9547 .addReg(SrcReg1, {},
9548 isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9549 .addImm(AMDGPU::hi16);
9551 case AMDGPU::S_PACK_HH_B32_B16:
9552 NewMI.addReg(SrcReg0, {}, AMDGPU::hi16)
9553 .addImm(AMDGPU::lo16)
9554 .addReg(SrcReg1, {}, AMDGPU::hi16)
9555 .addImm(AMDGPU::hi16);
9563 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9568 case AMDGPU::S_PACK_LL_B32_B16: {
9587 case AMDGPU::S_PACK_LH_B32_B16: {
9597 case AMDGPU::S_PACK_HL_B32_B16: {
9608 case AMDGPU::S_PACK_HH_B32_B16: {
9628 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9637 assert(
Op.isReg() &&
Op.getReg() == AMDGPU::SCC &&
Op.isDef() &&
9638 !
Op.isDead() &&
Op.getParent() == &SCCDefInst);
9639 SmallVector<MachineInstr *, 4> CopyToDelete;
9642 for (MachineInstr &
MI :
9646 int SCCIdx =
MI.findRegisterUseOperandIdx(AMDGPU::SCC, &RI,
false);
9649 MachineRegisterInfo &MRI =
MI.getMF()->getRegInfo();
9650 Register DestReg =
MI.getOperand(0).getReg();
9657 MI.getOperand(SCCIdx).setReg(NewCond);
9663 if (
MI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) != -1)
9666 for (
auto &Copy : CopyToDelete)
9667 Copy->eraseFromParent();
9675void SIInstrInfo::addSCCDefsToVALUWorklist(
MachineInstr *SCCUseInst,
9681 for (MachineInstr &
MI :
9684 if (
MI.modifiesRegister(AMDGPU::VCC, &RI))
9686 if (
MI.definesRegister(AMDGPU::SCC, &RI)) {
9695 const TargetRegisterClass *NewDstRC =
getOpRegClass(Inst, 0);
9703 case AMDGPU::REG_SEQUENCE:
9704 case AMDGPU::INSERT_SUBREG:
9706 case AMDGPU::SOFT_WQM:
9707 case AMDGPU::STRICT_WWM:
9708 case AMDGPU::STRICT_WQM: {
9710 if (RI.isAGPRClass(SrcRC)) {
9711 if (RI.isAGPRClass(NewDstRC))
9716 case AMDGPU::REG_SEQUENCE:
9717 case AMDGPU::INSERT_SUBREG:
9718 NewDstRC = RI.getEquivalentAGPRClass(NewDstRC);
9721 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
9727 if (RI.isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
9730 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
9744 int OpIndices[3])
const {
9745 const MCInstrDesc &
Desc =
MI.getDesc();
9761 const MachineRegisterInfo &MRI =
MI.getMF()->getRegInfo();
9763 for (
unsigned i = 0; i < 3; ++i) {
9764 int Idx = OpIndices[i];
9768 const MachineOperand &MO =
MI.getOperand(Idx);
9774 const TargetRegisterClass *OpRC =
9775 RI.getRegClass(getOpRegClassID(
Desc.operands()[Idx]));
9776 bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
9783 if (RI.isSGPRClass(RegRC))
9801 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
9802 SGPRReg = UsedSGPRs[0];
9805 if (!SGPRReg && UsedSGPRs[1]) {
9806 if (UsedSGPRs[1] == UsedSGPRs[2])
9807 SGPRReg = UsedSGPRs[1];
9814 AMDGPU::OpName OperandName)
const {
9815 if (OperandName == AMDGPU::OpName::NUM_OPERAND_NAMES)
9818 int Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OperandName);
9822 return &
MI.getOperand(Idx);
9836 if (ST.isAmdHsaOS()) {
9839 RsrcDataFormat |= (1ULL << 56);
9844 RsrcDataFormat |= (2ULL << 59);
9847 return RsrcDataFormat;
9857 uint64_t EltSizeValue =
Log2_32(ST.getMaxPrivateElementSize(
true)) - 1;
9862 uint64_t IndexStride = ST.isWave64() ? 3 : 2;
9869 Rsrc23 &=
~AMDGPU::RSRC_DATA_FORMAT;
9875 unsigned Opc =
MI.getOpcode();
9881 return get(
Opc).mayLoad() &&
9888 if (!Addr || !Addr->
isFI())
9897 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::vdata);
9899 return MI.getOperand(VDataIdx).getReg();
9909 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::data);
9911 return MI.getOperand(DataIdx).getReg();
9945 unsigned Opc =
MI.getOpcode();
9947 unsigned DescSize =
Desc.getSize();
9952 unsigned Size = DescSize;
9956 if (
MI.isBranch() && ST.hasOffset3fBug())
9967 bool HasLiteral =
false;
9968 unsigned LiteralSize = 4;
9969 for (
int I = 0, E =
MI.getNumExplicitOperands();
I != E; ++
I) {
9974 if (ST.has64BitLiterals()) {
9975 switch (OpInfo.OperandType) {
10000 return HasLiteral ? DescSize + LiteralSize : DescSize;
10005 int VAddr0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr0);
10009 int RSrcIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::srsrc);
10010 return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
10014 case TargetOpcode::BUNDLE:
10015 return getInstBundleSize(
MI);
10016 case TargetOpcode::INLINEASM:
10017 case TargetOpcode::INLINEASM_BR: {
10019 const char *AsmStr =
MI.getOperand(0).getSymbolName();
10023 if (
MI.isMetaInstruction())
10027 const auto *D16Info = AMDGPU::getT16D16Helper(
Opc);
10030 unsigned LoInstOpcode = D16Info->LoOp;
10032 DescSize =
Desc.getSize();
10036 if (
Opc == AMDGPU::V_FMA_MIX_F16_t16 ||
Opc == AMDGPU::V_FMA_MIX_BF16_t16) {
10039 DescSize =
Desc.getSize();
10048 if (
MI.isBranch() && ST.hasOffset3fBug())
10049 return InstSizeVerifyMode::NoVerify;
10050 return InstSizeVerifyMode::ExactSize;
10057 if (
MI.memoperands_empty())
10069 static const std::pair<int, const char *> TargetIndices[] = {
10108std::pair<unsigned, unsigned>
10115 static const std::pair<unsigned, const char *> TargetFlags[] = {
10133 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
10149 return AMDGPU::WWM_COPY;
10151 return AMDGPU::COPY;
10168 if (!IsLRSplitInst && Opcode != AMDGPU::IMPLICIT_DEF)
10172 if (RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg)))
10173 return IsLRSplitInst;
10186 bool IsNullOrVectorRegister =
true;
10190 IsNullOrVectorRegister = !RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg));
10193 return IsNullOrVectorRegister &&
10195 (!
MI.isTerminator() &&
MI.getOpcode() != AMDGPU::COPY &&
10196 MI.modifiesRegister(AMDGPU::EXEC, &RI)));
10204 if (ST.hasAddNoCarryInsts())
10220 if (ST.hasAddNoCarryInsts())
10224 Register UnusedCarry = !RS.isRegUsed(AMDGPU::VCC)
10226 : RS.scavengeRegisterBackwards(
10227 *RI.getBoolRC(),
I,
false,
10240 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
10241 case AMDGPU::SI_KILL_I1_TERMINATOR:
10250 case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
10251 return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
10252 case AMDGPU::SI_KILL_I1_PSEUDO:
10253 return get(AMDGPU::SI_KILL_I1_TERMINATOR);
10265 const unsigned OffsetBits =
10267 return (1 << OffsetBits) - 1;
10271 if (!ST.isWave32())
10274 if (
MI.isInlineAsm())
10277 if (
MI.getNumOperands() <
MI.getNumExplicitOperands())
10280 for (
auto &
Op :
MI.implicit_operands()) {
10281 if (
Op.isReg() &&
Op.getReg() == AMDGPU::VCC)
10282 Op.setReg(AMDGPU::VCC_LO);
10291 int Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::sbase);
10295 const int16_t RCID = getOpRegClassID(
MI.getDesc().operands()[Idx]);
10296 return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass);
10312 if (Imm > MaxImm) {
10313 if (Imm <= MaxImm + 64) {
10315 Overflow = Imm - MaxImm;
10334 if (Overflow > 0) {
10342 if (ST.hasRestrictedSOffset())
10347 SOffset = Overflow;
10385 if (!ST.hasFlatInstOffsets())
10389 if (ST.hasFlatSegmentOffsetBug() && FlatVariant == FlatAddrSpace::FLAT &&
10394 if (ST.hasNegativeUnalignedScratchOffsetBug() &&
10395 FlatVariant == FlatAddrSpace::FlatScratch &&
Offset < 0 &&
10406std::pair<int64_t, int64_t>
10409 int64_t RemainderOffset = COffsetVal;
10410 int64_t ImmField = 0;
10415 if (AllowNegative) {
10417 int64_t
D = 1LL << NumBits;
10418 RemainderOffset = (COffsetVal /
D) *
D;
10419 ImmField = COffsetVal - RemainderOffset;
10421 if (ST.hasNegativeUnalignedScratchOffsetBug() &&
10423 (ImmField % 4) != 0) {
10425 RemainderOffset += ImmField % 4;
10426 ImmField -= ImmField % 4;
10428 }
else if (COffsetVal >= 0) {
10430 RemainderOffset = COffsetVal - ImmField;
10434 assert(RemainderOffset + ImmField == COffsetVal);
10435 return {ImmField, RemainderOffset};
10440 if (ST.hasNegativeScratchOffsetBug() &&
10448 switch (ST.getGeneration()) {
10477 case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
10478 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
10479 case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
10480 case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
10481 case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
10482 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
10483 case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
10484 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
10491#define GENERATE_RENAMED_GFX9_CASES(OPCODE) \
10492 case OPCODE##_dpp: \
10493 case OPCODE##_e32: \
10494 case OPCODE##_e64: \
10495 case OPCODE##_e64_dpp: \
10496 case OPCODE##_sdwa:
10510 case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
10511 case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64:
10512 case AMDGPU::V_FMA_F16_gfx9_e64:
10513 case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
10514 case AMDGPU::V_INTERP_P2_F16:
10515 case AMDGPU::V_MAD_F16_e64:
10516 case AMDGPU::V_MAD_U16_e64:
10517 case AMDGPU::V_MAD_I16_e64:
10526 "SIInsertWaitcnts should have promoted soft waitcnt instructions!");
10540 switch (ST.getGeneration()) {
10553 if (
isMAI(Opcode)) {
10561 if (MCOp == AMDGPU::INSTRUCTION_LIST_END && ST.hasGFX11_7Insts())
10564 if (MCOp == AMDGPU::INSTRUCTION_LIST_END && ST.hasGFX1250Insts())
10571 if (ST.hasGFX90AInsts()) {
10572 uint32_t NMCOp = AMDGPU::INSTRUCTION_LIST_END;
10573 if (ST.hasGFX940Insts())
10575 if (NMCOp == AMDGPU::INSTRUCTION_LIST_END)
10577 if (NMCOp == AMDGPU::INSTRUCTION_LIST_END)
10579 if (NMCOp != AMDGPU::INSTRUCTION_LIST_END)
10585 if (MCOp == AMDGPU::INSTRUCTION_LIST_END)
10604 for (
unsigned I = 0, E = (
MI.getNumOperands() - 1)/ 2;
I < E; ++
I)
10605 if (
MI.getOperand(1 + 2 *
I + 1).getImm() == SubReg) {
10606 auto &RegOp =
MI.getOperand(1 + 2 *
I);
10618 switch (
MI.getOpcode()) {
10620 case AMDGPU::REG_SEQUENCE:
10624 case AMDGPU::INSERT_SUBREG:
10625 if (RSR.
SubReg == (
unsigned)
MI.getOperand(3).getImm())
10642 if (!
P.Reg.isVirtual())
10647 while (
auto *
MI = DefInst) {
10649 switch (
MI->getOpcode()) {
10651 case AMDGPU::V_MOV_B32_e32: {
10652 auto &Op1 =
MI->getOperand(1);
10681 auto *DefBB =
DefMI.getParent();
10685 if (
UseMI.getParent() != DefBB)
10688 const int MaxInstScan = 20;
10692 auto E =
UseMI.getIterator();
10693 for (
auto I = std::next(
DefMI.getIterator());
I != E; ++
I) {
10694 if (
I->isDebugInstr())
10697 if (++NumInst > MaxInstScan)
10700 if (
I->modifiesRegister(AMDGPU::EXEC,
TRI))
10713 auto *DefBB =
DefMI.getParent();
10715 const int MaxUseScan = 10;
10719 auto &UseInst = *
Use.getParent();
10722 if (UseInst.getParent() != DefBB || UseInst.isPHI())
10725 if (++NumUse > MaxUseScan)
10732 const int MaxInstScan = 20;
10736 for (
auto I = std::next(
DefMI.getIterator()); ; ++
I) {
10739 if (
I->isDebugInstr())
10742 if (++NumInst > MaxInstScan)
10755 if (Reg == VReg && --NumUse == 0)
10757 }
else if (
TRI->regsOverlap(Reg, AMDGPU::EXEC))
10766 auto Cur =
MBB.begin();
10767 if (Cur !=
MBB.end())
10769 if (!Cur->isPHI() && Cur->readsRegister(Dst,
nullptr))
10772 }
while (Cur !=
MBB.end() && Cur != LastPHIIt);
10781 if (InsPt !=
MBB.end() &&
10782 (InsPt->getOpcode() == AMDGPU::SI_IF ||
10783 InsPt->getOpcode() == AMDGPU::SI_ELSE ||
10784 InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
10785 InsPt->definesRegister(Src,
nullptr)) {
10789 .
addReg(Src, {}, SrcSubReg)
10832 if (isFullCopyInstr(
MI)) {
10833 Register DstReg =
MI.getOperand(0).getReg();
10834 Register SrcReg =
MI.getOperand(1).getReg();
10856 unsigned *PredCost)
const {
10857 if (
MI.isBundle()) {
10860 unsigned Lat = 0,
Count = 0;
10861 for (++
I;
I != E &&
I->isBundledWithPred(); ++
I) {
10863 Lat = std::max(Lat, SchedModel.computeInstrLatency(&*
I));
10865 return Lat +
Count - 1;
10868 return SchedModel.computeInstrLatency(&
MI);
10875 return *CallAddrOp;
10882 unsigned Opcode =
MI.getOpcode();
10884 auto HandleAddrSpaceCast = [
this, &MRI](
const MachineInstr &
MI) {
10887 :
MI.getOperand(1).getReg();
10891 unsigned SrcAS = SrcTy.getAddressSpace();
10894 ST.hasGloballyAddressableScratch()
10902 if (Opcode == TargetOpcode::G_ADDRSPACE_CAST)
10903 return HandleAddrSpaceCast(
MI);
10906 auto IID = GI->getIntrinsicID();
10913 case Intrinsic::amdgcn_addrspacecast_nonnull:
10914 return HandleAddrSpaceCast(
MI);
10915 case Intrinsic::amdgcn_if:
10916 case Intrinsic::amdgcn_else:
10930 if (Opcode == AMDGPU::G_LOAD || Opcode == AMDGPU::G_ZEXTLOAD ||
10931 Opcode == AMDGPU::G_SEXTLOAD) {
10932 if (
MI.memoperands_empty())
10936 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10937 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10945 if (SIInstrInfo::isGenericAtomicRMWOpcode(Opcode) ||
10946 Opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
10947 Opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
10953 if (Opcode == TargetOpcode::G_DYN_STACKALLOC)
10956 if (Opcode == AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP)
10964 Formatter = std::make_unique<AMDGPUMIRFormatter>(ST);
10965 return Formatter.get();
10973 unsigned opcode =
MI.getOpcode();
10974 if (opcode == AMDGPU::V_READLANE_B32 ||
10975 opcode == AMDGPU::V_READFIRSTLANE_B32 ||
10976 opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
10981 if (
MI.isInlineAsm()) {
10987 if (!RC || !RI.isSGPRClass(RC))
10992 if (isCopyInstr(
MI)) {
10996 RI.getPhysRegBaseClass(srcOp.
getReg());
11004 if (
MI.isPreISelOpcode())
11019 if (
MI.memoperands_empty())
11023 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
11024 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
11039 for (
unsigned I = 0, E =
MI.getNumOperands();
I != E; ++
I) {
11041 if (!
SrcOp.isReg())
11045 if (!Reg || !
SrcOp.readsReg())
11051 if (RegBank && RegBank->
getID() != AMDGPU::SGPRRegBankID)
11078 F,
"ds_ordered_count unsupported for this calling conv"));
11092 Register &SrcReg2, int64_t &CmpMask,
11093 int64_t &CmpValue)
const {
11094 if (!
MI.getOperand(0).isReg() ||
MI.getOperand(0).getSubReg())
11097 switch (
MI.getOpcode()) {
11100 case AMDGPU::S_CMP_EQ_U32:
11101 case AMDGPU::S_CMP_EQ_I32:
11102 case AMDGPU::S_CMP_LG_U32:
11103 case AMDGPU::S_CMP_LG_I32:
11104 case AMDGPU::S_CMP_LT_U32:
11105 case AMDGPU::S_CMP_LT_I32:
11106 case AMDGPU::S_CMP_GT_U32:
11107 case AMDGPU::S_CMP_GT_I32:
11108 case AMDGPU::S_CMP_LE_U32:
11109 case AMDGPU::S_CMP_LE_I32:
11110 case AMDGPU::S_CMP_GE_U32:
11111 case AMDGPU::S_CMP_GE_I32:
11112 case AMDGPU::S_CMP_EQ_U64:
11113 case AMDGPU::S_CMP_LG_U64:
11114 SrcReg =
MI.getOperand(0).getReg();
11115 if (
MI.getOperand(1).isReg()) {
11116 if (
MI.getOperand(1).getSubReg())
11118 SrcReg2 =
MI.getOperand(1).getReg();
11120 }
else if (
MI.getOperand(1).isImm()) {
11122 CmpValue =
MI.getOperand(1).getImm();
11128 case AMDGPU::S_CMPK_EQ_U32:
11129 case AMDGPU::S_CMPK_EQ_I32:
11130 case AMDGPU::S_CMPK_LG_U32:
11131 case AMDGPU::S_CMPK_LG_I32:
11132 case AMDGPU::S_CMPK_LT_U32:
11133 case AMDGPU::S_CMPK_LT_I32:
11134 case AMDGPU::S_CMPK_GT_U32:
11135 case AMDGPU::S_CMPK_GT_I32:
11136 case AMDGPU::S_CMPK_LE_U32:
11137 case AMDGPU::S_CMPK_LE_I32:
11138 case AMDGPU::S_CMPK_GE_U32:
11139 case AMDGPU::S_CMPK_GE_I32:
11140 SrcReg =
MI.getOperand(0).getReg();
11142 CmpValue =
MI.getOperand(1).getImm();
11152 if (S->isLiveIn(AMDGPU::SCC))
11161bool SIInstrInfo::invertSCCUse(
MachineInstr *SCCDef)
const {
11164 bool SCCIsDead =
false;
11167 constexpr unsigned ScanLimit = 12;
11168 unsigned Count = 0;
11169 for (MachineInstr &
MI :
11171 if (++
Count > ScanLimit)
11173 if (
MI.readsRegister(AMDGPU::SCC, &RI)) {
11174 if (
MI.getOpcode() == AMDGPU::S_CSELECT_B32 ||
11175 MI.getOpcode() == AMDGPU::S_CSELECT_B64 ||
11176 MI.getOpcode() == AMDGPU::S_CBRANCH_SCC0 ||
11177 MI.getOpcode() == AMDGPU::S_CBRANCH_SCC1)
11182 if (
MI.definesRegister(AMDGPU::SCC, &RI)) {
11195 for (MachineInstr *
MI : InvertInstr) {
11196 if (
MI->getOpcode() == AMDGPU::S_CSELECT_B32 ||
11197 MI->getOpcode() == AMDGPU::S_CSELECT_B64) {
11199 }
else if (
MI->getOpcode() == AMDGPU::S_CBRANCH_SCC0 ||
11200 MI->getOpcode() == AMDGPU::S_CBRANCH_SCC1) {
11201 MI->setDesc(
get(
MI->getOpcode() == AMDGPU::S_CBRANCH_SCC0
11202 ? AMDGPU::S_CBRANCH_SCC1
11203 : AMDGPU::S_CBRANCH_SCC0));
11216 bool NeedInversion)
const {
11217 MachineInstr *KillsSCC =
nullptr;
11222 if (
MI.modifiesRegister(AMDGPU::SCC, &RI))
11224 if (
MI.killsRegister(AMDGPU::SCC, &RI))
11227 if (NeedInversion && !invertSCCUse(SCCRedefine))
11229 if (MachineOperand *SccDef =
11231 SccDef->setIsDead(
false);
11239 if (Def.getOpcode() != AMDGPU::S_CSELECT_B32 &&
11240 Def.getOpcode() != AMDGPU::S_CSELECT_B64)
11242 bool Op1IsNonZeroImm =
11243 Def.getOperand(1).isImm() && Def.getOperand(1).getImm() != 0;
11244 bool Op2IsZeroImm =
11245 Def.getOperand(2).isImm() && Def.getOperand(2).getImm() == 0;
11246 if (!Op1IsNonZeroImm || !Op2IsZeroImm)
11252 unsigned &NewDefOpc) {
11255 if (Def.getOpcode() != AMDGPU::S_ADD_I32 &&
11256 Def.getOpcode() != AMDGPU::S_ADD_U32)
11262 if ((!AddSrc1.
isImm() || AddSrc1.
getImm() != 1) &&
11268 if (Def.getOpcode() == AMDGPU::S_ADD_I32) {
11270 Def.findRegisterDefOperand(AMDGPU::SCC,
nullptr);
11273 NewDefOpc = AMDGPU::S_ADD_U32;
11275 NeedInversion = !NeedInversion;
11280 Register SrcReg2, int64_t CmpMask,
11289 const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI,
11290 this](
bool NeedInversion) ->
bool {
11314 unsigned NewDefOpc = Def->getOpcode();
11320 if (!optimizeSCC(Def, &CmpInstr, NeedInversion))
11323 if (NewDefOpc != Def->getOpcode())
11324 Def->setDesc(
get(NewDefOpc));
11333 if (Def->getOpcode() == AMDGPU::S_OR_B32 &&
11340 if (Def1 && Def1->
getOpcode() == AMDGPU::COPY && Def2 &&
11348 optimizeSCC(
Select, Def,
false);
11355 const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
11356 this](int64_t ExpectedValue,
unsigned SrcSize,
11357 bool IsReversible,
bool IsSigned) ->
bool {
11385 if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
11386 Def->getOpcode() != AMDGPU::S_AND_B64)
11390 const auto isMask = [&Mask, SrcSize](
const MachineOperand *MO) ->
bool {
11401 SrcOp = &Def->getOperand(2);
11402 else if (isMask(&Def->getOperand(2)))
11403 SrcOp = &Def->getOperand(1);
11411 if (IsSigned && BitNo == SrcSize - 1)
11414 ExpectedValue <<= BitNo;
11416 bool IsReversedCC =
false;
11417 if (CmpValue != ExpectedValue) {
11420 IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
11425 Register DefReg = Def->getOperand(0).getReg();
11429 if (!optimizeSCC(Def, &CmpInstr,
false))
11440 unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
11441 : AMDGPU::S_BITCMP1_B32
11442 : IsReversedCC ? AMDGPU::S_BITCMP0_B64
11443 : AMDGPU::S_BITCMP1_B64;
11448 Def->eraseFromParent();
11456 case AMDGPU::S_CMP_EQ_U32:
11457 case AMDGPU::S_CMP_EQ_I32:
11458 case AMDGPU::S_CMPK_EQ_U32:
11459 case AMDGPU::S_CMPK_EQ_I32:
11460 return optimizeCmpAnd(1, 32,
true,
false) ||
11461 optimizeCmpSelect(
true);
11462 case AMDGPU::S_CMP_GE_U32:
11463 case AMDGPU::S_CMPK_GE_U32:
11464 return optimizeCmpAnd(1, 32,
false,
false);
11465 case AMDGPU::S_CMP_GE_I32:
11466 case AMDGPU::S_CMPK_GE_I32:
11467 return optimizeCmpAnd(1, 32,
false,
true);
11468 case AMDGPU::S_CMP_EQ_U64:
11469 return optimizeCmpAnd(1, 64,
true,
false);
11470 case AMDGPU::S_CMP_LG_U32:
11471 case AMDGPU::S_CMP_LG_I32:
11472 case AMDGPU::S_CMPK_LG_U32:
11473 case AMDGPU::S_CMPK_LG_I32:
11474 return optimizeCmpAnd(0, 32,
true,
false) ||
11475 optimizeCmpSelect(
false);
11476 case AMDGPU::S_CMP_GT_U32:
11477 case AMDGPU::S_CMPK_GT_U32:
11478 return optimizeCmpAnd(0, 32,
false,
false);
11479 case AMDGPU::S_CMP_GT_I32:
11480 case AMDGPU::S_CMPK_GT_I32:
11481 return optimizeCmpAnd(0, 32,
false,
true);
11482 case AMDGPU::S_CMP_LG_U64:
11483 return optimizeCmpAnd(0, 64,
true,
false) ||
11484 optimizeCmpSelect(
false);
11491 AMDGPU::OpName
OpName)
const {
11492 if (!ST.needsAlignedVGPRs())
11495 int OpNo = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
OpName);
11507 bool IsAGPR = RI.isAGPR(MRI, DataReg);
11509 IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
11513 : &AMDGPU::VReg_64_Align2RegClass);
11515 .
addReg(DataReg, {},
Op.getSubReg())
11520 Op.setSubReg(AMDGPU::sub0);
11535 if (ST.hasGFX1250Insts())
11542 unsigned Opcode =
MI.getOpcode();
11548 Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
11549 Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64)
11552 if (!ST.hasGFX940Insts())
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isUndef(const MachineInstr &MI)
TargetInstrInfo::RegSubRegPair RegSubRegPair
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
static cl::opt< bool > Fix16BitCopies("amdgpu-fix-16-bit-physreg-copies", cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"), cl::init(true), cl::ReallyHidden)
static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const TargetRegisterClass *RC, bool Forward)
static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc)
static void indirectCopyToAGPR(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, RegScavenger &RS, bool RegsOverlap, Register ImpDefSuperReg=Register(), Register ImpUseSuperReg=Register())
Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908.
static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize)
static bool compareMachineOp(const MachineOperand &Op0, const MachineOperand &Op1)
static bool isStride64(unsigned Opc)
static MachineBasicBlock * generateWaterFallLoop(const SIInstrInfo &TII, MachineInstr &MI, ArrayRef< MachineOperand * > ScalarOps, MachineDominatorTree *MDT, MachineBasicBlock::iterator Begin=nullptr, MachineBasicBlock::iterator End=nullptr, ArrayRef< Register > PhySGPRs={})
#define GENERATE_RENAMED_GFX9_CASES(OPCODE)
static std::tuple< unsigned, unsigned > extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc)
static bool followSubRegDef(MachineInstr &MI, TargetInstrInfo::RegSubRegPair &RSR)
static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize)
static MachineInstr * swapImmOperands(MachineInstr &MI, MachineOperand &NonRegOp1, MachineOperand &NonRegOp2)
static void copyFlagsToImplicitVCC(MachineInstr &MI, const MachineOperand &Orig)
static bool offsetsDoNotOverlap(LocationSize WidthA, int OffsetA, LocationSize WidthB, int OffsetB)
static unsigned getWWMRegSpillSaveOpcode(unsigned Size, bool IsVectorSuperClass)
static bool memOpsHaveSameBaseOperands(ArrayRef< const MachineOperand * > BaseOps1, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getWWMRegSpillRestoreOpcode(unsigned Size, bool IsVectorSuperClass)
static unsigned getSGPRSpillSaveOpcode(unsigned Size, bool NeedsCFI)
static bool setsSCCIfResultIsZero(const MachineInstr &Def, bool &NeedInversion, unsigned &NewDefOpc)
static bool isSCCDeadOnExit(MachineBasicBlock *MBB)
static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI, int64_t &Imm, MachineInstr **DefMI=nullptr)
static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize)
static unsigned subtargetEncodingFamily(const GCNSubtarget &ST)
static void preserveCondRegFlags(MachineOperand &CondReg, const MachineOperand &OrigCond)
static Register findImplicitSGPRRead(const MachineInstr &MI)
static unsigned getNewFMAAKInst(const GCNSubtarget &ST, unsigned Opc)
static cl::opt< unsigned > BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)"))
static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, MachineInstr &NewMI)
static unsigned getAVSpillSaveOpcode(unsigned Size, bool NeedsCFI)
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, ArrayRef< const MachineOperand * > BaseOps1, const MachineInstr &MI2, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getSGPRSpillRestoreOpcode(unsigned Size)
static bool isRegOrFI(const MachineOperand &MO)
static unsigned getVGPRSpillSaveOpcode(unsigned Size, bool NeedsCFI)
static constexpr AMDGPU::OpName ModifierOpNames[]
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const char *Msg="illegal VGPR to SGPR copy")
static MachineInstr * swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp)
static bool shouldReadExec(const MachineInstr &MI)
static unsigned getNewFMAMKInst(const GCNSubtarget &ST, unsigned Opc)
static bool isRenamedInGFX9(int Opcode)
static TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd)
static bool changesVGPRIndexingMode(const MachineInstr &MI)
static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg)
static bool foldableSelect(const MachineInstr &Def)
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, AMDGPU::OpName OpName)
Returns true if both nodes have the same value for the given operand Op, or if both nodes do not have...
static unsigned getNumOperandsNoGlue(SDNode *Node)
static bool canRemat(const MachineInstr &MI)
static unsigned getAVSpillRestoreOpcode(unsigned Size)
static void emitLoadScalarOpsFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &PredBB, MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL, ArrayRef< MachineOperand * > ScalarOps, ArrayRef< Register > PhySGPRs={})
static unsigned getVGPRSpillRestoreOpcode(unsigned Size)
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
const unsigned AndN2WrExecOpc
static const LaneMaskConstants & get(const GCNSubtarget &ST)
const unsigned XorTermOpc
const unsigned OrSaveExecOpc
const unsigned AndSaveExecOpc
static LLVM_ABI Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
Class for arbitrary precision integers.
int64_t getSExtValue() const
Get sign extended value.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
Get the first element.
size_t size() const
Get the array size.
bool empty() const
Check if the array is empty.
uint64_t getZExtValue() const
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Diagnostic information for unsupported feature in backend.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this function...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
void getExitingBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all blocks of this cycle that have successor outside of this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Itinerary data supplied by a subtarget to be used by a target.
constexpr unsigned getAddressSpace() const
This is an important class for using LLVM in a threaded context.
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
LLVM_ABI bool shrinkToUses(LiveInterval *li, SmallVectorImpl< MachineInstr * > *dead=nullptr)
After removing some uses of a register, shrink its live range to just the remaining uses.
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
This class represents the liveness of a register, stack slot, etc.
LLVM_ABI void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
LLVM_ABI VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
static LocationSize precise(uint64_t Value)
TypeSize getValue() const
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createAShr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
unsigned getSize() const
Return the number of bytes in the encoding of this instruction, or zero if the encoding size cannot b...
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
unsigned getOpcode() const
Return the opcode number for this descriptor.
This holds information about one operand of a machine instruction, indicating the register class for ...
uint8_t OperandType
Information about the type of the operand.
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Wrapper class representing physical registers. Should be passed by value.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
LLVM_ABI void setVariableValue(const MCExpr *Value)
Helper class for constructing bundles of MachineInstrs.
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and updates PHI operands in the successor bloc...
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before 'Where'.
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Dead
Register is known to be fully dead.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
LLVM_ABI void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
mop_range implicit_operands()
bool modifiesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr modifies (fully defines or partially defines) the specified register.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
LLVM_ABI void eraseFromBundle()
Unlink 'this' from its basic block and delete it.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mop_range explicit_operands()
LLVM_ABI void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
filtered_mop_range all_uses()
Returns an iterator range over all operands that are (explicit or implicit) register uses.
LLVM_ABI void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just after the instruction itself.
LLVM_ABI void clearRegisterKills(Register Reg, const TargetRegisterInfo *RegInfo)
Clear all kill flags affecting Reg.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
MachineOperand * findRegisterDefOperand(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false)
Wrapper for findRegisterDefOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
A description of a memory reference used in the backend.
unsigned getAddrSpace() const
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
LLVM_ABI unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
const GlobalValue * getGlobal() const
LLVM_ABI void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
LLVM_ABI void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
LLVM_ABI void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isTargetIndex() const
isTargetIndex - Tests if this is a MO_TargetIndex operand.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
@ MO_Immediate
Immediate operand.
@ MO_Register
Register operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool isFPImm() const
isFPImm - Tests if this is a MO_FPImmediate operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI void clearKillFlags(Register Reg) const
clearKillFlags - Iterate over all the uses of the given register and clear the kill flag from the Mac...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
iterator_range< use_nodbg_iterator > use_nodbg_operands(Register Reg) const
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
LLVM_ABI void moveOperands(MachineOperand *Dst, MachineOperand *Src, unsigned NumOps)
Move NumOps operands from Src to Dst, updating use-def lists as needed.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
bool reservedRegsFrozen() const
reservedRegsFrozen - Returns true after freezeReservedRegs() was called to ensure the set of reserved...
LLVM_ABI void clearVirtRegs()
clearVirtRegs - Remove all virtual registers (after physreg assignment).
void setRegAllocationHint(Register VReg, unsigned Type, Register PrefReg)
setRegAllocationHint - Specify a register allocation hint for the specified virtual register.
LLVM_ABI void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
void setSimpleHint(Register VReg, Register PrefReg)
Specify the preferred (target independent) register allocation hint for the specified virtual registe...
const TargetRegisterInfo * getTargetRegisterInfo() const
LLVM_ABI Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
iterator_range< use_iterator > use_operands(Register Reg) const
LLVM_ABI void removeRegOperandFromUseList(MachineOperand *MO)
Remove MO from its use-def list.
LLVM_ABI void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
LLVM_ABI void addRegOperandToUseList(MachineOperand *MO)
Add MO to the linked list of operands for its register.
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isLegalMUBUFImmOffset(unsigned Imm) const
bool isInlineConstant(const APInt &Imm) const
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
bool canAddToBBProlog(const MachineInstr &MI) const
static bool isDS(const MachineInstr &MI)
MachineBasicBlock * legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Legalize all operands in this instruction.
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const override
unsigned getLiveRangeSplitOpcode(Register Reg, const MachineFunction &MF) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
static bool isNeverUniform(const MachineInstr &MI)
bool isXDLWMMA(const MachineInstr &MI) const
bool isBasicBlockPrologue(const MachineInstr &MI, Register Reg=Register()) const override
bool isSpill(uint32_t Opcode) const
uint64_t getDefaultRsrcDataFormat() const
static bool isSOPP(const MachineInstr &MI)
bool mayAccessScratch(const MachineInstr &MI) const
bool isIGLP(unsigned Opcode) const
static bool isFLATScratch(const MachineInstr &MI)
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, AMDGPU::FlatAddrSpace FlatVariant) const
Returns true if Offset is legal for the subtarget as the offset to a FLAT encoded instruction with the giv...
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg) const
Return a partially built integer add instruction without carry.
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, int64_t Offset1, unsigned NumLoads) const override
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const
Replace the instructions opcode with the equivalent VALU opcode.
static bool isSMRD(const MachineInstr &MI)
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, SlotIndexes *Indexes=nullptr) const
void storeRegToStackSlotCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC) const
bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const
Returns true if this operand uses the constant bus.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
static unsigned getFoldableCopySrcIdx(const MachineInstr &MI)
unsigned getOpSize(uint32_t Opcode, unsigned OpNo) const
Return the size in bytes of the operand OpNo on the given instruction opcode.
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
static std::optional< int64_t > extractSubregFromImm(int64_t ImmVal, unsigned SubRegIndex)
Return the extracted immediate value in a subregister use from a constant materialized in a super reg...
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static bool isMTBUF(const MachineInstr &MI)
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
static bool isDGEMM(unsigned Opcode)
static bool isEXP(const MachineInstr &MI)
static bool isSALU(const MachineInstr &MI)
static bool setsSCCIfResultIsNonZero(const MachineInstr &MI)
const MIRFormatter * getMIRFormatter() const override
static bool isXcntDrain(const MachineInstr &MI)
True if MI implicitly drains XCNT.
void legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
static bool isVOP2(const MachineInstr &MI)
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
static bool isSDWA(const MachineInstr &MI)
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Quantity) const override
static bool isGather4(const MachineInstr &MI)
MachineInstr * getWholeWaveFunctionSetup(MachineFunction &MF) const
bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO would be a valid operand for the given operand definition OpInfo.
static bool isDOT(const MachineInstr &MI)
InstSizeVerifyMode getInstSizeVerifyMode(const MachineInstr &MI) const override
MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const override
bool hasModifiers(unsigned Opcode) const
Return true if this instruction has any modifiers.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
static bool isSWMMAC(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
bool isHighLatencyDef(int Opc) const override
void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const
Legalize the OpIndex operand of this instruction by inserting a MOV.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isVOPC(const MachineInstr &MI)
void removeModOperands(MachineInstr &MI) const
unsigned getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
bool isXDL(const MachineInstr &MI) const
Register isStackAccess(const MachineInstr &MI, int &FrameIndex, TypeSize &MemBytes) const
static bool isVIMAGE(const MachineInstr &MI)
void enforceOperandRCAlignment(MachineInstr &MI, AMDGPU::OpName OpName) const
static bool isSOP2(const MachineInstr &MI)
static bool isGWS(const MachineInstr &MI)
bool hasRAWDependency(const MachineInstr &FirstMI, const MachineInstr &SecondMI) const
bool isLegalAV64PseudoImm(uint64_t Imm) const
Check if this immediate value can be used for AV_MOV_B64_IMM_PSEUDO.
bool isNeverCoissue(MachineInstr &MI) const
static bool isBUF(const MachineInstr &MI)
void handleCopyToPhysHelper(SIInstrWorklist &Worklist, Register DstReg, MachineInstr &Inst, MachineRegisterInfo &MRI, DenseMap< MachineInstr *, V2PhysSCopyInfo > &WaterFalls, DenseMap< MachineInstr *, bool > &V2SPhyCopiesToErase) const
bool hasModifiersSet(const MachineInstr &MI, AMDGPU::OpName OpName) const
bool isLegalToSwap(const MachineInstr &MI, unsigned fromIdx, unsigned toIdx) const
static bool isFLATGlobal(const MachineInstr &MI)
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, int FrameIndex, MachineInstr *&CopyMI, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
bool isGlobalMemoryObject(const MachineInstr *MI) const override
static bool isVSAMPLE(const MachineInstr &MI)
bool isBufferSMRD(const MachineInstr &MI) const
static bool isKillTerminator(unsigned Opcode)
bool isVOPDAntidependencyAllowed(const MachineInstr &MI) const
If OpX is multicycle, anti-dependencies are not allowed.
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, bool IsSCCLive, SlotIndexes *Indexes=nullptr) const
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const
Register isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex, TypeSize &MemBytes) const
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const
Legalize operands in MI by either commuting it or inserting a copy of src1.
static bool isVALU(const MachineInstr &MI, bool AllowLDSDMA)
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final
static bool isTRANS(const MachineInstr &MI)
static bool isImage(const MachineInstr &MI)
static bool isSOPK(const MachineInstr &MI)
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
MachineBasicBlock * insertSimulatedTrap(MachineRegisterInfo &MRI, MachineBasicBlock &MBB, MachineInstr &MI, const DebugLoc &DL) const
Build instructions that simulate the behavior of an s_trap 2 instruction for hardware (namely,...
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static bool isFoldableCopy(const MachineInstr &MI)
bool mayAccessLDSThroughFlat(const MachineInstr &MI) const
bool isIgnorableUse(const MachineOperand &MO) const override
static bool isMUBUF(const MachineInstr &MI)
bool expandPostRAPseudo(MachineInstr &MI) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
void createWaterFallForSiCall(MachineInstr *MI, MachineDominatorTree *MDT, ArrayRef< MachineOperand * > ScalarOps, ArrayRef< Register > PhySGPRs={}) const
Wrapper function for generating waterfall for instruction MI. This function takes into consideration of...
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, Register VReg, unsigned SubReg=0, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
bool isReMaterializableImpl(const MachineInstr &MI) const override
static bool isVOP3(const MCInstrDesc &Desc)
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool physRegUsesConstantBus(const MachineOperand &Reg) const
static bool isF16PseudoScalarTrans(unsigned Opcode)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
bool mayAccessVMEMThroughFlat(const MachineInstr &MI) const
static bool isDPP(const MachineInstr &MI)
bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
static bool isMFMA(const MachineInstr &MI)
bool isLowLatencyInstruction(const MachineInstr &MI) const
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies value from one register to ano...
void mutateAndCleanupImplicit(MachineInstr &MI, const MCInstrDesc &NewDesc) const
ValueUniformity getGenericValueUniformity(const MachineInstr &MI) const
static bool isMAI(const MCInstrDesc &Desc)
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, LaneBitmask UsedLanes=LaneBitmask::getAll()) const override
static bool usesLGKM_CNT(const MachineInstr &MI)
void legalizeOperandsVALUt16(MachineInstr &Inst, MachineRegisterInfo &MRI) const
Fix operands in Inst to fix 16bit SALU to VALU lowering.
bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo, const MachineOperand &MO) const
bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
const MachineOperand & getCalleeOperand(const MachineInstr &MI) const override
bool isAsmOnlyOpcode(int MCOp) const
Check if this instruction should only be used by assembler.
bool isAlwaysGDS(uint32_t Opcode) const
static bool isVGPRSpill(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
This is used by the post-RA scheduler (SchedulePostRAList.cpp).
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
bool isLegalGFX12PlusPackedMathFP32or64BitOperand(const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand for gfx12+ packed math FP32 or 64 instructions.
unsigned getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI, bool NeedsCFI) const
int64_t getNamedImmOperand(const MachineInstr &MI, AMDGPU::OpName OperandName) const
Get required immediate operand.
ArrayRef< std::pair< int, const char * > > getSerializableTargetIndices() const override
bool regUsesConstantBus(const MachineOperand &Reg, const MachineRegisterInfo &MRI) const
static bool isMIMG(const MachineInstr &MI)
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
bool isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO (a register operand) is a legal register for the given operand description or operand ind...
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
unsigned getVALUOp(const MachineInstr &MI) const
static bool modifiesModeRegister(const MachineInstr &MI)
Return true if the instruction modifies the mode register.
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI, const TargetRegisterClass *DstRC=nullptr) const
Copy a value from a VGPR (SrcReg) to SGPR.
bool hasDivergentBranch(const MachineBasicBlock *MBB) const
Return whether the block terminates with a divergent branch.
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, AMDGPU::FlatAddrSpace FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void fixImplicitOperands(MachineInstr &MI) const
bool moveFlatAddrToVGPR(MachineInstr &Inst) const
Change SADDR form of a FLAT Inst to its VADDR form if saddr operand was moved to VGPR.
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
void createReadFirstLaneFromCopyToPhysReg(MachineRegisterInfo &MRI, Register DstReg, MachineInstr &Inst) const
bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, AMDGPU::OpName Src0OpName, MachineOperand &Src1, AMDGPU::OpName Src1OpName) const
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
This function is used to determine if an instruction can be safely executed under EXEC = 0 without ha...
bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, int64_t &ImmVal) const override
static bool isAtomic(const MachineInstr &MI)
bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, Register DstReg, Register TrueReg, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override
bool isLiteralOperandLegal(const MCInstrDesc &InstDesc, const MCOperandInfo &OpInfo) const
static bool isWWMRegSpillOpcode(uint32_t Opcode)
static bool sopkIsZext(unsigned Opcode)
static bool isSGPRSpill(const MachineInstr &MI)
static bool isWMMA(const MachineInstr &MI)
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
Returns true if the instruction could potentially depend on the value of exec.
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
std::pair< MachineInstr *, MachineInstr * > expandMovDPP64(MachineInstr &MI) const
static bool isSOPC(const MachineInstr &MI)
static bool isFLAT(const MachineInstr &MI)
bool isBarrier(unsigned Opcode) const
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
const MCInstrDesc & getMCOpcodeFromPseudo(unsigned Opcode) const
Return the descriptor of the target-specific machine instruction that corresponds to the specified ps...
static bool usesVM_CNT(const MachineInstr &MI)
MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const override
static bool isFixedSize(const MachineInstr &MI)
bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, MachineCycleInfo *CI) const override
LLVM_READONLY int commuteOpcode(unsigned Opc) const
ValueUniformity getValueUniformity(const MachineInstr &MI) const final
uint64_t getScratchRsrcWords23() const
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, AMDGPU::OpName OperandName) const
Returns the operand named Op.
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO is a legal operand if it was the OpIdx Operand for MI.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
bool allowNegativeFlatOffset(AMDGPU::FlatAddrSpace FlatVariant) const
Returns true if negative offsets are allowed for the given FlatVariant.
std::optional< int64_t > getImmOrMaterializedImm(MachineOperand &Op) const
void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, MachineInstr &Inst, DenseMap< MachineInstr *, V2PhysSCopyInfo > &WaterFalls, DenseMap< MachineInstr *, bool > &V2SPhyCopiesToErase) const
static bool isLDSDMA(const MachineInstr &MI)
static bool isVOP1(const MachineInstr &MI)
SIInstrInfo(const GCNSubtarget &ST)
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
bool hasAnyModifiersSet(const MachineInstr &MI) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Register getLongBranchReservedReg() const
bool isWholeWaveFunction() const
Register getStackPtrOffsetReg() const
unsigned getMaxMemoryClusterDWords() const
void setHasSpilledVGPRs(bool Spill=true)
bool isWWMReg(Register Reg) const
bool checkFlag(Register Reg, uint8_t Flag) const
void setHasSpilledSGPRs(bool Spill=true)
unsigned getScratchReservedForDynamicVGPRs() const
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
unsigned getHWRegIndex(MCRegister Reg) const
bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
unsigned getChannelFromSubReg(unsigned SubReg) const
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual bool isReMaterializableImpl(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual const MachineOperand & getCalleeOperand(const MachineInstr &MI) const
Returns the callee operand from the given MI.
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, LaneBitmask UsedLanes=LaneBitmask::getAll()) const
Re-issue the specified 'original' instruction at the specific location targeting a new destination re...
virtual MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual bool isGlobalMemoryObject(const MachineInstr *MI) const
Returns true if MI is an instruction we are unable to reason about (like a call or something with unm...
virtual bool expandPostRAPseudo(MachineInstr &MI) const
This function is called for all pseudo instructions that remain after register allocation.
const MCAsmInfo & getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
A Use represents the edge between a Value definition and its users.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
const uint64_t RSRC_DATA_FORMAT
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool getWMMAIsXDL(unsigned Opc)
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isDPMACCInstruction(unsigned Opc)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
LLVM_READONLY int32_t getCommuteRev(uint32_t Opcode)
LLVM_READONLY int32_t getCommuteOrig(uint32_t Opcode)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
LLVM_READONLY int32_t getGlobalVaddrOp(uint32_t Opcode)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
LLVM_READONLY int32_t getMFMAEarlyClobberOp(uint32_t Opcode)
bool getMAIIsGFX940XDL(unsigned Opc)
const uint64_t RSRC_ELEMENT_SIZE_SHIFT
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
LLVM_READONLY int32_t getIfAddr64Inst(uint32_t Opcode)
Check if Opcode is an Addr64 opcode.
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
const uint64_t RSRC_TID_ENABLE
LLVM_READONLY int32_t getVOPe32(uint32_t Opcode)
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
bool isGenericAtomic(unsigned Opc)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
LLVM_READONLY int32_t getAddr64Inst(uint32_t Opcode)
int32_t getMCOpcode(uint32_t Opcode, unsigned Gen)
bool isPackedFP32or64BitInst(unsigned Opc)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT64
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
@ OPERAND_REG_IMM_V2FP16_SPLAT
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_IMM_NOINLINE_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_INLINE_C_AV64_PSEUDO
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
LLVM_READONLY int32_t getBasicFromSDWAOp(uint32_t Opcode)
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
const uint64_t RSRC_INDEX_STRIDE_SHIFT
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
LLVM_READONLY int32_t getFlatScratchInstSVfromSS(uint32_t Opcode)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
Not(const Pred &P) -> Not< Pred >
constexpr bool isD16Buf(const T &...O)
constexpr bool isSDWA(const T &...O)
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
LLVM_ABI void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
constexpr uint64_t maxUIntN(uint64_t N)
Gets the maximum value for a N-bit unsigned integer.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
RegState
Flags to represent properties of register accesses.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ Define
Register definition.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
constexpr RegState getKillRegState(bool B)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and congruent to Skew modulo Align.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg)
Return the SubReg component from REG_SEQUENCE.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...
constexpr bool has_single_bit(T Value) noexcept
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, const MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair skipping copy like instructions and subre...
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
LLVM_ABI VirtRegInfo AnalyzeVirtRegInBundle(MachineInstr &MI, Register Reg, SmallVectorImpl< std::pair< MachineInstr *, unsigned > > *Ops=nullptr)
AnalyzeVirtRegInBundle - Analyze how the current instruction or bundle uses a virtual register.
static const MachineMemOperand::Flags MOCooperative
Mark the MMO of cooperative load/store atomics.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Count
bool isTargetSpecificOpcode(unsigned Opcode)
Check whether the given Opcode is a target-specific opcode.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned DefaultMemoryClusterDWordsLimit
constexpr unsigned BitWidth
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
constexpr T reverseBits(T Val)
Reverse the bits in Val.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
constexpr RegState getUndefRegState(bool B)
ValueUniformity
Enum describing how values behave with respect to uniformity and divergence, to answer the question: ...
@ AlwaysUniform
The result value is always uniform.
@ NeverUniform
The result value can never be assumed to be uniform.
@ Default
The result value is uniform if and only if all operands are uniform.
MachineCycleInfo::CycleT MachineCycle
static const MachineMemOperand::Flags MOThreadPrivate
Mark the MMO of accesses to memory locations that are never written to by other threads.
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Helper struct for the implementation of 3-address conversion to communicate updates made to instructi...
MachineInstr * RemoveMIUse
Other instruction whose def is no longer used by the converted instruction.
static constexpr uint64_t encode(Fields... Values)
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
constexpr bool all() const
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Utility to store machine instructions worklist.
MachineInstr * top() const
bool isDeferred(MachineInstr *MI)
SetVector< MachineInstr * > & getDeferredList()
void insert(MachineInstr *MI)
A pair composed of a register and a sub-register index.
VirtRegInfo - Information about a virtual register used by a set of operands.
bool Reads
Reads - One of the operands read the virtual register.
bool Writes
Writes - One of the operands writes the virtual register.